fluent-plugin-kafka-enchanced 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/.travis.yml +17 -0
- data/ChangeLog +49 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +221 -0
- data/Rakefile +12 -0
- data/fluent-plugin-kafka.gemspec +23 -0
- data/lib/fluent/plugin/in_kafka.rb +308 -0
- data/lib/fluent/plugin/in_kafka_group.rb +218 -0
- data/lib/fluent/plugin/kafka_plugin_util.rb +22 -0
- data/lib/fluent/plugin/kafka_producer_ext.rb +225 -0
- data/lib/fluent/plugin/out_kafka.rb +200 -0
- data/lib/fluent/plugin/out_kafka2.rb +187 -0
- data/lib/fluent/plugin/out_kafka_buffered.rb +279 -0
- data/test/helper.rb +27 -0
- data/test/plugin/test_out_kafka.rb +52 -0
- metadata +138 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
---
SHA1:
  metadata.gz: 056609426bfbcfd6ea642db660c8360beaee7f72
  data.tar.gz: acd3535de138c68109edcaf7ad318ad76cc8dc5d
SHA512:
  metadata.gz: 1d25c86afc8ea2513504bd6d2b086751f3bdcc88c3f8f35673188bd3ee5edf7b23fc2d688392e79df3be2d5995b02bc168bc68a5b94564c8f0ebce7ce7e9a19e
  data.tar.gz: b5c07478cd3595b420330490efa62b1d0878020962a742c9bf72f373186cb4229715ae723335e52fef2256087f17b22550faf849e0454fd66ffb1d1aedf8ee96

data/.gitignore
ADDED
data/.travis.yml
ADDED
data/ChangeLog
ADDED
@@ -0,0 +1,49 @@
Release 0.5.1 - 2017/02/06

  * in_kafka_group: Fix uninitialized constant error

Release 0.5.0 - 2017/01/17

  * output: Add out_kafka2 plugin with v0.14 API

Release 0.4.2 - 2016/12/10

  * input: Add use_record_time and time_format parameters
  * Update ruby-kafka dependency to 0.3.16.beta2

Release 0.4.1 - 2016/12/01

  * output: Support specifying partition

Release 0.4.0 - 2016/11/08

  * Remove zookeeper dependency

Release 0.3.5 - 2016/10/21

  * output: Support message key and related parameters. #91

Release 0.3.4 - 2016/10/20

  * output: Add exclude_topic_key and exclude_partition_key. #89

Release 0.3.3 - 2016/10/17

  * out_kafka_buffered: Add get_kafka_client_log parameter. #83
  * out_kafka_buffered: Skip and log invalid record to avoid buffer stuck. #86
  * in_kafka_group: Add retry_emit_limit to handle BufferQueueLimitError. #87

Release 0.3.2 - 2016/10/06

  * in_kafka_group: Re-fetch events after consumer error. #79

Release 0.3.1 - 2016/08/28

  * output: Change default required_acks to -1. #70
  * Support ruby version changed to 2.1.0 or later

Release 0.3.0 - 2016/08/24

  * Fully replace poseidon ruby library with ruby-kafka to support latest kafka versions

See git commits for older changes

data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
Copyright (c) 2014 htgc

MIT License

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.md
ADDED
@@ -0,0 +1,221 @@
# fluent-plugin-kafka, a plugin for [Fluentd](http://fluentd.org)

[Build Status](https://travis-ci.org/htgc/fluent-plugin-kafka)

A fluentd plugin to both consume and produce data for Apache Kafka.

TODO: Write tests.

## Installation

Add this line to your application's Gemfile:

    gem 'fluent-plugin-kafka'

And then execute:

    $ bundle

Or install it yourself as:

    $ gem install fluent-plugin-kafka

If you want to use zookeeper related parameters, you also need to install the zookeeper gem. The zookeeper gem includes a native extension, so development tools such as gcc and make are required.

## Requirements

- Ruby 2.1 or later
- Input plugins work with kafka v0.9 or later
- Output plugins work with kafka v0.8 or later

## Usage

### Common parameters

- ssl_ca_cert
- ssl_client_cert
- ssl_client_cert_key

Set the path to the SSL related files. See [Encryption and Authentication using SSL](https://github.com/zendesk/ruby-kafka#encryption-and-authentication-using-ssl) for more detail.
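
These paths are read and handed to the ruby-kafka client as its SSL options (the `in_kafka.rb` source later in this diff makes the same call). A minimal Ruby sketch, with a placeholder broker address and file paths:

    require 'kafka'

    # Placeholder paths; the PEM file contents are passed to the client.
    kafka = Kafka.new(
      seed_brokers: ['broker1:9093'],
      ssl_ca_cert: File.read('/path/to/ca.crt'),
      ssl_client_cert: File.read('/path/to/client.crt'),
      ssl_client_cert_key: File.read('/path/to/client.key')
    )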

### Input plugin (@type 'kafka')

Consume events with a single consumer.

    <source>
      @type kafka

      brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
      topics <listening topics(separate with comma',')>
      format <input text type (text|json|ltsv|msgpack)> :default => json
      message_key <key (Optional, for text format only, default is message)>
      add_prefix <tag prefix (Optional)>
      add_suffix <tag suffix (Optional)>

      # Optionally, you can manage topic offset by using zookeeper
      offset_zookeeper <zookeeper node list (<zookeeper1_host>:<zookeeper1_port>,<zookeeper2_host>:<zookeeper2_port>,..)>
      offset_zk_root_node <offset path in zookeeper> default => '/fluent-plugin-kafka'

      # ruby-kafka consumer options
      max_bytes (integer) :default => nil (Use default of ruby-kafka)
      max_wait_time (integer) :default => nil (Use default of ruby-kafka)
      min_bytes (integer) :default => nil (Use default of ruby-kafka)
    </source>

Supports starting processing from an assigned offset for specific topics.

    <source>
      @type kafka

      brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
      format <input text type (text|json|ltsv|msgpack)>
      <topic>
        topic <listening topic>
        partition <listening partition: default=0>
        offset <listening start offset: default=-1>
      </topic>
      <topic>
        topic <listening topic>
        partition <listening partition: default=0>
        offset <listening start offset: default=-1>
      </topic>
    </source>

See also [ruby-kafka README](https://github.com/zendesk/ruby-kafka#consuming-messages-from-kafka) for more detailed documentation about ruby-kafka.

### Input plugin (@type 'kafka_group', supports kafka group)

Consume events using kafka consumer group features.

    <source>
      @type kafka_group

      brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
      consumer_group <consumer group name, must set>
      topics <listening topics(separate with comma',')>
      format <input text type (text|json|ltsv|msgpack)> :default => json
      message_key <key (Optional, for text format only, default is message)>
      add_prefix <tag prefix (Optional)>
      add_suffix <tag suffix (Optional)>
      retry_emit_limit <Wait retry_emit_limit x 1s when BufferQueueLimitError happens. The default is nil and it means waiting until BufferQueueLimitError is resolved>
      use_record_time <If true, replace event time with contents of 'time' field of fetched record>
      time_format <string (Optional when use_record_time is used)>

      # ruby-kafka consumer options
      max_bytes (integer) :default => 1048576
      max_wait_time (integer) :default => nil (Use default of ruby-kafka)
      min_bytes (integer) :default => nil (Use default of ruby-kafka)
      offset_commit_interval (integer) :default => nil (Use default of ruby-kafka)
      offset_commit_threshold (integer) :default => nil (Use default of ruby-kafka)
      start_from_beginning (bool) :default => true
    </source>
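
The `use_record_time` / `time_format` pair behaves roughly as sketched below. This is an illustration only (the equivalent logic appears in `in_kafka.rb` later in this diff); `use_record_time`, `time_format`, `time_parser` and `record` stand in for the configured parameters, the configured time parser and a fetched record:

    # With use_record_time enabled, the record's 'time' field becomes the event
    # time, parsed with time_format when one is configured; otherwise the
    # current fluentd engine time is used.
    record_time =
      if use_record_time
        time_format ? time_parser.parse(record['time']) : record['time']
      else
        Fluent::Engine.now
      end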

See also [ruby-kafka README](https://github.com/zendesk/ruby-kafka#consuming-messages-from-kafka) for more detailed documentation about ruby-kafka options.

### Buffered output plugin

This plugin uses the ruby-kafka producer for writing data. It works with recent kafka versions.

    <match *.**>
      @type kafka_buffered

      # Brokers: you can choose either brokers or zookeeper. If you are not familiar with zookeeper, use the brokers parameter.
      brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,.. # Set brokers directly
      zookeeper <zookeeper_host>:<zookeeper_port> # Set brokers via Zookeeper
      zookeeper_path <broker path in zookeeper> :default => /brokers/ids # Set path in zookeeper for kafka

      default_topic (string) :default => nil
      default_partition_key (string) :default => nil
      default_message_key (string) :default => nil
      output_data_type (json|ltsv|msgpack|attr:<record name>|<formatter name>) :default => json
      output_include_tag (bool) :default => false
      output_include_time (bool) :default => false
      exclude_topic_key (bool) :default => false
      exclude_partition_key (bool) :default => false
      get_kafka_client_log (bool) :default => false

      # See the fluentd documentation for buffer related parameters: http://docs.fluentd.org/articles/buffer-plugin-overview

      # ruby-kafka producer options
      max_send_retries (integer) :default => 1
      required_acks (integer) :default => -1
      ack_timeout (integer) :default => nil (Use default of ruby-kafka)
      compression_codec (gzip|snappy) :default => nil (No compression)
    </match>

`<formatter name>` of `output_data_type` uses fluentd's formatter plugins. See the [formatter article](http://docs.fluentd.org/articles/formatter-plugin-overview).

ruby-kafka sometimes returns a `Kafka::DeliveryFailed` error without useful information.
In this case, `get_kafka_client_log` is useful for identifying the cause of the error.
ruby-kafka's log is routed to the fluentd log, so you can see ruby-kafka's messages in the fluentd logs.

The following ruby-kafka producer options are supported.

- max_send_retries - default: 1 - Number of times to retry sending messages to a leader.
- required_acks - default: -1 - The number of acks required per request. If you need flush performance, set a lower value, e.g. 1 or 2.
- ack_timeout - default: nil - How long the producer waits for acks. The unit is seconds.
- compression_codec - default: nil - The codec the producer uses to compress messages.

See also [Kafka::Client](http://www.rubydoc.info/gems/ruby-kafka/Kafka/Client) for more detailed documentation about ruby-kafka.
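
As a rough illustration (not the plugin's actual code), these options map onto the corresponding arguments of ruby-kafka's `Kafka::Client#producer`; the broker list and values below are placeholders:

    require 'kafka'

    kafka = Kafka.new(seed_brokers: ['broker1:9092'], client_id: 'fluentd')
    producer = kafka.producer(
      max_retries: 1,           # max_send_retries
      required_acks: -1,        # required_acks
      ack_timeout: 5,           # ack_timeout (seconds)
      compression_codec: :gzip  # compression_codec
    )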

This plugin also supports the "snappy" compression codec.
Install the snappy module before you use snappy compression.

    $ gem install snappy

The snappy gem uses a native extension, so you need to install several packages first.
On Ubuntu, the development packages and the snappy library are needed.

    $ sudo apt-get install build-essential autoconf automake libtool libsnappy-dev

#### Load balancing

By default, ruby-kafka assigns each message to a partition at random, but messages with the same partition key are always assigned to the same partition when `default_partition_key` is set in the config file.
If a key named `partition_key` exists in a message, this plugin uses its value as the partition key.

|default_partition_key|partition_key| behavior |
| --- | --- | --- |
|Not set|Not exists| All messages are assigned a partition at random |
|Set| Not exists| All messages are assigned to the specific partition |
|Not set| Exists | Messages which have a partition_key record are assigned to the specific partition, others are assigned a partition at random |
|Set| Exists | Messages which have a partition_key record are assigned to the specific partition with partition_key, others are assigned to the specific partition with default_partition_key |

If a key named `message_key` exists in a message, this plugin publishes the value of message_key to kafka so it can be read by consumers. The same message key is assigned to all messages by setting `default_message_key` in the config file. If message_key exists and partition_key is not set explicitly, message_key will be used for partitioning.
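
A minimal Ruby sketch of that precedence (illustrative only, not the plugin's actual code; `record`, `default_partition_key` and `default_message_key` stand for the incoming record and the two config parameters):

    # The per-record key takes precedence over the configured default.
    message_key   = record['message_key']   || default_message_key
    partition_key = record['partition_key'] || default_partition_key

    # Without an explicit partition key, the message key (if any) is used for
    # partitioning; with neither, ruby-kafka picks a partition at random.
    partition_key ||= message_key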

### Non-buffered output plugin

This plugin uses the ruby-kafka producer for writing data. For performance and reliability reasons, use the `kafka_buffered` output instead. This one is mainly for testing.

    <match *.**>
      @type kafka

      # Brokers: you can choose either brokers or zookeeper.
      brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,.. # Set brokers directly
      zookeeper <zookeeper_host>:<zookeeper_port> # Set brokers via Zookeeper
      zookeeper_path <broker path in zookeeper> :default => /brokers/ids # Set path in zookeeper for kafka

      default_topic (string) :default => nil
      default_partition_key (string) :default => nil
      default_message_key (string) :default => nil
      output_data_type (json|ltsv|msgpack|attr:<record name>|<formatter name>) :default => json
      output_include_tag (bool) :default => false
      output_include_time (bool) :default => false
      exclude_topic_key (bool) :default => false
      exclude_partition_key (bool) :default => false

      # ruby-kafka producer options
      max_send_retries (integer) :default => 1
      required_acks (integer) :default => -1
      ack_timeout (integer) :default => nil (Use default of ruby-kafka)
      compression_codec (gzip|snappy) :default => nil
    </match>

This plugin also supports ruby-kafka related parameters. See the Buffered output plugin section.

## Contributing

1. Fork it
2. Create your feature branch (`git checkout -b my-new-feature`)
3. Commit your changes (`git commit -am 'Added some feature'`)
4. Push to the branch (`git push origin my-new-feature`)
5. Create new Pull Request

data/Rakefile
ADDED
@@ -0,0 +1,12 @@
require 'bundler'
Bundler::GemHelper.install_tasks

require 'rake/testtask'

Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.test_files = FileList['test/**/test_*.rb']
  test.verbose = true
end

task :default => [:build]

data/fluent-plugin-kafka.gemspec
ADDED
@@ -0,0 +1,23 @@
# -*- encoding: utf-8 -*-

Gem::Specification.new do |gem|
  gem.authors       = ["Paschenko Konstantin"]
  gem.email         = ["zhr0n4x@gmail.com"]
  gem.description   = %q{Fluentd plugin for Apache Kafka > 0.8}
  gem.summary       = %q{Fluentd plugin for Apache Kafka > 0.8}
  gem.homepage      = "https://github.com/fluent/fluent-plugin-kafka"

  gem.files         = `git ls-files`.split($\)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "fluent-plugin-kafka-enchanced"
  gem.require_paths = ["lib"]
  gem.version       = '0.5.1'
  gem.required_ruby_version = ">= 2.1.0"

  gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
  gem.add_dependency 'ltsv'
  gem.add_dependency 'ruby-kafka', '= 0.3.16.beta2'
  gem.add_development_dependency "rake", ">= 0.9.2"
  gem.add_development_dependency "test-unit", ">= 3.0.8"
end

data/lib/fluent/plugin/in_kafka.rb
ADDED
@@ -0,0 +1,308 @@
require 'fluent/input'
require 'fluent/time'
require 'fluent/plugin/kafka_plugin_util'

class Fluent::KafkaInput < Fluent::Input
  Fluent::Plugin.register_input('kafka', self)

  config_param :format, :string, :default => 'json',
               :desc => "Supported format: (json|text|ltsv|msgpack)"
  config_param :message_key, :string, :default => 'message',
               :desc => "For 'text' format only."
  config_param :host, :string, :default => nil,
               :desc => "Broker host"
  config_param :port, :integer, :default => nil,
               :desc => "Broker port"
  config_param :brokers, :string, :default => 'localhost:9092',
               :desc => "List of broker-host:port, separate with comma, must set."
  config_param :interval, :integer, :default => 1, # seconds
               :desc => "Interval (Unit: seconds)"
  config_param :topics, :string, :default => nil,
               :desc => "Listening topics(separate with comma',')"
  config_param :client_id, :string, :default => 'kafka'
  config_param :partition, :integer, :default => 0,
               :desc => "Listening partition"
  config_param :offset, :integer, :default => -1,
               :desc => "Listening start offset"
  config_param :add_prefix, :string, :default => nil,
               :desc => "Tag prefix"
  config_param :add_suffix, :string, :default => nil,
               :desc => "tag suffix"
  config_param :add_offset_in_record, :bool, :default => false

  config_param :offset_zookeeper, :string, :default => nil
  config_param :offset_zk_root_node, :string, :default => '/fluent-plugin-kafka'
  config_param :use_record_time, :bool, :default => false,
               :desc => "Replace message timestamp with contents of 'time' field."
  config_param :time_format, :string, :default => nil,
               :desc => "Time format to be used to parse 'time' filed."

  # Kafka#fetch_messages options
  config_param :max_bytes, :integer, :default => nil,
               :desc => "Maximum number of bytes to fetch."
  config_param :max_wait_time, :integer, :default => nil,
               :desc => "How long to block until the server sends us data."
  config_param :min_bytes, :integer, :default => nil,
               :desc => "Smallest amount of data the server should send us."

  include Fluent::KafkaPluginUtil::SSLSettings

  unless method_defined?(:router)
    define_method("router") { Fluent::Engine }
  end

  def initialize
    super
    require 'kafka'

    @time_parser = nil
  end

  def configure(conf)
    super

    @topic_list = []
    if @topics
      @topic_list = @topics.split(',').map { |topic|
        TopicEntry.new(topic.strip, @partition, @offset)
      }
    else
      conf.elements.select { |element| element.name == 'topic' }.each do |element|
        unless element.has_key?('topic')
          raise Fluent::ConfigError, "kafka: 'topic' is a require parameter in 'topic element'."
        end
        partition = element.has_key?('partition') ? element['partition'].to_i : 0
        offset = element.has_key?('offset') ? element['offset'].to_i : -1
        @topic_list.push(TopicEntry.new(element['topic'], partition, offset))
      end
    end

    if @topic_list.empty?
      raise Fluent::ConfigError, "kafka: 'topics' or 'topic element' is a require parameter"
    end

    # For backward compatibility
    @brokers = case
               when @host && @port
                 ["#{@host}:#{@port}"]
               when @host
                 ["#{@host}:9092"]
               when @port
                 ["localhost:#{@port}"]
               else
                 @brokers
               end

    if conf['max_wait_ms']
      log.warn "'max_wait_ms' parameter is deprecated. Use second unit 'max_wait_time' instead"
      @max_wait_time = conf['max_wait_ms'].to_i / 1000
    end

    @max_wait_time = @interval if @max_wait_time.nil?

    require 'zookeeper' if @offset_zookeeper

    @parser_proc = setup_parser

    if @use_record_time and @time_format
      @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
    end
  end

  def setup_parser
    case @format
    when 'json'
      require 'yajl'
      Proc.new { |msg, te|
        r = Yajl::Parser.parse(msg.value)
        add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
        r
      }
    when 'ltsv'
      require 'ltsv'
      Proc.new { |msg, te|
        r = LTSV.parse(msg.value).first
        add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
        r
      }
    when 'msgpack'
      require 'msgpack'
      Proc.new { |msg, te|
        r = MessagePack.unpack(msg.value)
        add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
        r
      }
    when 'text'
      Proc.new { |msg, te|
        r = {@message_key => msg.value}
        add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
        r
      }
    end
  end

  def add_offset_in_hash(hash, te, offset)
    hash['kafka_topic'.freeze] = te.topic
    hash['kafka_partition'.freeze] = te.partition
    hash['kafka_offset'.freeze] = offset
  end

  def start
    super

    @loop = Coolio::Loop.new
    opt = {}
    opt[:max_bytes] = @max_bytes if @max_bytes
    opt[:max_wait_time] = @max_wait_time if @max_wait_time
    opt[:min_bytes] = @min_bytes if @min_bytes

    @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id,
                       ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
                       ssl_client_cert: read_ssl_file(@ssl_client_cert),
                       ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key))
    @zookeeper = Zookeeper.new(@offset_zookeeper) if @offset_zookeeper

    @topic_watchers = @topic_list.map {|topic_entry|
      offset_manager = OffsetManager.new(topic_entry, @zookeeper, @offset_zk_root_node) if @offset_zookeeper
      TopicWatcher.new(
        topic_entry,
        @kafka,
        interval,
        @parser_proc,
        @add_prefix,
        @add_suffix,
        offset_manager,
        router,
        opt)
    }
    @topic_watchers.each {|tw|
      tw.attach(@loop)
    }
    @thread = Thread.new(&method(:run))
  end

  def shutdown
    @loop.stop
    @zookeeper.close! if @zookeeper
    @thread.join
    @kafka.close
    super
  end

  def run
    @loop.run
  rescue => e
    $log.error "unexpected error", :error => e.to_s
    $log.error_backtrace
  end

  class TopicWatcher < Coolio::TimerWatcher
    def initialize(topic_entry, kafka, interval, parser, add_prefix, add_suffix, offset_manager, router, options={})
      @topic_entry = topic_entry
      @kafka = kafka
      @callback = method(:consume)
      @parser = parser
      @add_prefix = add_prefix
      @add_suffix = add_suffix
      @options = options
      @offset_manager = offset_manager
      @router = router

      @next_offset = @topic_entry.offset
      if @topic_entry.offset == -1 && offset_manager
        @next_offset = offset_manager.next_offset
      end
      @fetch_args = {
        topic: @topic_entry.topic,
        partition: @topic_entry.partition,
      }.merge(@options)

      super(interval, true)
    end

    def on_timer
      @callback.call
    rescue => e
      # TODO log?
      $log.error e.to_s
      $log.error_backtrace
    end

    def consume
      offset = @next_offset
      @fetch_args[:offset] = offset
      messages = @kafka.fetch_messages(@fetch_args)

      return if messages.size.zero?

      es = Fluent::MultiEventStream.new
      tag = @topic_entry.topic
      tag = @add_prefix + "." + tag if @add_prefix
      tag = tag + "." + @add_suffix if @add_suffix

      messages.each { |msg|
        begin
          record = @parser.call(msg, @topic_entry)
          if @use_record_time
            if @time_format
              record_time = @time_parser.parse(record['time'])
            else
              record_time = record['time']
            end
          else
            record_time = Fluent::Engine.now
          end
          es.add(record_time, record)
        rescue => e
          $log.warn "parser error in #{@topic_entry.topic}/#{@topic_entry.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
          $log.debug_backtrace
        end
      }
      offset = messages.last.offset + 1

      unless es.empty?
        @router.emit_stream(tag, es)

        if @offset_manager
          @offset_manager.save_offset(offset)
        end
        @next_offset = offset
      end
    end
  end

  class TopicEntry
    def initialize(topic, partition, offset)
      @topic = topic
      @partition = partition
      @offset = offset
    end
    attr_reader :topic, :partition, :offset
  end

  class OffsetManager
    def initialize(topic_entry, zookeeper, zk_root_node)
      @zookeeper = zookeeper
      @zk_path = "#{zk_root_node}/#{topic_entry.topic}/#{topic_entry.partition}/next_offset"
      create_node(@zk_path, topic_entry.topic, topic_entry.partition)
    end

    def create_node(zk_path, topic, partition)
      path = ""
      zk_path.split(/(\/[^\/]+)/).reject(&:empty?).each { |dir|
        path = path + dir
        @zookeeper.create(:path => "#{path}")
      }
      $log.trace "use zk offset node : #{path}"
    end

    def next_offset
      @zookeeper.get(:path => @zk_path)[:data].to_i
    end

    def save_offset(offset)
      @zookeeper.set(:path => @zk_path, :data => offset.to_s)
      $log.trace "update zk offset node : #{offset.to_s}"
    end
  end
end
