sk-fluent-plugin-kafka 0.8.0

checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA256:
+   metadata.gz: 86ed6fb9afbe05c446370bc1c434ee727dde5dd1aa544640e5d535009fb29569
+   data.tar.gz: fc1030930eb993b1b4a828a4b2e81cbb95fb9c0e67a7832ade312a84e2c8cfb4
+ SHA512:
+   metadata.gz: 6134b73d32ab965833fb2607c646be42859b9f311f3aeb1d942683795a6cf61761e66473a23fcf9a196c4ba7c48fdd715d5950144d7a2035f261c45a3d6c25ae
+   data.tar.gz: 37d5bd24956e1ab3d6ce383d8e29f98f6df670268b24b92f1f6100073e247ff72afc952b074cbcb2003a2980f8dc121d532e99be83fee52a1e755c6fbe4fc062
data/.gitignore ADDED
@@ -0,0 +1,2 @@
+ /Gemfile.lock
+ *.swp
data/.travis.yml ADDED
@@ -0,0 +1,25 @@
+ language: ruby
+
+ rvm:
+   - 2.1
+   - 2.2
+   - 2.3.1
+   - 2.4.1
+   - 2.5.0
+   - ruby-head
+
+ before_install:
+   - gem update --system
+ script:
+   - bundle exec rake test
+
+ sudo: false
+
+ matrix:
+   allow_failures:
+     - rvm: ruby-head
+ deploy:
+   provider: rubygems
+   api_key: "bc5cc687adff822e861d4df96c0986e4"
+   on:
+     tags: true
data/ChangeLog ADDED
@@ -0,0 +1,161 @@
+ Release 0.8.0 - 2018/10/18
+
+ * output: Support SASL without SSL
+ * output: Add rdkafka based output
+ * Update ruby-kafka dependency to v0.7 or later
+
+ Release 0.7.9 - 2018/09/11
+
+ * in_kafka_group: Add fetcher_max_queue_size parameter
+
+ Release 0.7.8 - 2018/08/29
+
+ * input: Fix regression of kafka initialization
+
+ Release 0.7.7 - 2018/08/27
+
+ * in_kafka_group: Add retry_limit and disable_retry_limit parameters
+ * input: Support SSL parameters
+ * Add ssl_ca_certs_from_system parameter
+ * Make password parameter secret
+
+ Release 0.7.6 - 2018/08/15
+
+ * out_kafka2: Add ignore_exceptions and exception_backup parameters
+
+ Release 0.7.5 - 2018/08/14
+
+ * out_kafka2: Fix `message_key_key` parameter handling
+ * Limit ruby-kafka version('< 0.7.0') to avoid runtime error
+
+ Release 0.7.4 - 2018/07/11
+
+ * out_kafka2: Need event_emitter helper for notification
+
+ Release 0.7.3 - 2018/05/30
+
+ * output: Use SSL endpoint when ssl_client_cert is true
+
+ Release 0.7.2 - 2018/03/12
+
+ * output: Fix the regression of get_kafka_client_log parameter
+
+ Release 0.7.1 - 2018/03/05
+
+ * out_kafka2: Fix the regression of kafka client initialization
+
+ Release 0.7.0 - 2018/02/27
+
+ * output: SASL PLAIN/SCRAM support
+ * input: Fix TimeParser location bug in v1
+ * out_kafka2: Fix warning condition for tag chunk key
+
+ Release 0.6.6 - 2017/12/25
+
+ * output: Make topic/partition/partition_key/message_key configurable
+
+ Release 0.6.5 - 2017/12/14
+
+ * in_kafka_group: Add client_id parameter
+
+ Release 0.6.4 - 2017/11/23
+
+ * Multi worker support for v0.14
+ * Add kafka_message_key parameter to input plugins
+ * Relax ruby-kafka version for 0.5 or later
+ * Use oj for json when it is installed
+
+ Release 0.6.3 - 2017/11/14
+
+ * in_kafka_group: re-create consumer when error happens during event fetch
+
+ Release 0.6.2 - 2017/11/1
+
+ * Fix ltsv parsing issue which generates symbol keys
+
+ Release 0.6.1 - 2017/08/30
+
+ * Add stats and datadog monitoring support
+ * ssl_ca_certs now accepts multiple paths
+ * Fix bug by ruby-kafka 0.4.1 changes
+ * Update ruby-kafka dependency to v0.4.1
+
+ Release 0.6.0 - 2017/07/25
+
+ * Add principal and keytab parameters for SASL support
+
+ Release 0.5.7 - 2017/07/13
+
+ * out_kafka_buffered: Add kafka_agg_max_messages parameter
+
+ Release 0.5.6 - 2017/07/10
+
+ * output: Add ActiveSupport notification support
+
+ Release 0.5.5 - 2017/04/19
+
+ * output: Some trace log level changed to debug
+ * out_kafka_buffered: Add discard_kafka_delivery_failed parameter
+
+ Release 0.5.4 - 2017/04/12
+
+ * out_kafka_buffered: Add max_send_limit_bytes parameter
+ * out_kafka: Improve buffer overflow handling of ruby-kafka
+
+ Release 0.5.3 - 2017/02/13
+
+ * Relax ruby-kafka dependency
+
+ Release 0.5.2 - 2017/02/13
+
+ * in_kafka_group: Add max_bytes parameter
+
+ Release 0.5.1 - 2017/02/06
+
+ * in_kafka_group: Fix uninitialized constant error
+
+ Release 0.5.0 - 2017/01/17
+
+ * output: Add out_kafka2 plugin with v0.14 API
+
+ Release 0.4.2 - 2016/12/10
+
+ * input: Add use_record_time and time_format parameters
+ * Update ruby-kafka dependency to 0.3.16.beta2
+
+ Release 0.4.1 - 2016/12/01
+
+ * output: Support specifying partition
+
+ Release 0.4.0 - 2016/11/08
+
+ * Remove zookeeper dependency
+
+ Release 0.3.5 - 2016/10/21
+
+ * output: Support message key and related parameters. #91
+
+ Release 0.3.4 - 2016/10/20
+
+ * output: Add exclude_topic_key and exclude_partition_key. #89
+
+ Release 0.3.3 - 2016/10/17
+
+ * out_kafka_buffered: Add get_kafka_client_log parameter. #83
+ * out_kafka_buffered: Skip and log invalid record to avoid buffer stuck. #86
+ * in_kafka_group: Add retry_emit_limit to handle BufferQueueLimitError. #87
+
+ Release 0.3.2 - 2016/10/06
+
+ * in_kafka_group: Re-fetch events after consumer error. #79
+
+ Release 0.3.1 - 2016/08/28
+
+ * output: Change default required_acks to -1. #70
+ * Support ruby version changed to 2.1.0 or later
+
+ Release 0.3.0 - 2016/08/24
+
+ * Fully replace poseidon ruby library with ruby-kafka to support latest kafka versions
+
+ See git commits for older changes
data/Gemfile ADDED
@@ -0,0 +1,4 @@
+ source 'https://rubygems.org'
+
+ # Specify your gem's dependencies in fluent-plugin-kafka.gemspec
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,14 @@
+ Copyright (C) 2014 htgc
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
data/README.md ADDED
@@ -0,0 +1,319 @@
+ # fluent-plugin-kafka, a plugin for [Fluentd](http://fluentd.org)
+
+ [![Build Status](https://travis-ci.org/fluent/fluent-plugin-kafka.svg?branch=master)](https://travis-ci.org/fluent/fluent-plugin-kafka)
+
+ A fluentd plugin to both consume and produce data for Apache Kafka.
+
+ TODO: Also, I need to write tests
+
+ ## Installation
+
+ Add this line to your application's Gemfile:
+
+     gem 'fluent-plugin-kafka'
+
+ And then execute:
+
+     $ bundle
+
+ Or install it yourself as:
+
+     $ gem install fluent-plugin-kafka --no-document
+
+ If you want to use zookeeper-related parameters, you also need to install the zookeeper gem. The zookeeper gem includes a native extension, so development tools such as gcc and make are required.
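+
+ For example, a minimal sketch of installing it the same way as the plugin itself:
+
+     $ gem install zookeeper --no-document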
+
+ ## Requirements
+
+ - Ruby 2.1 or later
+ - Input plugins work with kafka v0.9 or later
+ - Output plugins work with kafka v0.8 or later
+
+ ## Usage
+
+ ### Common parameters
+
+ #### SSL authentication
+
+ - ssl_ca_cert
+ - ssl_client_cert
+ - ssl_client_cert_key
+ - ssl_ca_certs_from_system
+
+ Set the paths to SSL-related files. See [Encryption and Authentication using SSL](https://github.com/zendesk/ruby-kafka#encryption-and-authentication-using-ssl) for more details.
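+
+ For example, a minimal sketch for an SSL-enabled output (the broker address and certificate paths are illustrative placeholders):
+
+     <match app.**>
+       @type kafka_buffered
+       brokers broker1.example.com:9093
+       default_topic app_events
+       ssl_ca_cert /path/to/ca.crt
+       ssl_client_cert /path/to/client.crt
+       ssl_client_cert_key /path/to/client.key
+     </match>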
+
+ #### SASL authentication
+
+ - principal
+ - keytab
+
+ Set the principal and the path to the keytab for SASL/GSSAPI authentication. See [Authentication using SASL](https://github.com/zendesk/ruby-kafka#authentication-using-sasl) for more details.
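+
+ For example, a minimal sketch (the principal and keytab path are illustrative placeholders):
+
+     <match app.**>
+       @type kafka_buffered
+       brokers broker1.example.com:9092
+       default_topic app_events
+       principal kafka/broker1.example.com@EXAMPLE.COM
+       keytab /etc/security/keytabs/fluentd.keytab
+     </match>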
+
+ ### Input plugin (@type 'kafka')
+
+ Consumes events with a single consumer.
+
+     <source>
+       @type kafka
+
+       brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+       topics <listening topics(separate with comma',')>
+       format <input text type (text|json|ltsv|msgpack)> :default => json
+       message_key <key (Optional, for text format only, default is message)>
+       add_prefix <tag prefix (Optional)>
+       add_suffix <tag suffix (Optional)>
+
+       # Optionally, you can manage topic offsets by using zookeeper
+       offset_zookeeper <zookeeper node list (<zookeeper1_host>:<zookeeper1_port>,<zookeeper2_host>:<zookeeper2_port>,..)>
+       offset_zk_root_node <offset path in zookeeper> default => '/fluent-plugin-kafka'
+
+       # ruby-kafka consumer options
+       max_bytes (integer) :default => nil (Use default of ruby-kafka)
+       max_wait_time (integer) :default => nil (Use default of ruby-kafka)
+       min_bytes (integer) :default => nil (Use default of ruby-kafka)
+     </source>
+
+ Processing can also start from an assigned offset for specific topics:
+
+     <source>
+       @type kafka
+
+       brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+       format <input text type (text|json|ltsv|msgpack)>
+       <topic>
+         topic <listening topic>
+         partition <listening partition: default=0>
+         offset <listening start offset: default=-1>
+       </topic>
+       <topic>
+         topic <listening topic>
+         partition <listening partition: default=0>
+         offset <listening start offset: default=-1>
+       </topic>
+     </source>
+
+ See also the [ruby-kafka README](https://github.com/zendesk/ruby-kafka#consuming-messages-from-kafka) for more detailed documentation about ruby-kafka.
+
+ The consumed topic name is used as the event tag, so when the target topic name is `app_event`, the tag is `app_event`. To modify the tag, use the `add_prefix` or `add_suffix` parameter. With `add_prefix kafka`, the tag becomes `kafka.app_event`, as in the sketch below.
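+
+ For example, a minimal sketch (the broker address is an illustrative placeholder):
+
+     <source>
+       @type kafka
+       brokers localhost:9092
+       topics app_event
+       format json
+       add_prefix kafka
+     </source>
+
+ With this configuration, events consumed from the `app_event` topic are emitted with the tag `kafka.app_event`.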
+
+ ### Input plugin (@type 'kafka_group', supports kafka group)
+
+ Consumes events using the Kafka consumer group feature.
+
+     <source>
+       @type kafka_group
+
+       brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+       consumer_group <consumer group name, must set>
+       topics <listening topics(separate with comma',')>
+       format <input text type (text|json|ltsv|msgpack)> :default => json
+       message_key <key (Optional, for text format only, default is message)>
+       add_prefix <tag prefix (Optional)>
+       add_suffix <tag suffix (Optional)>
+       retry_emit_limit <Wait retry_emit_limit x 1s when BufferQueueLimitError happens. The default is nil and it means waiting until BufferQueueLimitError is resolved>
+       use_record_time <If true, replace event time with contents of 'time' field of fetched record>
+       time_format <string (Optional when use_record_time is used)>
+
+       # ruby-kafka consumer options
+       max_bytes (integer) :default => 1048576
+       max_wait_time (integer) :default => nil (Use default of ruby-kafka)
+       min_bytes (integer) :default => nil (Use default of ruby-kafka)
+       offset_commit_interval (integer) :default => nil (Use default of ruby-kafka)
+       offset_commit_threshold (integer) :default => nil (Use default of ruby-kafka)
+       fetcher_max_queue_size (integer) :default => nil (Use default of ruby-kafka)
+       start_from_beginning (bool) :default => true
+     </source>
+
+ See also the [ruby-kafka README](https://github.com/zendesk/ruby-kafka#consuming-messages-from-kafka) for more detailed documentation about ruby-kafka options.
+
+ The consumed topic name is used as the event tag, so when the target topic name is `app_event`, the tag is `app_event`. To modify the tag, use the `add_prefix` or `add_suffix` parameter. With `add_prefix kafka`, the tag becomes `kafka.app_event`.
+
+ ### Buffered output plugin
+
+ This plugin uses the ruby-kafka producer to write data. It works with recent Kafka versions.
+
+     <match app.**>
+       @type kafka_buffered
+
+       # Brokers: you can choose either brokers or zookeeper. If you are not familiar with zookeeper, use the brokers parameter.
+       brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,.. # Set brokers directly
+       zookeeper <zookeeper_host>:<zookeeper_port> # Set brokers via Zookeeper
+       zookeeper_path <broker path in zookeeper> :default => /brokers/ids # Set path in zookeeper for kafka
+
+       topic_key (string) :default => 'topic'
+       partition_key (string) :default => 'partition'
+       partition_key_key (string) :default => 'partition_key'
+       message_key_key (string) :default => 'message_key'
+       default_topic (string) :default => nil
+       default_partition_key (string) :default => nil
+       default_message_key (string) :default => nil
+       output_data_type (json|ltsv|msgpack|attr:<record name>|<formatter name>) :default => json
+       output_include_tag (bool) :default => false
+       output_include_time (bool) :default => false
+       exclude_topic_key (bool) :default => false
+       exclude_partition_key (bool) :default => false
+       get_kafka_client_log (bool) :default => false
+
+       # See fluentd document for buffer related parameters: http://docs.fluentd.org/articles/buffer-plugin-overview
+
+       # ruby-kafka producer options
+       max_send_retries (integer) :default => 1
+       required_acks (integer) :default => -1
+       ack_timeout (integer) :default => nil (Use default of ruby-kafka)
+       compression_codec (gzip|snappy) :default => nil (No compression)
+       kafka_agg_max_bytes (integer) :default => 4096
+       kafka_agg_max_messages (integer) :default => nil (No limit)
+       max_send_limit_bytes (integer) :default => nil (No drop)
+       discard_kafka_delivery_failed (bool) :default => false (No discard)
+       monitoring_list (array) :default => []
+     </match>
+
+ `<formatter name>` of `output_data_type` uses fluentd's formatter plugins. See the [formatter article](http://docs.fluentd.org/articles/formatter-plugin-overview).
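+
+ For example, a minimal sketch that emits only the value of a record's `message` field via the `attr:` form (the broker address and topic are illustrative placeholders):
+
+     <match app.**>
+       @type kafka_buffered
+       brokers localhost:9092
+       default_topic app_events
+       output_data_type attr:message
+     </match>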
+
+ ruby-kafka sometimes raises a `Kafka::DeliveryFailed` error without useful information.
+ In this case, `get_kafka_client_log` is useful for identifying the cause:
+ ruby-kafka's log is routed to the fluentd log, so you can see ruby-kafka's messages in the fluentd logs.
+
+ The following ruby-kafka producer options are supported.
+
+ - max_send_retries - default: 1 - Number of times to retry sending messages to a leader.
+ - required_acks - default: -1 - The number of acks required per request. If you need flush performance, set a lower value, e.g. 1 or 2.
+ - ack_timeout - default: nil - How long the producer waits for acks. The unit is seconds.
+ - compression_codec - default: nil - The codec the producer uses to compress messages.
+ - kafka_agg_max_bytes - default: 4096 - Maximum total message size to be included in one batch transmission.
+ - kafka_agg_max_messages - default: nil - Maximum number of messages to include in one batch transmission.
+ - max_send_limit_bytes - default: nil - Maximum byte size of a message to send, to avoid MessageSizeTooLarge. For example, if you set 1000000 (message.max.bytes in Kafka), messages larger than 1000000 bytes will be dropped.
+ - discard_kafka_delivery_failed - default: false - Discard the record when [Kafka::DeliveryFailed](http://www.rubydoc.info/gems/ruby-kafka/Kafka/DeliveryFailed) occurs.
+ - monitoring_list - default: [] - Libraries used for monitoring; statsd and datadog are supported.
+
+ For more details about monitoring, see also https://github.com/zendesk/ruby-kafka#monitoring
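+
+ For example, a sketch of enabling datadog monitoring (the array value format follows fluentd's JSON-style array parameter syntax and is an assumption, not verbatim from the plugin docs):
+
+     monitoring_list ["datadog"]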
+
+ See also [Kafka::Client](http://www.rubydoc.info/gems/ruby-kafka/Kafka/Client) for more detailed documentation about ruby-kafka.
+
+ This plugin also supports the "snappy" compression codec.
+ Install the snappy gem before using snappy compression.
+
+     $ gem install snappy --no-document
+
+ The snappy gem uses a native extension, so you need to install several packages first.
+ On Ubuntu, install the development packages and the snappy library:
+
+     $ sudo apt-get install build-essential autoconf automake libtool libsnappy-dev
+
+ On CentOS 7, a similar installation is necessary:
+
+     $ sudo yum install gcc autoconf automake libtool snappy-devel
+
+ #### Load balancing
+
+ By default, ruby-kafka assigns messages to partitions at random, but messages with the same partition key are always assigned to the same partition; set `default_partition_key` in the config file to use a fixed key. If a key named `partition_key` exists in a message, this plugin uses its value as the partition key.
210
+
211
+ |default_partition_key|partition_key| behavior |
212
+ | --- | --- | --- |
213
+ |Not set|Not exists| All messages are assigned a partition at random |
214
+ |Set| Not exists| All messages are assigned to the specific partition |
215
+ |Not set| Exists | Messages which have partition_key record are assigned to the specific partition, others are assigned a partition at random |
216
+ |Set| Exists | Messages which have partition_key record are assigned to the specific partition with parition_key, others are assigned to the specific partition with default_parition_key |
217
+
218
+ If key name `message_key` exists in a message, this plugin publishes the value of message_key to kafka and can be read by consumers. Same message key will be assigned to all messages by setting `default_message_key` in config file. If message_key exists and if partition_key is not set explicitly, messsage_key will be used for partitioning.
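+
+ For example, a record like the following (a sketch; the `event` field is purely illustrative) would be routed by the partition key `user-123` and published with the Kafka message key `order-42`:
+
+     {"event": "purchase", "partition_key": "user-123", "message_key": "order-42"}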
+
+ ### Output plugin
+
+ This plugin is for fluentd v1.0 or later. It will become the `out_kafka` plugin in the future.
+
+     <match app.**>
+       @type kafka2
+
+       brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,.. # Set brokers directly
+
+       topic_key (string) :default => 'topic'
+       partition_key (string) :default => 'partition'
+       partition_key_key (string) :default => 'partition_key'
+       message_key_key (string) :default => 'message_key'
+       default_topic (string) :default => nil
+       default_partition_key (string) :default => nil
+       default_message_key (string) :default => nil
+       exclude_topic_key (bool) :default => false
+       exclude_partition_key (bool) :default => false
+       get_kafka_client_log (bool) :default => false
+
+       <format>
+         @type (json|ltsv|msgpack|attr:<record name>|<formatter name>) :default => json
+       </format>
+       <inject>
+         tag_key tag
+         time_key time
+       </inject>
+
+       # See fluentd document for buffer related parameters: http://docs.fluentd.org/articles/buffer-plugin-overview
+       # The buffer chunk key should be the same as topic_key. If the value is not found in the record, default_topic is used.
+       <buffer topic>
+         flush_interval 10s
+       </buffer>
+
+       # ruby-kafka producer options
+       max_send_retries (integer) :default => 1
+       required_acks (integer) :default => -1
+       ack_timeout (integer) :default => nil (Use default of ruby-kafka)
+       compression_codec (gzip|snappy) :default => nil (No compression)
+     </match>
+
+ ### Non-buffered output plugin
+
+ This plugin uses the ruby-kafka producer to write data. For performance and reliability, use the `kafka_buffered` output instead. This plugin is mainly for testing.
+
+     <match app.**>
+       @type kafka
+
+       # Brokers: you can choose either brokers or zookeeper.
+       brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,.. # Set brokers directly
+       zookeeper <zookeeper_host>:<zookeeper_port> # Set brokers via Zookeeper
+       zookeeper_path <broker path in zookeeper> :default => /brokers/ids # Set path in zookeeper for kafka
+
+       default_topic (string) :default => nil
+       default_partition_key (string) :default => nil
+       default_message_key (string) :default => nil
+       output_data_type (json|ltsv|msgpack|attr:<record name>|<formatter name>) :default => json
+       output_include_tag (bool) :default => false
+       output_include_time (bool) :default => false
+       exclude_topic_key (bool) :default => false
+       exclude_partition_key (bool) :default => false
+
+       # ruby-kafka producer options
+       max_send_retries (integer) :default => 1
+       required_acks (integer) :default => -1
+       ack_timeout (integer) :default => nil (Use default of ruby-kafka)
+       compression_codec (gzip|snappy) :default => nil
+       max_buffer_size (integer) :default => nil (Use default of ruby-kafka)
+       max_buffer_bytesize (integer) :default => nil (Use default of ruby-kafka)
+     </match>
+
+ This plugin also supports ruby-kafka related parameters. See the Buffered output plugin section.
+
+ ### rdkafka based output plugin
+
+ This plugin uses `rdkafka` instead of `ruby-kafka` as the Ruby client.
+ You need to install the rdkafka gem.
+
+     # rdkafka is a C extension library, so development tools such as ruby-devel and gcc are needed
+     $ gem install rdkafka --no-document
+
+     <match kafka.**>
+       @type rdkafka
+
+       default_topic kafka
+       flush_interval 1s
+       output_data_type json
+
+       rdkafka_options {
+         "log_level" : 7
+       }
+     </match>
+
+ ## Contributing
+
+ 1. Fork it
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
+ 4. Push to the branch (`git push origin my-new-feature`)
+ 5. Create a new Pull Request