fluent-plugin-kafka-xst 0.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE/bug_report.yaml +72 -0
  3. data/.github/ISSUE_TEMPLATE/config.yml +5 -0
  4. data/.github/ISSUE_TEMPLATE/feature_request.yaml +39 -0
  5. data/.github/dependabot.yml +6 -0
  6. data/.github/workflows/linux.yml +45 -0
  7. data/.github/workflows/stale-actions.yml +24 -0
  8. data/.gitignore +2 -0
  9. data/ChangeLog +344 -0
  10. data/Gemfile +6 -0
  11. data/LICENSE +14 -0
  12. data/README.md +594 -0
  13. data/Rakefile +12 -0
  14. data/ci/prepare-kafka-server.sh +33 -0
  15. data/examples/README.md +3 -0
  16. data/examples/out_kafka2/dynamic_topic_based_on_tag.conf +32 -0
  17. data/examples/out_kafka2/protobuf-formatter.conf +23 -0
  18. data/examples/out_kafka2/record_key.conf +31 -0
  19. data/fluent-plugin-kafka.gemspec +27 -0
  20. data/lib/fluent/plugin/in_kafka.rb +388 -0
  21. data/lib/fluent/plugin/in_kafka_group.rb +394 -0
  22. data/lib/fluent/plugin/in_rdkafka_group.rb +305 -0
  23. data/lib/fluent/plugin/kafka_plugin_util.rb +84 -0
  24. data/lib/fluent/plugin/kafka_producer_ext.rb +308 -0
  25. data/lib/fluent/plugin/out_kafka.rb +268 -0
  26. data/lib/fluent/plugin/out_kafka2.rb +427 -0
  27. data/lib/fluent/plugin/out_kafka_buffered.rb +374 -0
  28. data/lib/fluent/plugin/out_rdkafka.rb +324 -0
  29. data/lib/fluent/plugin/out_rdkafka2.rb +526 -0
  30. data/test/helper.rb +34 -0
  31. data/test/plugin/test_in_kafka.rb +66 -0
  32. data/test/plugin/test_in_kafka_group.rb +69 -0
  33. data/test/plugin/test_kafka_plugin_util.rb +44 -0
  34. data/test/plugin/test_out_kafka.rb +68 -0
  35. data/test/plugin/test_out_kafka2.rb +138 -0
  36. data/test/plugin/test_out_kafka_buffered.rb +68 -0
  37. data/test/plugin/test_out_rdkafka2.rb +182 -0
  38. metadata +214 -0
data/README.md ADDED
@@ -0,0 +1,594 @@
+ # fluent-plugin-kafka, a plugin for [Fluentd](http://fluentd.org)
+
+ [![GitHub Actions Status](https://github.com/fluent/fluent-plugin-kafka/actions/workflows/linux.yml/badge.svg)](https://github.com/fluent/fluent-plugin-kafka/actions/workflows/linux.yml)
+
+ A fluentd plugin to both consume and produce data for Apache Kafka.
+
+ ## Installation
+
+ Add this line to your application's Gemfile:
+
+     gem 'fluent-plugin-kafka'
+
+ And then execute:
+
+     $ bundle
+
+ Or install it yourself as:
+
+     $ gem install fluent-plugin-kafka --no-document
+
+ If you want to use zookeeper-related parameters, you also need to install the zookeeper gem. The zookeeper gem includes a native extension, so development tools such as ruby-devel, gcc, and make are required.
+
+ ## Requirements
+
+ - Ruby 2.1 or later
+ - Input plugins work with kafka v0.9 or later
+ - Output plugins work with kafka v0.8 or later
+
+ ## Usage
+
+ ### Common parameters
+
+ #### SSL authentication
+
+ - ssl_ca_cert
+ - ssl_client_cert
+ - ssl_client_cert_key
+ - ssl_client_cert_key_password
+ - ssl_ca_certs_from_system
+
+ Set the paths to the SSL-related files. See [Encryption and Authentication using SSL](https://github.com/zendesk/ruby-kafka#encryption-and-authentication-using-ssl) for more details.
+
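+ A minimal sketch of the SSL parameters on an input; the broker address and file paths are placeholders, not defaults:
+
+     <source>
+       @type kafka_group
+       brokers broker1:9093
+       consumer_group fluentd
+       topics app_event
+       # certificates used to verify and authenticate against the brokers
+       ssl_ca_cert /path/to/ca.crt
+       ssl_client_cert /path/to/client.crt
+       ssl_client_cert_key /path/to/client.key
+     </source>
+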
+ #### SASL authentication
+
+ ##### with GSSAPI
+
+ - principal
+ - keytab
+
+ Set the principal and the path to the keytab for SASL/GSSAPI authentication.
+ See [Authentication using SASL](https://github.com/zendesk/ruby-kafka#authentication-using-sasl) for more details.
+
+ ##### with Plain/SCRAM
+
+ - username
+ - password
+ - scram_mechanism
+ - sasl_over_ssl
+
+ Set username, password, scram_mechanism and sasl_over_ssl for SASL/Plain or SCRAM authentication.
+ See [Authentication using SASL](https://github.com/zendesk/ruby-kafka#authentication-using-sasl) for more details.
+
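+ For example, a sketch of SASL/SCRAM settings on an output (all values are placeholders):
+
+     <match app.**>
+       @type kafka2
+       brokers broker1:9094
+       default_topic app_event
+       # SASL/SCRAM credentials
+       username fluentd
+       password xxxxxx
+       scram_mechanism sha256
+       sasl_over_ssl true
+     </match>
+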
+ ### Input plugin (@type 'kafka')
+
+ Consume events with a single consumer.
+
+     <source>
+       @type kafka
+
+       brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+       topics <listening topics(separate with comma',')>
+       format <input text type (text|json|ltsv|msgpack)> :default => json
+       message_key <key (Optional, for text format only, default is message)>
+       add_prefix <tag prefix (Optional)>
+       add_suffix <tag suffix (Optional)>
+
+       # Optionally, you can manage topic offset by using zookeeper
+       offset_zookeeper <zookeeper node list (<zookeeper1_host>:<zookeeper1_port>,<zookeeper2_host>:<zookeeper2_port>,..)>
+       offset_zk_root_node <offset path in zookeeper> default => '/fluent-plugin-kafka'
+
+       # ruby-kafka consumer options
+       max_bytes (integer) :default => nil (Use default of ruby-kafka)
+       max_wait_time (integer) :default => nil (Use default of ruby-kafka)
+       min_bytes (integer) :default => nil (Use default of ruby-kafka)
+     </source>
+
+ Processing can also start from an assigned offset for specific topics.
+
+     <source>
+       @type kafka
+
+       brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+       format <input text type (text|json|ltsv|msgpack)>
+       <topic>
+         topic <listening topic>
+         partition <listening partition: default=0>
+         offset <listening start offset: default=-1>
+       </topic>
+       <topic>
+         topic <listening topic>
+         partition <listening partition: default=0>
+         offset <listening start offset: default=-1>
+       </topic>
+     </source>
+
+ See also the [ruby-kafka README](https://github.com/zendesk/ruby-kafka#consuming-messages-from-kafka) for more detailed documentation about ruby-kafka.
+
+ The consumed topic name is used as the event tag. So when the target topic name is `app_event`, the tag is `app_event`. If you want to modify the tag, use the `add_prefix` or `add_suffix` parameters. With `add_prefix kafka`, the tag is `kafka.app_event`.
+
+ ### Input plugin (@type 'kafka_group', supports kafka group)
+
+ Consume events using Kafka's consumer group feature.
+
+     <source>
+       @type kafka_group
+
+       brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+       consumer_group <consumer group name, must set>
+       topics <listening topics(separate with comma',')>
+       format <input text type (text|json|ltsv|msgpack)> :default => json
+       message_key <key (Optional, for text format only, default is message)>
+       kafka_message_key <key (Optional, If specified, set kafka's message key to this key)>
+       add_headers <If true, add kafka's message headers to record>
+       add_prefix <tag prefix (Optional)>
+       add_suffix <tag suffix (Optional)>
+       retry_emit_limit <Wait retry_emit_limit x 1s when BufferQueueLimitError happens. The default is nil and it means waiting until BufferQueueLimitError is resolved>
+       use_record_time (Deprecated. Use 'time_source record' instead.) <If true, replace event time with contents of 'time' field of fetched record>
+       time_source <source for message timestamp (now|kafka|record)> :default => now
+       time_format <string (Optional when use_record_time is used)>
+
+       # ruby-kafka consumer options
+       max_bytes (integer) :default => 1048576
+       max_wait_time (integer) :default => nil (Use default of ruby-kafka)
+       min_bytes (integer) :default => nil (Use default of ruby-kafka)
+       offset_commit_interval (integer) :default => nil (Use default of ruby-kafka)
+       offset_commit_threshold (integer) :default => nil (Use default of ruby-kafka)
+       fetcher_max_queue_size (integer) :default => nil (Use default of ruby-kafka)
+       refresh_topic_interval (integer) :default => nil (Use default of ruby-kafka)
+       start_from_beginning (bool) :default => true
+     </source>
+
+ See also the [ruby-kafka README](https://github.com/zendesk/ruby-kafka#consuming-messages-from-kafka) for more detailed documentation about ruby-kafka options.
+
+ `topics` supports regex patterns since v0.13.1. If you want to use a regex pattern, use `/pattern/` like `/foo.*/`, as in the sketch below.
+
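+ A minimal sketch (broker and topic names are hypothetical):
+
+     <source>
+       @type kafka_group
+       brokers broker1:9092
+       consumer_group fluentd
+       # subscribes to every topic whose name matches the pattern
+       topics /app_.*/
+       format json
+     </source>
+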
+ The consumed topic name is used as the event tag. So when the target topic name is `app_event`, the tag is `app_event`. If you want to modify the tag, use the `add_prefix` or `add_suffix` parameter. With `add_prefix kafka`, the tag is `kafka.app_event`.
+
+ ### Input plugin (@type 'rdkafka_group', supports kafka consumer groups, uses rdkafka-ruby)
+
+ :warning: **The in_rdkafka_group consumer has not yet been tested under heavy production load. Use it at your own risk!**
+
+ With the introduction of the rdkafka-ruby based input plugin we hope to support Kafka brokers above version 2.1, where we saw [compatibility issues](https://github.com/fluent/fluent-plugin-kafka/issues/315) when using the ruby-kafka based @kafka_group input type. The rdkafka-ruby lib wraps the highly performant and production-ready librdkafka C library.
+
+     <source>
+       @type rdkafka_group
+       topics <listening topics(separate with comma',')>
+       format <input text type (text|json|ltsv|msgpack)> :default => json
+       message_key <key (Optional, for text format only, default is message)>
+       kafka_message_key <key (Optional, If specified, set kafka's message key to this key)>
+       add_headers <If true, add kafka's message headers to record>
+       add_prefix <tag prefix (Optional)>
+       add_suffix <tag suffix (Optional)>
+       retry_emit_limit <Wait retry_emit_limit x 1s when BufferQueueLimitError happens. The default is nil and it means waiting until BufferQueueLimitError is resolved>
+       use_record_time (Deprecated. Use 'time_source record' instead.) <If true, replace event time with contents of 'time' field of fetched record>
+       time_source <source for message timestamp (now|kafka|record)> :default => now
+       time_format <string (Optional when use_record_time is used)>
+
+       # kafka consumer options
+       max_wait_time_ms 500
+       max_batch_size 10000
+       kafka_configs {
+         "bootstrap.servers": "<broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>",
+         "group.id": "<consumer group name>"
+       }
+     </source>
+
+ See also [rdkafka-ruby](https://github.com/appsignal/rdkafka-ruby) and [librdkafka](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md) for more detailed documentation about Kafka consumer options.
+
+ The consumed topic name is used as the event tag. So when the target topic name is `app_event`, the tag is `app_event`. If you want to modify the tag, use the `add_prefix` or `add_suffix` parameter. With `add_prefix kafka`, the tag is `kafka.app_event`.
+
+ ### Output plugin
+
+ This `kafka2` plugin is for fluentd v1 or later. It uses the `ruby-kafka` producer for writing data.
+ If `ruby-kafka` doesn't fit your kafka environment, check the `rdkafka2` plugin instead. This plugin will become the `out_kafka` plugin in the future.
+
+     <match app.**>
+       @type kafka2
+
+       brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,.. # Set brokers directly
+
+       # Kafka topic, placeholders are supported. Chunk keys are required in the Buffer section in order for placeholders
+       # to work.
+       topic (string) :default => nil
+       topic_key (string) :default => 'topic'
+       partition_key (string) :default => 'partition'
+       partition_key_key (string) :default => 'partition_key'
+       message_key_key (string) :default => 'message_key'
+       default_topic (string) :default => nil
+       default_partition_key (string) :default => nil
+       record_key (string) :default => nil
+       default_message_key (string) :default => nil
+       exclude_topic_key (bool) :default => false
+       exclude_partition_key (bool) :default => false
+       exclude_partition (bool) :default => false
+       exclude_message_key (bool) :default => false
+       get_kafka_client_log (bool) :default => false
+       headers (hash) :default => {}
+       headers_from_record (hash) :default => {}
+       use_event_time (bool) :default => false
+       use_default_for_unknown_topic (bool) :default => false
+       discard_kafka_delivery_failed (bool) :default => false (No discard)
+       partitioner_hash_function (enum) (crc32|murmur2) :default => 'crc32'
+       share_producer (bool) :default => false
+
+       # If you intend to rely on AWS IAM auth to MSK with long lived credentials
+       # https://docs.aws.amazon.com/msk/latest/developerguide/iam-access-control.html
+       #
+       # For AWS STS support, see status in
+       # - https://github.com/zendesk/ruby-kafka/issues/944
+       # - https://github.com/zendesk/ruby-kafka/pull/951
+       sasl_aws_msk_iam_access_key_id (string) :default => nil
+       sasl_aws_msk_iam_secret_key_id (string) :default => nil
+       sasl_aws_msk_iam_aws_region (string) :default => nil
+
+       <format>
+         @type (json|ltsv|msgpack|attr:<record name>|<formatter name>) :default => json
+       </format>
+
+       # Optional. See https://docs.fluentd.org/v/1.0/configuration/inject-section
+       <inject>
+         tag_key tag
+         time_key time
+       </inject>
+
+       # See the fluentd documentation for buffer-related parameters: https://docs.fluentd.org/v/1.0/configuration/buffer-section
+       # The buffer chunk key should be the same as topic_key. If the value is not found in the record, default_topic is used.
+       <buffer topic>
+         flush_interval 10s
+       </buffer>
+
+       # ruby-kafka producer options
+       idempotent (bool) :default => false
+       sasl_over_ssl (bool) :default => true
+       max_send_retries (integer) :default => 1
+       required_acks (integer) :default => -1
+       ack_timeout (integer) :default => nil (Use default of ruby-kafka)
+       compression_codec (string) :default => nil (No compression. Depends on ruby-kafka: https://github.com/zendesk/ruby-kafka#compression)
+     </match>
+
+ The `<formatter name>` in `<format>` uses fluentd's formatter plugins. See the [formatter article](https://docs.fluentd.org/v/1.0/formatter).
+
+ **Note:** The Java-based Kafka client uses `murmur2` as its partitioner function by default. If you want fluent-plugin-kafka to use the same partitioning behavior, change `partitioner_hash_function` to `murmur2` instead of `crc32`. Note that to use the `murmur2` hash partitioner function, you must install the `digest-murmurhash` gem.
+
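+ For example, a sketch of that setting (the broker and topic are placeholders, and the `digest-murmurhash` gem must already be installed):
+
+     <match app.**>
+       @type kafka2
+       brokers broker1:9092
+       default_topic app_event
+       # match the Java client's default partitioning behavior
+       partitioner_hash_function murmur2
+     </match>
+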
+ ruby-kafka sometimes returns a `Kafka::DeliveryFailed` error without useful information.
+ In this case, `get_kafka_client_log` is useful for identifying the cause of the error.
+ ruby-kafka's log is routed to the fluentd log, so you can see ruby-kafka's log in the fluentd logs.
+
+ The following ruby-kafka producer options are supported.
+
+ - max_send_retries - default: 2 - Number of times to retry sending of messages to a leader.
+ - required_acks - default: -1 - The number of acks required per request. If you need flush performance, set a lower value, e.g. 1 or 2.
+ - ack_timeout - default: nil - How long the producer waits for acks. The unit is seconds.
+ - compression_codec - default: nil - The codec the producer uses to compress messages.
+ - max_send_limit_bytes - default: nil - Maximum byte size of a message to send, to avoid MessageSizeTooLarge. For example, if you set 1000000 (message.max.bytes in Kafka), messages larger than 1000000 bytes will be dropped.
+ - discard_kafka_delivery_failed - default: false - Discard the record when [Kafka::DeliveryFailed](http://www.rubydoc.info/gems/ruby-kafka/Kafka/DeliveryFailed) occurs.
+
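+ A sketch combining a few of the options above; the values are illustrative, not recommendations:
+
+     <match app.**>
+       @type kafka2
+       brokers broker1:9092
+       default_topic app_event
+       # route ruby-kafka's own log into the fluentd log to debug delivery failures
+       get_kafka_client_log true
+       # drop messages larger than Kafka's message.max.bytes instead of retrying the chunk
+       max_send_limit_bytes 1000000
+       required_acks 1
+     </match>
+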
+ For more detail about monitoring, see also https://github.com/zendesk/ruby-kafka#monitoring
+
+ See also [Kafka::Client](http://www.rubydoc.info/gems/ruby-kafka/Kafka/Client) for more detailed documentation about ruby-kafka.
+
+ This plugin also supports the "snappy" compression codec.
+ Install the snappy module before you use snappy compression.
+
+     $ gem install snappy --no-document
+
+ The snappy gem uses a native extension, so you need to install several packages first.
+ On Ubuntu, the development packages and the snappy library are needed:
+
+     $ sudo apt-get install build-essential autoconf automake libtool libsnappy-dev
+
+ On CentOS 7, a similar installation is necessary:
+
+     $ sudo yum install gcc autoconf automake libtool snappy-devel
+
+ This plugin also supports the "lz4" compression codec.
+ Install the extlz4 module before you use lz4 compression.
+
+     $ gem install extlz4 --no-document
+
+ This plugin also supports the "zstd" compression codec.
+ Install the zstd-ruby module before you use zstd compression.
+
+     $ gem install zstd-ruby --no-document
+
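+ Once the corresponding gem is installed, the codec is selected with `compression_codec`, e.g. (a sketch with placeholder broker and topic):
+
+     <match app.**>
+       @type kafka2
+       brokers broker1:9092
+       default_topic app_event
+       # snappy, lz4 or zstd, depending on which gem is installed
+       compression_codec snappy
+     </match>
+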
+ #### Load balancing
+
+ By default, ruby-kafka assigns messages to partitions at random, but messages with the same partition key will always be assigned to the same partition if `default_partition_key` is set in the config file.
+ If a key named `partition_key_key` exists in a message, this plugin uses the value of partition_key_key as the partition key.
+
+ |default_partition_key|partition_key_key| behavior |
+ | --- | --- | --- |
+ |Not set|Not exists| All messages are assigned a partition at random |
+ |Set| Not exists| All messages are assigned to the specific partition |
+ |Not set| Exists | Messages which have a partition_key_key record are assigned to the specific partition, others are assigned a partition at random |
+ |Set| Exists | Messages which have a partition_key_key record are assigned to the specific partition with partition_key_key, others are assigned to the specific partition with default_partition_key |
+
+ If a key named `message_key_key` exists in a message, this plugin publishes the value of message_key_key to kafka, where it can be read by consumers. The same message key will be assigned to all messages by setting `default_message_key` in the config file. If message_key_key exists and partition_key_key is not set explicitly, message_key_key will be used for partitioning.
+
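+ For example, under a sketch configuration like the following, a record such as `{"partition_key": "user-123", "message": "hello"}` is routed by the value `user-123`, while records without that field fall back to `default_partition_key`:
+
+     <match app.**>
+       @type kafka2
+       brokers broker1:9092
+       default_topic app_event
+       # record field whose value is used as the Kafka partition key
+       partition_key_key partition_key
+       # fallback partition key for records that do not carry the field
+       default_partition_key app
+     </match>
+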
+ #### Headers
+ It is possible to set headers on Kafka messages. This only works for the kafka2 and rdkafka2 output plugins.
+
+ The format is `key1:value1,key2:value2`. For example:
+
+     <match app.**>
+       @type kafka2
+       [...]
+       headers some_header_name:some_header_value
+     </match>
+
+ You may set header values based on a value of a fluentd record field. For example, imagine a fluentd record like:
+
+     {"source": { "ip": "127.0.0.1" }, "payload": "hello world" }
+
+ And the following fluentd config:
+
+     <match app.**>
+       @type kafka2
+       [...]
+       headers_from_record source_ip:$.source.ip
+     </match>
+
+ The Kafka message will have a header of source_ip=127.0.0.1.
+
+ The configuration format is jsonpath. It is described in https://docs.fluentd.org/plugin-helper-overview/api-plugin-helper-record_accessor
+
+ #### Excluding fields
+ Fields can be excluded from the output data. This only works for the kafka2 and rdkafka2 output plugins.
+
+ Fields must be specified as a comma-separated list of dot-notation `$.` accessors, for example:
+
+     <match app.**>
+       @type kafka2
+       [...]
+       exclude_fields $.source.ip,$.HTTP_FOO
+     </match>
+
+ This config can be used to remove fields that are used in other configs.
+
+ For example, `$.source.ip` can be extracted with the `headers_from_record` config and excluded from the message payload, as in the sketch below.
+
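+ A sketch combining the two options, promoting the IP to a header while dropping it from the payload (broker and topic are placeholders):
+
+     <match app.**>
+       @type kafka2
+       brokers broker1:9092
+       default_topic app_event
+       # copy the field into a Kafka header ...
+       headers_from_record source_ip:$.source.ip
+       # ... and remove it from the serialized record
+       exclude_fields $.source.ip
+     </match>
+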
+ > Using this config to remove unused fields is discouraged. A [filter plugin](https://docs.fluentd.org/v/0.12/filter) can be used for this purpose.
+
+ #### Send only a sub field as a message payload
+
+ If `record_key` is provided, the plugin sends only the sub field given by that key.
+ The configuration format is jsonpath.
+
+ For example, given the following configuration and incoming record:
+
+ configuration:
+
+     <match **>
+       @type kafka2
+       [...]
+       record_key '$.data'
+     </match>
+
+ record:
+
+     {
+       "specversion" : "1.0",
+       "type" : "com.example.someevent",
+       "id" : "C234-1234-1234",
+       "time" : "2018-04-05T17:31:00Z",
+       "datacontenttype" : "application/json",
+       "data" : {
+         "appinfoA" : "abc",
+         "appinfoB" : 123,
+         "appinfoC" : true
+       },
+       ...
+     }
+
+ only the `data` field will be serialized by the formatter and sent to Kafka.
+ The top-level `data` key will be removed.
+
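+ With the default json formatter, the message payload delivered to Kafka would therefore look roughly like:
+
+     {"appinfoA":"abc","appinfoB":123,"appinfoC":true}
+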
+ ### Buffered output plugin
+
+ This plugin uses the ruby-kafka producer for writing data. This plugin is for fluentd v0.12. If you use v1, see `kafka2`.
+ Support for fluentd v0.12 has ended. `kafka_buffered` will become an alias of `kafka2` and will be removed in the future.
+
+     <match app.**>
+       @type kafka_buffered
+
+       # Brokers: you can choose either brokers or zookeeper. If you are not familiar with zookeeper, use the brokers parameter.
+       brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,.. # Set brokers directly
+       zookeeper <zookeeper_host>:<zookeeper_port> # Set brokers via Zookeeper
+       zookeeper_path <broker path in zookeeper> :default => /brokers/ids # Set path in zookeeper for kafka
+
+       topic_key (string) :default => 'topic'
+       partition_key (string) :default => 'partition'
+       partition_key_key (string) :default => 'partition_key'
+       message_key_key (string) :default => 'message_key'
+       default_topic (string) :default => nil
+       default_partition_key (string) :default => nil
+       default_message_key (string) :default => nil
+       exclude_topic_key (bool) :default => false
+       exclude_partition_key (bool) :default => false
+       exclude_partition (bool) :default => false
+       exclude_message_key (bool) :default => false
+       output_data_type (json|ltsv|msgpack|attr:<record name>|<formatter name>) :default => json
+       output_include_tag (bool) :default => false
+       output_include_time (bool) :default => false
+       get_kafka_client_log (bool) :default => false
+       use_event_time (bool) :default => false
+       partitioner_hash_function (enum) (crc32|murmur2) :default => 'crc32'
+
+       # See the fluentd documentation for buffer-related parameters: https://docs.fluentd.org/v/0.12/buffer
+
+       # ruby-kafka producer options
+       idempotent (bool) :default => false
+       sasl_over_ssl (bool) :default => true
+       max_send_retries (integer) :default => 1
+       required_acks (integer) :default => -1
+       ack_timeout (integer) :default => nil (Use default of ruby-kafka)
+       compression_codec (string) :default => nil (No compression. Depends on ruby-kafka: https://github.com/zendesk/ruby-kafka#compression)
+       kafka_agg_max_bytes (integer) :default => 4096
+       kafka_agg_max_messages (integer) :default => nil (No limit)
+       max_send_limit_bytes (integer) :default => nil (No drop)
+       discard_kafka_delivery_failed (bool) :default => false (No discard)
+       monitoring_list (array) :default => []
+     </match>
+
+ `kafka_buffered` supports the following `ruby-kafka` parameters:
+
+ - max_send_retries - default: 2 - Number of times to retry sending of messages to a leader.
+ - required_acks - default: -1 - The number of acks required per request. If you need flush performance, set a lower value, e.g. 1 or 2.
+ - ack_timeout - default: nil - How long the producer waits for acks. The unit is seconds.
+ - compression_codec - default: nil - The codec the producer uses to compress messages.
+ - max_send_limit_bytes - default: nil - Maximum byte size of a message to send, to avoid MessageSizeTooLarge. For example, if you set 1000000 (message.max.bytes in Kafka), messages larger than 1000000 bytes will be dropped.
+ - discard_kafka_delivery_failed - default: false - Discard the record when [Kafka::DeliveryFailed](http://www.rubydoc.info/gems/ruby-kafka/Kafka/DeliveryFailed) occurs.
+ - monitoring_list - default: [] - Libraries to be used for monitoring. statsd and datadog are supported.
+
+ `kafka_buffered` has two additional parameters (see the sketch after this list):
+
+ - kafka_agg_max_bytes - default: 4096 - Maximum value of total message size to be included in one batch transmission.
+ - kafka_agg_max_messages - default: nil - Maximum number of messages to include in one batch transmission.
+
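+ A sketch tuning the batching and monitoring behavior; the values are illustrative only:
+
+     <match app.**>
+       @type kafka_buffered
+       brokers broker1:9092
+       default_topic app_event
+       # send at most ~64KB or 1000 records per batch transmission
+       kafka_agg_max_bytes 65536
+       kafka_agg_max_messages 1000
+       # report producer metrics via statsd
+       monitoring_list ["statsd"]
+     </match>
+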
+ **Note:** The Java-based Kafka client uses `murmur2` as its partitioner function by default. If you want fluent-plugin-kafka to use the same partitioning behavior, change `partitioner_hash_function` to `murmur2` instead of `crc32`. Note that to use the `murmur2` hash partitioner function, you must install the `digest-murmurhash` gem.
+
+ ### Non-buffered output plugin
+
+ This plugin uses the ruby-kafka producer for writing data. For performance and reliability reasons, use the `kafka_buffered` output instead. This one is mainly for testing.
+
+     <match app.**>
+       @type kafka
+
+       # Brokers: you can choose either brokers or zookeeper.
+       brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,.. # Set brokers directly
+       zookeeper <zookeeper_host>:<zookeeper_port> # Set brokers via Zookeeper
+       zookeeper_path <broker path in zookeeper> :default => /brokers/ids # Set path in zookeeper for kafka
+
+       default_topic (string) :default => nil
+       default_partition_key (string) :default => nil
+       default_message_key (string) :default => nil
+       output_data_type (json|ltsv|msgpack|attr:<record name>|<formatter name>) :default => json
+       output_include_tag (bool) :default => false
+       output_include_time (bool) :default => false
+       exclude_topic_key (bool) :default => false
+       exclude_partition_key (bool) :default => false
+       partitioner_hash_function (enum) (crc32|murmur2) :default => 'crc32'
+
+       # ruby-kafka producer options
+       max_send_retries (integer) :default => 1
+       required_acks (integer) :default => -1
+       ack_timeout (integer) :default => nil (Use default of ruby-kafka)
+       compression_codec (string) :default => nil (No compression. Depends on ruby-kafka: https://github.com/zendesk/ruby-kafka#compression)
+       max_buffer_size (integer) :default => nil (Use default of ruby-kafka)
+       max_buffer_bytesize (integer) :default => nil (Use default of ruby-kafka)
+     </match>
+
+ This plugin also supports ruby-kafka related parameters. See the Buffered output plugin section.
+
+ **Note:** The Java-based Kafka client uses `murmur2` as its partitioner function by default. If you want fluent-plugin-kafka to use the same partitioning behavior, change `partitioner_hash_function` to `murmur2` instead of `crc32`. Note that to use the `murmur2` hash partitioner function, you must install the `digest-murmurhash` gem.
+
+ ### rdkafka based output plugin
+
+ This plugin uses `rdkafka` instead of `ruby-kafka` as the Kafka client.
+ You need to install the rdkafka gem.
+
+     # rdkafka is a C extension library. Development tools such as ruby-devel and gcc are needed.
+     # for v0.12 or later
+     $ gem install rdkafka --no-document
+     # for v0.11 or earlier
+     $ gem install rdkafka -v 0.6.0 --no-document
+
+ `rdkafka2` is for fluentd v1.0 or later.
+
+     <match app.**>
+       @type rdkafka2
+
+       brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,.. # Set brokers directly
+
+       topic_key (string) :default => 'topic'
+       default_topic (string) :default => nil
+       partition_key (string) :default => 'partition'
+       partition_key_key (string) :default => 'partition_key'
+       message_key_key (string) :default => 'message_key'
+       use_default_for_unknown_topic (bool) :default => false
+       use_default_for_unknown_partition_error (bool) :default => false
+       default_partition_key (string) :default => nil
+       default_message_key (string) :default => nil
+       exclude_topic_key (bool) :default => false
+       exclude_partition_key (bool) :default => false
+       discard_kafka_delivery_failed (bool) :default => false (No discard)
+       use_event_time (bool) :default => false
+
+       # same as kafka2
+       headers (hash) :default => {}
+       headers_from_record (hash) :default => {}
+       record_key (string) :default => nil
+
+       <format>
+         @type (json|ltsv|msgpack|attr:<record name>|<formatter name>) :default => json
+       </format>
+
+       # Optional. See https://docs.fluentd.org/v/1.0/configuration/inject-section
+       <inject>
+         tag_key tag
+         time_key time
+       </inject>
+
+       # See the fluentd documentation for buffer section parameters: https://docs.fluentd.org/v/1.0/configuration/buffer-section
+       # The buffer chunk key should be the same as topic_key. If the value is not found in the record, default_topic is used.
+       <buffer topic>
+         flush_interval 10s
+       </buffer>
+
+       # You can set any rdkafka configuration via this parameter: https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
+       rdkafka_options {
+         "log_level" : 7
+       }
+
+       # rdkafka2 specific parameters
+
+       # Share the kafka producer between flush threads. This is mainly for reducing kafka operations like Kerberos authentication.
+       share_producer (bool) :default => false
+       # Timeout for polling message wait. If 0, no wait.
+       rdkafka_delivery_handle_poll_timeout (integer) :default => 30
+       # If the record size is larger than this value, such records are ignored. Default is no limit.
+       max_send_limit_bytes (integer) :default => nil
+       # The maximum number of bytes enqueued per second. This can reduce the
+       # load on both Fluentd and Kafka when an excessive number of messages is
+       # being sent. Default is no limit.
+       max_enqueue_bytes_per_second (integer) :default => nil
+     </match>
+
+ If you use v0.12, use `rdkafka` instead.
+
+     <match kafka.**>
+       @type rdkafka
+
+       default_topic kafka
+       flush_interval 1s
+       output_data_type json
+
+       rdkafka_options {
+         "log_level" : 7
+       }
+     </match>
+
+ ## FAQ
+
+ ### Why can't fluent-plugin-kafka send data to our kafka cluster?
+
+ We get lots of similar questions. In almost all cases, this problem is caused by a version mismatch between ruby-kafka and the Kafka cluster.
+ See the ruby-kafka README for more details: https://github.com/zendesk/ruby-kafka#compatibility
+
+ To avoid the problem, there are 2 approaches:
+
+ - Upgrade your Kafka cluster to the latest version. This is the better option because recent versions are faster and more robust.
+ - Downgrade ruby-kafka/fluent-plugin-kafka to a version that works with your older Kafka.
+
+ ## Contributing
+
+ 1. Fork it
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
+ 4. Push to the branch (`git push origin my-new-feature`)
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,12 @@
+ require 'bundler'
+ Bundler::GemHelper.install_tasks
+
+ require 'rake/testtask'
+
+ Rake::TestTask.new(:test) do |test|
+   test.libs << 'lib' << 'test'
+   test.test_files = FileList['test/**/test_*.rb']
+   test.verbose = true
+ end
+
+ task :default => [:build]
data/ci/prepare-kafka-server.sh ADDED
@@ -0,0 +1,33 @@
+ #!/bin/sh
+
+ export KAFKA_OPTS=-Dzookeeper.4lw.commands.whitelist=ruok
+ /usr/bin/zookeeper-server-start /etc/kafka/zookeeper.properties &
+ N_POLLING=30
+ n=1
+ while true ; do
+   sleep 1
+   status=$(echo ruok | nc localhost 2181)
+   if [ "$status" = "imok" ]; then
+     break
+   fi
+   n=$((n + 1))
+   if [ $n -ge $N_POLLING ]; then
+     echo "failed to get response from zookeeper-server"
+     exit 1
+   fi
+ done
+ /usr/bin/kafka-server-start /etc/kafka/server.properties &
+ n=1
+ while true ; do
+   sleep 1
+   status=$(/usr/bin/zookeeper-shell localhost:2181 ls /brokers/ids | sed -n 6p)
+   if [ "$status" = "[0]" ]; then
+     break
+   fi
+   n=$((n + 1))
+   if [ $n -ge $N_POLLING ]; then
+     echo "failed to get response from kafka-server"
+     exit 1
+   fi
+ done
+ /usr/bin/kafka-topics --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic test
data/examples/README.md ADDED
@@ -0,0 +1,3 @@
+ # Examples
+
+ This directory contains example Fluentd configs for this plugin.
data/examples/out_kafka2/dynamic_topic_based_on_tag.conf ADDED
@@ -0,0 +1,32 @@
+ <source>
+   @type sample
+   sample {"hello": "world"}
+   rate 7000
+   tag sample.hello.world
+ </source>
+
+ <match sample.**>
+   @type kafka2
+
+   brokers "broker:29092"
+
+   # Writes to topic `events.sample.hello.world`
+   topic "events.${tag}"
+
+   # Writes to topic `hello.world`
+   # topic "${tag[1]}.${tag[2]}"
+
+   <format>
+     @type json
+   </format>
+
+   <buffer tag>
+     flush_at_shutdown true
+     flush_mode interval
+     flush_interval 1s
+     chunk_limit_size 3MB
+     chunk_full_threshold 1
+     total_limit_size 1024MB
+     overflow_action block
+   </buffer>
+ </match>