fluent-plugin-kafka-xst 0.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE/bug_report.yaml +72 -0
  3. data/.github/ISSUE_TEMPLATE/config.yml +5 -0
  4. data/.github/ISSUE_TEMPLATE/feature_request.yaml +39 -0
  5. data/.github/dependabot.yml +6 -0
  6. data/.github/workflows/linux.yml +45 -0
  7. data/.github/workflows/stale-actions.yml +24 -0
  8. data/.gitignore +2 -0
  9. data/ChangeLog +344 -0
  10. data/Gemfile +6 -0
  11. data/LICENSE +14 -0
  12. data/README.md +594 -0
  13. data/Rakefile +12 -0
  14. data/ci/prepare-kafka-server.sh +33 -0
  15. data/examples/README.md +3 -0
  16. data/examples/out_kafka2/dynamic_topic_based_on_tag.conf +32 -0
  17. data/examples/out_kafka2/protobuf-formatter.conf +23 -0
  18. data/examples/out_kafka2/record_key.conf +31 -0
  19. data/fluent-plugin-kafka.gemspec +27 -0
  20. data/lib/fluent/plugin/in_kafka.rb +388 -0
  21. data/lib/fluent/plugin/in_kafka_group.rb +394 -0
  22. data/lib/fluent/plugin/in_rdkafka_group.rb +305 -0
  23. data/lib/fluent/plugin/kafka_plugin_util.rb +84 -0
  24. data/lib/fluent/plugin/kafka_producer_ext.rb +308 -0
  25. data/lib/fluent/plugin/out_kafka.rb +268 -0
  26. data/lib/fluent/plugin/out_kafka2.rb +427 -0
  27. data/lib/fluent/plugin/out_kafka_buffered.rb +374 -0
  28. data/lib/fluent/plugin/out_rdkafka.rb +324 -0
  29. data/lib/fluent/plugin/out_rdkafka2.rb +526 -0
  30. data/test/helper.rb +34 -0
  31. data/test/plugin/test_in_kafka.rb +66 -0
  32. data/test/plugin/test_in_kafka_group.rb +69 -0
  33. data/test/plugin/test_kafka_plugin_util.rb +44 -0
  34. data/test/plugin/test_out_kafka.rb +68 -0
  35. data/test/plugin/test_out_kafka2.rb +138 -0
  36. data/test/plugin/test_out_kafka_buffered.rb +68 -0
  37. data/test/plugin/test_out_rdkafka2.rb +182 -0
  38. metadata +214 -0
data/examples/out_kafka2/protobuf-formatter.conf
@@ -0,0 +1,23 @@
+ <source>
+   @type sample
+   sample {"hello": "world", "some_record":{"event":"message"}}
+   rate 7000
+   tag sample.hello.world
+ </source>
+
+ <match sample.**>
+   @type kafka2
+
+   brokers "broker:29092"
+
+   record_key "some_record"
+   default_topic "events"
+
+   <format>
+     # requires the fluent-plugin-formatter-protobuf gem
+     # see its docs for full usage
+     @type protobuf
+     class_name SomeRecord
+     include_paths ["/opt/fluent-plugin-formatter-protobuf/some_record_pb.rb"]
+   </format>
+ </match>
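The class_name SomeRecord and include_paths settings above assume a compiled protobuf Ruby file exists at the listed path. As a rough illustration only (not part of this package), a hand-written some_record_pb.rb matching the sample record could look like the following with older google-protobuf releases; the message layout is guessed from the sample event.

require 'google/protobuf'

# Hypothetical generated file: defines a SomeRecord message with a single
# string field "event", matching {"event": "message"} from the sample source.
Google::Protobuf::DescriptorPool.generated_pool.build do
  add_file("some_record.proto", :syntax => :proto3) do
    add_message "SomeRecord" do
      optional :event, :string, 1
    end
  end
end

SomeRecord = Google::Protobuf::DescriptorPool.generated_pool.lookup("SomeRecord").msgclass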
data/examples/out_kafka2/record_key.conf
@@ -0,0 +1,31 @@
+ <source>
+   @type sample
+   sample {"hello": "world", "some_record":{"event":"message"}}
+   rate 7000
+   tag sample.hello.world
+ </source>
+
+ <match sample.**>
+   @type kafka2
+
+   brokers "broker:29092"
+
+   # {"event": "message"} will be formatted and sent to Kafka
+   record_key "some_record"
+
+   default_topic "events"
+
+   <format>
+     @type json
+   </format>
+
+   <buffer>
+     flush_at_shutdown true
+     flush_mode interval
+     flush_interval 1s
+     chunk_limit_size 3MB
+     chunk_full_threshold 1
+     total_limit_size 1024MB
+     overflow_action block
+   </buffer>
+ </match>
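With record_key "some_record", only the nested some_record hash is passed to the <format> section, so each Kafka message body should be the JSON {"event":"message"}. One quick way to confirm what actually lands in the "events" topic is a short ruby-kafka consumer; this is a sketch, with the broker address taken from the example above and the client id an arbitrary assumption.

require 'kafka'

# Minimal inspection consumer (sketch): prints the key and value of each
# message in the "events" topic used by the example above.
kafka = Kafka.new(["broker:29092"], client_id: "record-key-inspector")
kafka.each_message(topic: "events", start_from_beginning: true) do |message|
  puts "key=#{message.key.inspect} value=#{message.value}"
end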
data/fluent-plugin-kafka.gemspec
@@ -0,0 +1,27 @@
+ # -*- encoding: utf-8 -*-
+
+ Gem::Specification.new do |gem|
+   gem.authors = ["Hidemasa Togashi", "Masahiro Nakagawa"]
+   gem.email = ["togachiro@gmail.com", "repeatedly@gmail.com"]
+   gem.description = %q{Fluentd plugin for Apache Kafka > 0.8}
+   gem.summary = %q{Fluentd plugin for Apache Kafka > 0.8}
+   gem.homepage = "https://github.com/fluent/fluent-plugin-kafka"
+   gem.license = "Apache-2.0"
+
+   gem.files = `git ls-files`.split($\)
+   gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+   gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
+   gem.name = "fluent-plugin-kafka-xst"
+   gem.require_paths = ["lib"]
+   gem.version = '0.19.1'
+   gem.required_ruby_version = ">= 2.1.0"
+
+   gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
+   gem.add_dependency 'ltsv'
+   gem.add_dependency 'ruby-kafka', '>= 1.5.0', '< 2'
+   gem.add_development_dependency "rake", ">= 0.9.2"
+   gem.add_development_dependency "test-unit", ">= 3.0.8"
+   gem.add_development_dependency "test-unit-rr", "~> 1.0"
+   gem.add_development_dependency "webrick"
+   gem.add_development_dependency "digest-murmurhash"
+ end
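To pull this exact release into a Fluentd deployment, the gem name and version declared in the gemspec above can be pinned in a Gemfile; a minimal sketch:

# Gemfile (sketch) - pins the renamed gem at the version from the gemspec
source "https://rubygems.org"

gem "fluentd", "< 2"
gem "fluent-plugin-kafka-xst", "0.19.1"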
data/lib/fluent/plugin/in_kafka.rb
@@ -0,0 +1,388 @@
+ require 'fluent/input'
+ require 'fluent/time'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ class Fluent::KafkaInput < Fluent::Input
+   Fluent::Plugin.register_input('kafka', self)
+
+   config_param :format, :string, :default => 'json',
+                :desc => "Supported format: (json|text|ltsv|msgpack)"
+   config_param :message_key, :string, :default => 'message',
+                :desc => "For 'text' format only."
+   config_param :host, :string, :default => nil,
+                :desc => "Broker host"
+   config_param :port, :integer, :default => nil,
+                :desc => "Broker port"
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => "List of broker-host:port, separate with comma, must set."
+   config_param :interval, :integer, :default => 1, # seconds
+                :desc => "Interval (Unit: seconds)"
+   config_param :topics, :string, :default => nil,
+                :desc => "Listening topics(separate with comma',')"
+   config_param :client_id, :string, :default => 'kafka'
+   config_param :sasl_over_ssl, :bool, :default => true,
+                :desc => "Set to false to prevent SSL strict mode when using SASL authentication"
+   config_param :partition, :integer, :default => 0,
+                :desc => "Listening partition"
+   config_param :offset, :integer, :default => -1,
+                :desc => "Listening start offset"
+   config_param :add_prefix, :string, :default => nil,
+                :desc => "Tag prefix"
+   config_param :add_suffix, :string, :default => nil,
+                :desc => "tag suffix"
+   config_param :add_offset_in_record, :bool, :default => false
+   config_param :tag_source, :enum, :list => [:topic, :record], :default => :topic,
+                :desc => "Source for the fluentd event tag"
+   config_param :record_tag_key, :string, :default => 'tag',
+                :desc => "Tag field when tag_source is 'record'"
+
+   config_param :offset_zookeeper, :string, :default => nil
+   config_param :offset_zk_root_node, :string, :default => '/fluent-plugin-kafka'
+   config_param :use_record_time, :bool, :default => false,
+                :desc => "Replace message timestamp with contents of 'time' field.",
+                :deprecated => "Use 'time_source record' instead."
+   config_param :time_source, :enum, :list => [:now, :kafka, :record], :default => :now,
+                :desc => "Source for message timestamp."
+   config_param :record_time_key, :string, :default => 'time',
+                :desc => "Time field when time_source is 'record'"
+   config_param :get_kafka_client_log, :bool, :default => false
+   config_param :time_format, :string, :default => nil,
+                :desc => "Time format to be used to parse 'time' field."
+   config_param :kafka_message_key, :string, :default => nil,
+                :desc => "Set kafka's message key to this field"
+
+   # Kafka#fetch_messages options
+   config_param :max_bytes, :integer, :default => nil,
+                :desc => "Maximum number of bytes to fetch."
+   config_param :max_wait_time, :integer, :default => nil,
+                :desc => "How long to block until the server sends us data."
+   config_param :min_bytes, :integer, :default => nil,
+                :desc => "Smallest amount of data the server should send us."
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   unless method_defined?(:router)
+     define_method("router") { Fluent::Engine }
+   end
+
+   def initialize
+     super
+     require 'kafka'
+
+     @time_parser = nil
+     @zookeeper = nil
+   end
+
+   def configure(conf)
+     super
+
+     @topic_list = []
+     if @topics
+       @topic_list = @topics.split(',').map { |topic|
+         TopicEntry.new(topic.strip, @partition, @offset)
+       }
+     else
+       conf.elements.select { |element| element.name == 'topic' }.each do |element|
+         unless element.has_key?('topic')
+           raise Fluent::ConfigError, "kafka: 'topic' is a require parameter in 'topic element'."
+         end
+         partition = element.has_key?('partition') ? element['partition'].to_i : 0
+         offset = element.has_key?('offset') ? element['offset'].to_i : -1
+         @topic_list.push(TopicEntry.new(element['topic'], partition, offset))
+       end
+     end
+
+     if @topic_list.empty?
+       raise Fluent::ConfigError, "kafka: 'topics' or 'topic element' is a require parameter"
+     end
+
+     # For backward compatibility
+     @brokers = case
+                when @host && @port
+                  ["#{@host}:#{@port}"]
+                when @host
+                  ["#{@host}:9092"]
+                when @port
+                  ["localhost:#{@port}"]
+                else
+                  @brokers
+                end
+
+     if conf['max_wait_ms']
+       log.warn "'max_wait_ms' parameter is deprecated. Use second unit 'max_wait_time' instead"
+       @max_wait_time = conf['max_wait_ms'].to_i / 1000
+     end
+
+     @max_wait_time = @interval if @max_wait_time.nil?
+
+     require 'zookeeper' if @offset_zookeeper
+
+     @parser_proc = setup_parser(conf)
+
+     @time_source = :record if @use_record_time
+
+     if @time_source == :record and @time_format
+       if defined?(Fluent::TimeParser)
+         @time_parser = Fluent::TimeParser.new(@time_format)
+       else
+         @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
+       end
+     end
+   end
+
+   def setup_parser(conf)
+     case @format
+     when 'json'
+       begin
+         require 'oj'
+         Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+         Proc.new { |msg, te|
+           r = Oj.load(msg.value)
+           add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+           r
+         }
+       rescue LoadError
+         require 'yajl'
+         Proc.new { |msg, te|
+           r = Yajl::Parser.parse(msg.value)
+           add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+           r
+         }
+       end
+     when 'ltsv'
+       require 'ltsv'
+       Proc.new { |msg, te|
+         r = LTSV.parse(msg.value, {:symbolize_keys => false}).first
+         add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+         r
+       }
+     when 'msgpack'
+       require 'msgpack'
+       Proc.new { |msg, te|
+         r = MessagePack.unpack(msg.value)
+         add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+         r
+       }
+     when 'text'
+       Proc.new { |msg, te|
+         r = {@message_key => msg.value}
+         add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+         r
+       }
+     else
+       @custom_parser = Fluent::Plugin.new_parser(conf['format'])
+       @custom_parser.configure(conf)
+       Proc.new { |msg|
+         @custom_parser.parse(msg.value) {|_time, record|
+           record
+         }
+       }
+     end
+   end
+
+   def add_offset_in_hash(hash, te, offset)
+     hash['kafka_topic'.freeze] = te.topic
+     hash['kafka_partition'.freeze] = te.partition
+     hash['kafka_offset'.freeze] = offset
+   end
+
+   def start
+     super
+
+     @loop = Coolio::Loop.new
+     opt = {}
+     opt[:max_bytes] = @max_bytes if @max_bytes
+     opt[:max_wait_time] = @max_wait_time if @max_wait_time
+     opt[:min_bytes] = @min_bytes if @min_bytes
+
+     logger = @get_kafka_client_log ? log : nil
+     if @scram_mechanism != nil && @username != nil && @password != nil
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert_file_path: @ssl_ca_cert,
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_client_cert_key_password: @ssl_client_cert_key_password,
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
+                          sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
+     elsif @username != nil && @password != nil
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert_file_path: @ssl_ca_cert,
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_client_cert_key_password: @ssl_client_cert_key_password,
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system,sasl_plain_username: @username, sasl_plain_password: @password,
+                          sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
+     else
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert_file_path: @ssl_ca_cert,
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_client_cert_key_password: @ssl_client_cert_key_password,
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab,
+                          ssl_verify_hostname: @ssl_verify_hostname)
+     end
+
+     @zookeeper = Zookeeper.new(@offset_zookeeper) if @offset_zookeeper
+
+     @topic_watchers = @topic_list.map {|topic_entry|
+       offset_manager = OffsetManager.new(topic_entry, @zookeeper, @offset_zk_root_node) if @offset_zookeeper
+       TopicWatcher.new(
+         topic_entry,
+         @kafka,
+         interval,
+         @parser_proc,
+         @add_prefix,
+         @add_suffix,
+         offset_manager,
+         router,
+         @kafka_message_key,
+         @time_source,
+         @record_time_key,
+         @tag_source,
+         @record_tag_key,
+         opt)
+     }
+     @topic_watchers.each {|tw|
+       tw.attach(@loop)
+     }
+     @thread = Thread.new(&method(:run))
+   end
+
+   def shutdown
+     @loop.stop
+     @zookeeper.close! if @zookeeper
+     @thread.join
+     @kafka.close
+     super
+   end
+
+   def run
+     @loop.run
+   rescue => e
+     $log.error "unexpected error", :error => e.to_s
+     $log.error_backtrace
+   end
+
+   class TopicWatcher < Coolio::TimerWatcher
+     def initialize(topic_entry, kafka, interval, parser, add_prefix, add_suffix, offset_manager, router, kafka_message_key, time_source, record_time_key, tag_source, record_tag_key, options={})
+       @topic_entry = topic_entry
+       @kafka = kafka
+       @callback = method(:consume)
+       @parser = parser
+       @add_prefix = add_prefix
+       @add_suffix = add_suffix
+       @options = options
+       @offset_manager = offset_manager
+       @router = router
+       @kafka_message_key = kafka_message_key
+       @time_source = time_source
+       @record_time_key = record_time_key
+       @tag_source = tag_source
+       @record_tag_key = record_tag_key
+
+       @next_offset = @topic_entry.offset
+       if @topic_entry.offset == -1 && offset_manager
+         @next_offset = offset_manager.next_offset
+       end
+       @fetch_args = {
+         topic: @topic_entry.topic,
+         partition: @topic_entry.partition,
+       }.merge(@options)
+
+       super(interval, true)
+     end
+
+     def on_timer
+       @callback.call
+     rescue => e
+       # TODO log?
+       $log.error e.to_s
+       $log.error_backtrace
+     end
+
+     def consume
+       offset = @next_offset
+       @fetch_args[:offset] = offset
+       messages = @kafka.fetch_messages(**@fetch_args)
+
+       return if messages.size.zero?
+
+       es = Fluent::MultiEventStream.new
+       tag = @topic_entry.topic
+       tag = @add_prefix + "." + tag if @add_prefix
+       tag = tag + "." + @add_suffix if @add_suffix
+
+       messages.each { |msg|
+         begin
+           record = @parser.call(msg, @topic_entry)
+           if @tag_source == :record
+             tag = record[@record_tag_key]
+             tag = @add_prefix + "." + tag if @add_prefix
+             tag = tag + "." + @add_suffix if @add_suffix
+           end
+           case @time_source
+           when :kafka
+             record_time = Fluent::EventTime.from_time(msg.create_time)
+           when :now
+             record_time = Fluent::Engine.now
+           when :record
+             if @time_format
+               record_time = @time_parser.parse(record[@record_time_key])
+             else
+               record_time = record[@record_time_key]
+             end
+           else
+             $log.fatal "BUG: invalid time_source: #{@time_source}"
+           end
+           if @kafka_message_key
+             record[@kafka_message_key] = msg.key
+           end
+           es.add(record_time, record)
+         rescue => e
+           $log.warn "parser error in #{@topic_entry.topic}/#{@topic_entry.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+           $log.debug_backtrace
+         end
+       }
+       offset = messages.last.offset + 1
+
+       unless es.empty?
+         @router.emit_stream(tag, es)
+
+         if @offset_manager
+           @offset_manager.save_offset(offset)
+         end
+         @next_offset = offset
+       end
+     end
+   end
+
+   class TopicEntry
+     def initialize(topic, partition, offset)
+       @topic = topic
+       @partition = partition
+       @offset = offset
+     end
+     attr_reader :topic, :partition, :offset
+   end
+
+   class OffsetManager
+     def initialize(topic_entry, zookeeper, zk_root_node)
+       @zookeeper = zookeeper
+       @zk_path = "#{zk_root_node}/#{topic_entry.topic}/#{topic_entry.partition}/next_offset"
+       create_node(@zk_path, topic_entry.topic, topic_entry.partition)
+     end
+
+     def create_node(zk_path, topic, partition)
+       path = ""
+       zk_path.split(/(\/[^\/]+)/).reject(&:empty?).each { |dir|
+         path = path + dir
+         @zookeeper.create(:path => "#{path}")
+       }
+       $log.trace "use zk offset node : #{path}"
+     end
+
+     def next_offset
+       @zookeeper.get(:path => @zk_path)[:data].to_i
+     end
+
+     def save_offset(offset)
+       @zookeeper.set(:path => @zk_path, :data => offset.to_s)
+       $log.trace "update zk offset node : #{offset.to_s}"
+     end
+   end
+ end
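When offset_zookeeper is set, OffsetManager persists the next offset to read under <offset_zk_root_node>/<topic>/<partition>/next_offset. The snippet below is a standalone sketch of that bookkeeping using the same zookeeper gem calls the plugin makes; the ZooKeeper host, topic, and partition values are assumptions for illustration.

require 'zookeeper'

# Sketch: read and update the offset node the plugin maintains.
zk   = Zookeeper.new("localhost:2181")                 # assumed ZooKeeper host
path = "/fluent-plugin-kafka/my_topic/0/next_offset"   # root/topic/partition/next_offset

# The plugin creates the node chain on startup; an empty node reads as offset 0.
next_offset = zk.get(:path => path)[:data].to_i
puts "resume fetching at offset #{next_offset}"

# After emitting a batch, the plugin stores last_offset + 1 for the next run.
zk.set(:path => path, :data => (next_offset + 1).to_s)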