fluent-plugin-kafka-xst 0.19.1

Files changed (38)
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE/bug_report.yaml +72 -0
  3. data/.github/ISSUE_TEMPLATE/config.yml +5 -0
  4. data/.github/ISSUE_TEMPLATE/feature_request.yaml +39 -0
  5. data/.github/dependabot.yml +6 -0
  6. data/.github/workflows/linux.yml +45 -0
  7. data/.github/workflows/stale-actions.yml +24 -0
  8. data/.gitignore +2 -0
  9. data/ChangeLog +344 -0
  10. data/Gemfile +6 -0
  11. data/LICENSE +14 -0
  12. data/README.md +594 -0
  13. data/Rakefile +12 -0
  14. data/ci/prepare-kafka-server.sh +33 -0
  15. data/examples/README.md +3 -0
  16. data/examples/out_kafka2/dynamic_topic_based_on_tag.conf +32 -0
  17. data/examples/out_kafka2/protobuf-formatter.conf +23 -0
  18. data/examples/out_kafka2/record_key.conf +31 -0
  19. data/fluent-plugin-kafka.gemspec +27 -0
  20. data/lib/fluent/plugin/in_kafka.rb +388 -0
  21. data/lib/fluent/plugin/in_kafka_group.rb +394 -0
  22. data/lib/fluent/plugin/in_rdkafka_group.rb +305 -0
  23. data/lib/fluent/plugin/kafka_plugin_util.rb +84 -0
  24. data/lib/fluent/plugin/kafka_producer_ext.rb +308 -0
  25. data/lib/fluent/plugin/out_kafka.rb +268 -0
  26. data/lib/fluent/plugin/out_kafka2.rb +427 -0
  27. data/lib/fluent/plugin/out_kafka_buffered.rb +374 -0
  28. data/lib/fluent/plugin/out_rdkafka.rb +324 -0
  29. data/lib/fluent/plugin/out_rdkafka2.rb +526 -0
  30. data/test/helper.rb +34 -0
  31. data/test/plugin/test_in_kafka.rb +66 -0
  32. data/test/plugin/test_in_kafka_group.rb +69 -0
  33. data/test/plugin/test_kafka_plugin_util.rb +44 -0
  34. data/test/plugin/test_out_kafka.rb +68 -0
  35. data/test/plugin/test_out_kafka2.rb +138 -0
  36. data/test/plugin/test_out_kafka_buffered.rb +68 -0
  37. data/test/plugin/test_out_rdkafka2.rb +182 -0
  38. metadata +214 -0
data/examples/out_kafka2/protobuf-formatter.conf
@@ -0,0 +1,23 @@
+<source>
+  @type sample
+  sample {"hello": "world", "some_record":{"event":"message"}}
+  rate 7000
+  tag sample.hello.world
+</source>
+
+<match sample.**>
+  @type kafka2
+
+  brokers "broker:29092"
+
+  record_key "some_record"
+  default_topic "events"
+
+  <format>
+    # requires the fluent-plugin-formatter-protobuf gem
+    # see its docs for full usage
+    @type protobuf
+    class_name SomeRecord
+    include_paths ["/opt/fluent-plugin-formatter-protobuf/some_record_pb.rb"]
+  </format>
+</match>
data/examples/out_kafka2/record_key.conf
@@ -0,0 +1,31 @@
+<source>
+  @type sample
+  sample {"hello": "world", "some_record":{"event":"message"}}
+  rate 7000
+  tag sample.hello.world
+</source>
+
+<match sample.**>
+  @type kafka2
+
+  brokers "broker:29092"
+
+  # {"event": "message"} will be formatted and sent to Kafka
+  record_key "some_record"
+
+  default_topic "events"
+
+  <format>
+    @type json
+  </format>
+
+  <buffer>
+    flush_at_shutdown true
+    flush_mode interval
+    flush_interval 1s
+    chunk_limit_size 3MB
+    chunk_full_threshold 1
+    total_limit_size 1024MB
+    overflow_action block
+  </buffer>
+</match>
data/fluent-plugin-kafka.gemspec
@@ -0,0 +1,27 @@
+# -*- encoding: utf-8 -*-
+
+Gem::Specification.new do |gem|
+  gem.authors = ["Hidemasa Togashi", "Masahiro Nakagawa"]
+  gem.email = ["togachiro@gmail.com", "repeatedly@gmail.com"]
+  gem.description = %q{Fluentd plugin for Apache Kafka > 0.8}
+  gem.summary = %q{Fluentd plugin for Apache Kafka > 0.8}
+  gem.homepage = "https://github.com/fluent/fluent-plugin-kafka"
+  gem.license = "Apache-2.0"
+
+  gem.files = `git ls-files`.split($\)
+  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
+  gem.name = "fluent-plugin-kafka-xst"
+  gem.require_paths = ["lib"]
+  gem.version = '0.19.1'
+  gem.required_ruby_version = ">= 2.1.0"
+
+  gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
+  gem.add_dependency 'ltsv'
+  gem.add_dependency 'ruby-kafka', '>= 1.5.0', '< 2'
+  gem.add_development_dependency "rake", ">= 0.9.2"
+  gem.add_development_dependency "test-unit", ">= 3.0.8"
+  gem.add_development_dependency "test-unit-rr", "~> 1.0"
+  gem.add_development_dependency "webrick"
+  gem.add_development_dependency "digest-murmurhash"
+end
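As a usage note: the gemspec above renames the gem to fluent-plugin-kafka-xst while keeping the upstream homepage, so a project that wants this fork instead of upstream fluent-plugin-kafka would pin it by the name and version declared above. A minimal Gemfile sketch follows; the rubygems.org source line is an assumption about where the fork is published.

# Hypothetical Gemfile entry; gem name and version come from the gemspec above.
source "https://rubygems.org"

gem "fluent-plugin-kafka-xst", "0.19.1"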
data/lib/fluent/plugin/in_kafka.rb
@@ -0,0 +1,388 @@
+require 'fluent/input'
+require 'fluent/time'
+require 'fluent/plugin/kafka_plugin_util'
+
+class Fluent::KafkaInput < Fluent::Input
+  Fluent::Plugin.register_input('kafka', self)
+
+  config_param :format, :string, :default => 'json',
+               :desc => "Supported format: (json|text|ltsv|msgpack)"
+  config_param :message_key, :string, :default => 'message',
+               :desc => "For 'text' format only."
+  config_param :host, :string, :default => nil,
+               :desc => "Broker host"
+  config_param :port, :integer, :default => nil,
+               :desc => "Broker port"
+  config_param :brokers, :string, :default => 'localhost:9092',
+               :desc => "List of broker-host:port, separate with comma, must set."
+  config_param :interval, :integer, :default => 1, # seconds
+               :desc => "Interval (Unit: seconds)"
+  config_param :topics, :string, :default => nil,
+               :desc => "Listening topics(separate with comma',')"
+  config_param :client_id, :string, :default => 'kafka'
+  config_param :sasl_over_ssl, :bool, :default => true,
+               :desc => "Set to false to prevent SSL strict mode when using SASL authentication"
+  config_param :partition, :integer, :default => 0,
+               :desc => "Listening partition"
+  config_param :offset, :integer, :default => -1,
+               :desc => "Listening start offset"
+  config_param :add_prefix, :string, :default => nil,
+               :desc => "Tag prefix"
+  config_param :add_suffix, :string, :default => nil,
+               :desc => "tag suffix"
+  config_param :add_offset_in_record, :bool, :default => false
+  config_param :tag_source, :enum, :list => [:topic, :record], :default => :topic,
+               :desc => "Source for the fluentd event tag"
+  config_param :record_tag_key, :string, :default => 'tag',
+               :desc => "Tag field when tag_source is 'record'"
+
+  config_param :offset_zookeeper, :string, :default => nil
+  config_param :offset_zk_root_node, :string, :default => '/fluent-plugin-kafka'
+  config_param :use_record_time, :bool, :default => false,
+               :desc => "Replace message timestamp with contents of 'time' field.",
+               :deprecated => "Use 'time_source record' instead."
+  config_param :time_source, :enum, :list => [:now, :kafka, :record], :default => :now,
+               :desc => "Source for message timestamp."
+  config_param :record_time_key, :string, :default => 'time',
+               :desc => "Time field when time_source is 'record'"
+  config_param :get_kafka_client_log, :bool, :default => false
+  config_param :time_format, :string, :default => nil,
+               :desc => "Time format to be used to parse 'time' field."
+  config_param :kafka_message_key, :string, :default => nil,
+               :desc => "Set kafka's message key to this field"
+
+  # Kafka#fetch_messages options
+  config_param :max_bytes, :integer, :default => nil,
+               :desc => "Maximum number of bytes to fetch."
+  config_param :max_wait_time, :integer, :default => nil,
+               :desc => "How long to block until the server sends us data."
+  config_param :min_bytes, :integer, :default => nil,
+               :desc => "Smallest amount of data the server should send us."
+
+  include Fluent::KafkaPluginUtil::SSLSettings
+  include Fluent::KafkaPluginUtil::SaslSettings
+
+  unless method_defined?(:router)
+    define_method("router") { Fluent::Engine }
+  end
+
+  def initialize
+    super
+    require 'kafka'
+
+    @time_parser = nil
+    @zookeeper = nil
+  end
+
+  def configure(conf)
+    super
+
+    @topic_list = []
+    if @topics
+      @topic_list = @topics.split(',').map { |topic|
+        TopicEntry.new(topic.strip, @partition, @offset)
+      }
+    else
+      conf.elements.select { |element| element.name == 'topic' }.each do |element|
+        unless element.has_key?('topic')
+          raise Fluent::ConfigError, "kafka: 'topic' is a require parameter in 'topic element'."
+        end
+        partition = element.has_key?('partition') ? element['partition'].to_i : 0
+        offset = element.has_key?('offset') ? element['offset'].to_i : -1
+        @topic_list.push(TopicEntry.new(element['topic'], partition, offset))
+      end
+    end
+
+    if @topic_list.empty?
+      raise Fluent::ConfigError, "kafka: 'topics' or 'topic element' is a require parameter"
+    end
+
+    # For backward compatibility
+    @brokers = case
+               when @host && @port
+                 ["#{@host}:#{@port}"]
+               when @host
+                 ["#{@host}:9092"]
+               when @port
+                 ["localhost:#{@port}"]
+               else
+                 @brokers
+               end
+
+    if conf['max_wait_ms']
+      log.warn "'max_wait_ms' parameter is deprecated. Use second unit 'max_wait_time' instead"
+      @max_wait_time = conf['max_wait_ms'].to_i / 1000
+    end
+
+    @max_wait_time = @interval if @max_wait_time.nil?
+
+    require 'zookeeper' if @offset_zookeeper
+
+    @parser_proc = setup_parser(conf)
+
+    @time_source = :record if @use_record_time
+
+    if @time_source == :record and @time_format
+      if defined?(Fluent::TimeParser)
+        @time_parser = Fluent::TimeParser.new(@time_format)
+      else
+        @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
+      end
+    end
+  end
+
+  def setup_parser(conf)
+    case @format
+    when 'json'
+      begin
+        require 'oj'
+        Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+        Proc.new { |msg, te|
+          r = Oj.load(msg.value)
+          add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+          r
+        }
+      rescue LoadError
+        require 'yajl'
+        Proc.new { |msg, te|
+          r = Yajl::Parser.parse(msg.value)
+          add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+          r
+        }
+      end
+    when 'ltsv'
+      require 'ltsv'
+      Proc.new { |msg, te|
+        r = LTSV.parse(msg.value, {:symbolize_keys => false}).first
+        add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+        r
+      }
+    when 'msgpack'
+      require 'msgpack'
+      Proc.new { |msg, te|
+        r = MessagePack.unpack(msg.value)
+        add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+        r
+      }
+    when 'text'
+      Proc.new { |msg, te|
+        r = {@message_key => msg.value}
+        add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+        r
+      }
+    else
+      @custom_parser = Fluent::Plugin.new_parser(conf['format'])
+      @custom_parser.configure(conf)
+      Proc.new { |msg|
+        @custom_parser.parse(msg.value) {|_time, record|
+          record
+        }
+      }
+    end
+  end
+
+  def add_offset_in_hash(hash, te, offset)
+    hash['kafka_topic'.freeze] = te.topic
+    hash['kafka_partition'.freeze] = te.partition
+    hash['kafka_offset'.freeze] = offset
+  end
+
+  def start
+    super
+
+    @loop = Coolio::Loop.new
+    opt = {}
+    opt[:max_bytes] = @max_bytes if @max_bytes
+    opt[:max_wait_time] = @max_wait_time if @max_wait_time
+    opt[:min_bytes] = @min_bytes if @min_bytes
+
+    logger = @get_kafka_client_log ? log : nil
+    if @scram_mechanism != nil && @username != nil && @password != nil
+      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert_file_path: @ssl_ca_cert,
+                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                         ssl_client_cert_key_password: @ssl_client_cert_key_password,
+                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
+                         sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
+    elsif @username != nil && @password != nil
+      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert_file_path: @ssl_ca_cert,
+                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                         ssl_client_cert_key_password: @ssl_client_cert_key_password,
+                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_plain_username: @username, sasl_plain_password: @password,
+                         sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
+    else
+      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert_file_path: @ssl_ca_cert,
+                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                         ssl_client_cert_key_password: @ssl_client_cert_key_password,
+                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab,
+                         ssl_verify_hostname: @ssl_verify_hostname)
+    end
+
+    @zookeeper = Zookeeper.new(@offset_zookeeper) if @offset_zookeeper
+
+    @topic_watchers = @topic_list.map {|topic_entry|
+      offset_manager = OffsetManager.new(topic_entry, @zookeeper, @offset_zk_root_node) if @offset_zookeeper
+      TopicWatcher.new(
+        topic_entry,
+        @kafka,
+        interval,
+        @parser_proc,
+        @add_prefix,
+        @add_suffix,
+        offset_manager,
+        router,
+        @kafka_message_key,
+        @time_source,
+        @record_time_key,
+        @tag_source,
+        @record_tag_key,
+        opt)
+    }
+    @topic_watchers.each {|tw|
+      tw.attach(@loop)
+    }
+    @thread = Thread.new(&method(:run))
+  end
+
+  def shutdown
+    @loop.stop
+    @zookeeper.close! if @zookeeper
+    @thread.join
+    @kafka.close
+    super
+  end
+
+  def run
+    @loop.run
+  rescue => e
+    $log.error "unexpected error", :error => e.to_s
+    $log.error_backtrace
+  end
+
+  class TopicWatcher < Coolio::TimerWatcher
+    def initialize(topic_entry, kafka, interval, parser, add_prefix, add_suffix, offset_manager, router, kafka_message_key, time_source, record_time_key, tag_source, record_tag_key, options={})
+      @topic_entry = topic_entry
+      @kafka = kafka
+      @callback = method(:consume)
+      @parser = parser
+      @add_prefix = add_prefix
+      @add_suffix = add_suffix
+      @options = options
+      @offset_manager = offset_manager
+      @router = router
+      @kafka_message_key = kafka_message_key
+      @time_source = time_source
+      @record_time_key = record_time_key
+      @tag_source = tag_source
+      @record_tag_key = record_tag_key
+
+      @next_offset = @topic_entry.offset
+      if @topic_entry.offset == -1 && offset_manager
+        @next_offset = offset_manager.next_offset
+      end
+      @fetch_args = {
+        topic: @topic_entry.topic,
+        partition: @topic_entry.partition,
+      }.merge(@options)
+
+      super(interval, true)
+    end
+
+    def on_timer
+      @callback.call
+    rescue => e
+      # TODO log?
+      $log.error e.to_s
+      $log.error_backtrace
+    end
+
+    def consume
+      offset = @next_offset
+      @fetch_args[:offset] = offset
+      messages = @kafka.fetch_messages(**@fetch_args)
+
+      return if messages.size.zero?
+
+      es = Fluent::MultiEventStream.new
+      tag = @topic_entry.topic
+      tag = @add_prefix + "." + tag if @add_prefix
+      tag = tag + "." + @add_suffix if @add_suffix
+
+      messages.each { |msg|
+        begin
+          record = @parser.call(msg, @topic_entry)
+          if @tag_source == :record
+            tag = record[@record_tag_key]
+            tag = @add_prefix + "." + tag if @add_prefix
+            tag = tag + "." + @add_suffix if @add_suffix
+          end
+          case @time_source
+          when :kafka
+            record_time = Fluent::EventTime.from_time(msg.create_time)
+          when :now
+            record_time = Fluent::Engine.now
+          when :record
+            if @time_format
+              record_time = @time_parser.parse(record[@record_time_key])
+            else
+              record_time = record[@record_time_key]
+            end
+          else
+            $log.fatal "BUG: invalid time_source: #{@time_source}"
+          end
+          if @kafka_message_key
+            record[@kafka_message_key] = msg.key
+          end
+          es.add(record_time, record)
+        rescue => e
+          $log.warn "parser error in #{@topic_entry.topic}/#{@topic_entry.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+          $log.debug_backtrace
+        end
+      }
+      offset = messages.last.offset + 1
+
+      unless es.empty?
+        @router.emit_stream(tag, es)
+
+        if @offset_manager
+          @offset_manager.save_offset(offset)
+        end
+        @next_offset = offset
+      end
+    end
+  end
+
+  class TopicEntry
+    def initialize(topic, partition, offset)
+      @topic = topic
+      @partition = partition
+      @offset = offset
+    end
+    attr_reader :topic, :partition, :offset
+  end
+
+  class OffsetManager
+    def initialize(topic_entry, zookeeper, zk_root_node)
+      @zookeeper = zookeeper
+      @zk_path = "#{zk_root_node}/#{topic_entry.topic}/#{topic_entry.partition}/next_offset"
+      create_node(@zk_path, topic_entry.topic, topic_entry.partition)
+    end
+
+    def create_node(zk_path, topic, partition)
+      path = ""
+      zk_path.split(/(\/[^\/]+)/).reject(&:empty?).each { |dir|
+        path = path + dir
+        @zookeeper.create(:path => "#{path}")
+      }
+      $log.trace "use zk offset node : #{path}"
+    end
+
+    def next_offset
+      @zookeeper.get(:path => @zk_path)[:data].to_i
+    end
+
+    def save_offset(offset)
+      @zookeeper.set(:path => @zk_path, :data => offset.to_s)
+      $log.trace "update zk offset node : #{offset.to_s}"
+    end
+  end
+end
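For orientation, the sketch below shows one way the kafka input registered above could be wired into a Fluentd pipeline. The broker address and topic names are placeholders, and only parameters declared via config_param in this file are used; it is a minimal example, not part of the release.

<source>
  @type kafka
  # placeholder broker list and topics; adjust to the target cluster
  brokers kafka-broker:9092
  topics app-events,app-errors
  format json
  # poll once per second and record the source topic/partition/offset in each event
  interval 1
  add_prefix kafka
  add_offset_in_record true
</source>

With add_prefix set as above, events from topic app-events are emitted with tag kafka.app-events, which downstream <match> blocks can route on.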