fluent-plugin-kafka 0.14.2 → 0.15.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 14cde202b38079778e0d694692f05c19c9576be8622f35a8896df35f33ea9733
4
- data.tar.gz: 80cbc1050e85239dabfe78dbffd97a100c939ec4e9227bd68956a7b3d15aa75e
3
+ metadata.gz: 43c3a759f4636168c932c33f45c38105ebb522b5ea8222f1b1a7eceb53343348
4
+ data.tar.gz: c64a103244e721fa2de124f466f2480c960daafc713fd16f685ea4dd4a545a3d
5
5
  SHA512:
6
- metadata.gz: 5ec7ed5f16a7d78a0dcd5f6eb15ecd94dd47adc7ae77208896a0ede1d341ede1f5a668141e7ac4e5413734105e276bd483f3f2b8c2041d329a4d619a9c469a76
7
- data.tar.gz: 11877dd67f3b0f714b38153368611c8f234b6a96976a00e21117e9523fa5bcde210fd13fa5e22086cf604ee61fc94a3472bc4946c69a6f5623eeace486e7eb75
6
+ metadata.gz: 707d92f2a23041b53daf6410d3fadb0e84053c4eb250b20c6dd3c72a15969273d2279b71950334187d156767bf6646a0af468a0f84e85ca683a34c127e47e363
7
+ data.tar.gz: 978883c8a72152bb6b9262ccea4e6b65b91bca1a3907ea43a7930cf7b4d414f1a9f47cb593d420738a48bd47d86451f376fe4cc6e7dec6b4f2c4e81ad5213d00
data/ChangeLog CHANGED
@@ -1,3 +1,8 @@
1
+ Release 0.15.0 - 2020/09/14
2
+
3
+ * Add experimental `in_rdkafka_group`
4
+ * in_kafka: Expose `ssl_verify_hostname` parameter
5
+
1
6
  Release 0.14.2 - 2020/08/26
2
7
 
3
8
  * in_kafka_group: Add `add_headers` parameter
data/README.md CHANGED
@@ -141,6 +141,39 @@ See also [ruby-kafka README](https://github.com/zendesk/ruby-kafka#consuming-mes
141
141
 
142
142
  Consuming topic name is used for event tag. So when the target topic name is `app_event`, the tag is `app_event`. If you want to modify tag, use `add_prefix` or `add_suffix` parameter. With `add_prefix kafka`, the tag is `kafka.app_event`.
143
143
 
144
+ ### Input plugin (@type 'rdkafka_group', supports kafka consumer groups, uses rdkafka-ruby)
145
+
146
+ :warning: **The in_rdkafka_group consumer was not yet tested under heavy production load. Use it at your own risk!**
147
+
148
+ With the introduction of the rdkafka-ruby based input plugin we hope to support Kafka brokers above version 2.1, where we saw [compatibility issues](https://github.com/fluent/fluent-plugin-kafka/issues/315) when using the ruby-kafka based `kafka_group` input type. The rdkafka-ruby lib wraps the highly performant and production-ready librdkafka C lib.
149
+
150
+ <source>
151
+ @type rdkafka_group
152
+ topics <listening topics(separate with comma',')>
153
+ format <input text type (text|json|ltsv|msgpack)> :default => json
154
+ message_key <key (Optional, for text format only, default is message)>
155
+ kafka_message_key <key (Optional, If specified, set kafka's message key to this key)>
156
+ add_headers <If true, add kafka's message headers to record>
157
+ add_prefix <tag prefix (Optional)>
158
+ add_suffix <tag suffix (Optional)>
159
+ retry_emit_limit <Wait retry_emit_limit x 1s when BufferQueueLimitError happens. The default is nil and it means waiting until BufferQueueLimitError is resolved>
160
+ use_record_time (Deprecated. Use 'time_source record' instead.) <If true, replace event time with contents of 'time' field of fetched record>
161
+ time_source <source for message timestamp (now|kafka|record)> :default => now
162
+ time_format <string (Optional when use_record_time is used)>
163
+
164
+ # kafka consumer options
165
+ max_wait_time_ms 500
166
+ max_batch_size 10000
167
+ kafka_configs {
168
+ "bootstrap.servers": "<broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>",
169
+ "group.id": "<consumer group name>"
170
+ }
171
+ </source>
172
+
173
+ See also [rdkafka-ruby](https://github.com/appsignal/rdkafka-ruby) and [librdkafka](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md) for more detailed documentation about Kafka consumer options.
174
+
175
+ Consuming topic name is used for event tag. So when the target topic name is `app_event`, the tag is `app_event`. If you want to modify tag, use `add_prefix` or `add_suffix` parameter. With `add_prefix kafka`, the tag is `kafka.app_event`.
176
+
144
177
  ### Output plugin
145
178
 
146
179
  This `kafka2` plugin is for fluentd v1 or later. This plugin uses `ruby-kafka` producer for writing data.
@@ -13,7 +13,7 @@ Gem::Specification.new do |gem|
13
13
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
14
14
  gem.name = "fluent-plugin-kafka"
15
15
  gem.require_paths = ["lib"]
16
- gem.version = '0.14.2'
16
+ gem.version = '0.15.0'
17
17
  gem.required_ruby_version = ">= 2.1.0"
18
18
 
19
19
  gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
@@ -188,16 +188,17 @@ class Fluent::KafkaInput < Fluent::Input
188
188
  @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
189
189
  ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
190
190
  ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
191
- sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl)
191
+ sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
192
192
  elsif @username != nil && @password != nil
193
193
  @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
194
194
  ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
195
195
  ssl_ca_certs_from_system: @ssl_ca_certs_from_system,sasl_plain_username: @username, sasl_plain_password: @password,
196
- sasl_over_ssl: @sasl_over_ssl)
196
+ sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
197
197
  else
198
198
  @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
199
199
  ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
200
- ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
200
+ ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab,
201
+ ssl_verify_hostname: @ssl_verify_hostname)
201
202
  end
202
203
 
203
204
  @zookeeper = Zookeeper.new(@offset_zookeeper) if @offset_zookeeper
@@ -0,0 +1,284 @@
1
+ require 'fluent/plugin/input'
2
+ require 'fluent/time'
3
+ require 'fluent/plugin/kafka_plugin_util'
4
+
5
+ require 'rdkafka'
6
+
7
+ class Fluent::Plugin::RdKafkaGroupInput < Fluent::Plugin::Input
8
+ Fluent::Plugin.register_input('rdkafka_group', self)
9
+
10
+ helpers :thread
11
+
12
+ config_param :topics, :string,
13
+ :desc => "Listening topics(separate with comma',')."
14
+
15
+ config_param :format, :string, :default => 'json',
16
+ :desc => "Supported format: (json|text|ltsv|msgpack)"
17
+ config_param :message_key, :string, :default => 'message',
18
+ :desc => "For 'text' format only."
19
+ config_param :add_headers, :bool, :default => false,
20
+ :desc => "Add kafka's message headers to event record"
21
+ config_param :add_prefix, :string, :default => nil,
22
+ :desc => "Tag prefix (Optional)"
23
+ config_param :add_suffix, :string, :default => nil,
24
+ :desc => "Tag suffix (Optional)"
25
+ config_param :use_record_time, :bool, :default => false,
26
+ :desc => "Replace message timestamp with contents of 'time' field.",
27
+ :deprecated => "Use 'time_source record' instead."
28
+ config_param :time_source, :enum, :list => [:now, :kafka, :record], :default => :now,
29
+ :desc => "Source for message timestamp."
30
+ config_param :record_time_key, :string, :default => 'time',
31
+ :desc => "Time field when time_source is 'record'"
32
+ config_param :time_format, :string, :default => nil,
33
+ :desc => "Time format to be used to parse 'time' field."
34
+ config_param :kafka_message_key, :string, :default => nil,
35
+ :desc => "Set kafka's message key to this field"
36
+
37
+ config_param :retry_emit_limit, :integer, :default => nil,
38
+ :desc => "How long to stop event consuming when BufferQueueLimitError happens. Wait retry_emit_limit x 1s. The default is waiting until BufferQueueLimitError is resolved"
39
+ config_param :retry_wait_seconds, :integer, :default => 30
40
+ config_param :disable_retry_limit, :bool, :default => false,
41
+ :desc => "If set true, it disables retry_limit and make Fluentd retry indefinitely (default: false)"
42
+ config_param :retry_limit, :integer, :default => 10,
43
+ :desc => "The maximum number of retries for connecting kafka (default: 10)"
44
+
45
+ config_param :max_wait_time_ms, :integer, :default => 250,
46
+ :desc => "How long to block polls in milliseconds until the server sends us data."
47
+ config_param :max_batch_size, :integer, :default => 10000,
48
+ :desc => "Maximum number of log lines emitted in a single batch."
49
+
50
+ config_param :kafka_configs, :hash, :default => {},
51
+ :desc => "Kafka configuration properties as desribed in https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md"
52
+
53
+ include Fluent::KafkaPluginUtil::SSLSettings
54
+ include Fluent::KafkaPluginUtil::SaslSettings
55
+
56
+ class ForShutdown < StandardError
57
+ end
58
+
59
+ BufferError = Fluent::Plugin::Buffer::BufferOverflowError
60
+
61
+ def initialize
62
+ super
63
+
64
+ @time_parser = nil
65
+ @retry_count = 1
66
+ end
67
+
68
+ def _config_to_array(config)
69
+ config_array = config.split(',').map {|k| k.strip }
70
+ if config_array.empty?
71
+ raise Fluent::ConfigError, "kafka_group: '#{config}' is a required parameter"
72
+ end
73
+ config_array
74
+ end
75
+
76
+ def multi_workers_ready?
77
+ true
78
+ end
79
+
80
+ private :_config_to_array
81
+
82
+ def configure(conf)
83
+ super
84
+
85
+ log.warn "The in_rdkafka_group consumer was not yet tested under heavy production load. Use it at your own risk!"
86
+
87
+ log.info "Will watch for topics #{@topics} at brokers " \
88
+ "#{@kafka_configs["bootstrap.servers"]} and '#{@kafka_configs["group.id"]}' group"
89
+
90
+ @topics = _config_to_array(@topics)
91
+
92
+ @parser_proc = setup_parser
93
+
94
+ @time_source = :record if @use_record_time
95
+
96
+ if @time_source == :record and @time_format
97
+ @time_parser = Fluent::TimeParser.new(@time_format)
98
+ end
99
+ end
100
+
101
+ def setup_parser
102
+ case @format
103
+ when 'json'
104
+ begin
105
+ require 'oj'
106
+ Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
107
+ Proc.new { |msg| Oj.load(msg.payload) }
108
+ rescue LoadError
109
+ require 'yajl'
110
+ Proc.new { |msg| Yajl::Parser.parse(msg.payload) }
111
+ end
112
+ when 'ltsv'
113
+ require 'ltsv'
114
+ Proc.new { |msg| LTSV.parse(msg.payload, {:symbolize_keys => false}).first }
115
+ when 'msgpack'
116
+ require 'msgpack'
117
+ Proc.new { |msg| MessagePack.unpack(msg.payload) }
118
+ when 'text'
119
+ Proc.new { |msg| {@message_key => msg.payload} }
120
+ end
121
+ end
122
+
123
+ def start
124
+ super
125
+
126
+ @consumer = setup_consumer
127
+
128
+ thread_create(:in_rdkafka_group, &method(:run))
129
+ end
130
+
131
+ def shutdown
132
+ # This nil assignment should be guarded by mutex in multithread programming manner.
133
+ # But the situation is very low contention, so we don't use mutex for now.
134
+ # If the problem happens, we will add a guard for consumer.
135
+ consumer = @consumer
136
+ @consumer = nil
137
+ consumer.close
138
+
139
+ super
140
+ end
141
+
142
+ def setup_consumer
143
+ consumer = Rdkafka::Config.new(@kafka_configs).consumer
144
+ consumer.subscribe(*@topics)
145
+ consumer
146
+ end
147
+
148
+ def reconnect_consumer
149
+ log.warn "Stopping Consumer"
150
+ consumer = @consumer
151
+ @consumer = nil
152
+ if consumer
153
+ consumer.close
154
+ end
155
+ log.warn "Could not connect to broker. retry_time:#{@retry_count}. Next retry will be in #{@retry_wait_seconds} seconds"
156
+ @retry_count = @retry_count + 1
157
+ sleep @retry_wait_seconds
158
+ @consumer = setup_consumer
159
+ log.warn "Re-starting consumer #{Time.now.to_s}"
160
+ @retry_count = 0
161
+ rescue =>e
162
+ log.error "unexpected error during re-starting consumer object access", :error => e.to_s
163
+ log.error_backtrace
164
+ if @retry_count <= @retry_limit or disable_retry_limit
165
+ reconnect_consumer
166
+ end
167
+ end
168
+
169
+ class Batch
170
+ attr_reader :topic
171
+ attr_reader :messages
172
+
173
+ def initialize(topic)
174
+ @topic = topic
175
+ @messages = []
176
+ end
177
+ end
178
+
179
+ # Executes the passed codeblock on a batch of messages.
180
+ # It is guaranteed that every message in a given batch belongs to the same topic, because the tagging logic in :run expects that property.
181
+ # The maximum number of messages in a batch is capped by the :max_batch_size configuration value. This ensures that consuming from a single
182
+ # topic for a long time (e.g. with `auto.offset.reset` set to `earliest`) does not lead to memory exhaustion. Also, calling consumer.poll
183
+ # advances the consumer offset, so in case the process crashes we might lose at most :max_batch_size messages.
184
+ def each_batch(&block)
185
+ batch = nil
186
+ message = nil
187
+ while @consumer
188
+ message = @consumer.poll(@max_wait_time_ms)
189
+ if message
190
+ if not batch
191
+ batch = Batch.new(message.topic)
192
+ elsif batch.topic != message.topic || batch.messages.size >= @max_batch_size
193
+ yield batch
194
+ batch = Batch.new(message.topic)
195
+ end
196
+ batch.messages << message
197
+ else
198
+ yield batch if batch
199
+ batch = nil
200
+ end
201
+ end
202
+ yield batch if batch
203
+ end
204
+
205
+ def run
206
+ while @consumer
207
+ begin
208
+ each_batch { |batch|
209
+ log.debug "A new batch for topic #{batch.topic} with #{batch.messages.size} messages"
210
+ es = Fluent::MultiEventStream.new
211
+ tag = batch.topic
212
+ tag = @add_prefix + "." + tag if @add_prefix
213
+ tag = tag + "." + @add_suffix if @add_suffix
214
+
215
+ batch.messages.each { |msg|
216
+ begin
217
+ record = @parser_proc.call(msg)
218
+ case @time_source
219
+ when :kafka
220
+ record_time = Fluent::EventTime.from_time(msg.timestamp)
221
+ when :now
222
+ record_time = Fluent::Engine.now
223
+ when :record
224
+ if @time_format
225
+ record_time = @time_parser.parse(record[@record_time_key].to_s)
226
+ else
227
+ record_time = record[@record_time_key]
228
+ end
229
+ else
230
+ log.fatal "BUG: invalid time_source: #{@time_source}"
231
+ end
232
+ if @kafka_message_key
233
+ record[@kafka_message_key] = msg.key
234
+ end
235
+ if @add_headers
236
+ msg.headers.each_pair { |k, v|
237
+ record[k] = v
238
+ }
239
+ end
240
+ es.add(record_time, record)
241
+ rescue => e
242
+ log.warn "parser error in #{msg.topic}/#{msg.partition}", :error => e.to_s, :value => msg.payload, :offset => msg.offset
243
+ log.debug_backtrace
244
+ end
245
+ }
246
+
247
+ unless es.empty?
248
+ emit_events(tag, es)
249
+ end
250
+ }
251
+ rescue ForShutdown
252
+ rescue => e
253
+ log.error "unexpected error during consuming events from kafka. Re-fetch events.", :error => e.to_s
254
+ log.error_backtrace
255
+ reconnect_consumer
256
+ end
257
+ end
258
+ rescue => e
259
+ log.error "unexpected error during consumer object access", :error => e.to_s
260
+ log.error_backtrace
261
+ end
262
+
263
+ def emit_events(tag, es)
264
+ retries = 0
265
+ begin
266
+ router.emit_stream(tag, es)
267
+ rescue BufferError
268
+ raise ForShutdown if @consumer.nil?
269
+
270
+ if @retry_emit_limit.nil?
271
+ sleep 1
272
+ retry
273
+ end
274
+
275
+ if retries < @retry_emit_limit
276
+ retries += 1
277
+ sleep 1
278
+ retry
279
+ else
280
+ raise RuntimeError, "Exceeds retry_emit_limit"
281
+ end
282
+ end
283
+ end
284
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-kafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.2
4
+ version: 0.15.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hidemasa Togashi
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2020-08-26 00:00:00.000000000 Z
12
+ date: 2020-09-14 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: fluentd
@@ -111,6 +111,7 @@ files:
111
111
  - fluent-plugin-kafka.gemspec
112
112
  - lib/fluent/plugin/in_kafka.rb
113
113
  - lib/fluent/plugin/in_kafka_group.rb
114
+ - lib/fluent/plugin/in_rdkafka_group.rb
114
115
  - lib/fluent/plugin/kafka_plugin_util.rb
115
116
  - lib/fluent/plugin/kafka_producer_ext.rb
116
117
  - lib/fluent/plugin/out_kafka.rb