sk-fluent-plugin-kafka 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/.travis.yml +25 -0
- data/ChangeLog +161 -0
- data/Gemfile +4 -0
- data/LICENSE +14 -0
- data/README.md +319 -0
- data/Rakefile +12 -0
- data/fluent-plugin-kafka.gemspec +24 -0
- data/lib/fluent/plugin/in_kafka.rb +341 -0
- data/lib/fluent/plugin/in_kafka_group.rb +281 -0
- data/lib/fluent/plugin/kafka_plugin_util.rb +52 -0
- data/lib/fluent/plugin/kafka_producer_ext.rb +308 -0
- data/lib/fluent/plugin/out_kafka.rb +254 -0
- data/lib/fluent/plugin/out_kafka2.rb +243 -0
- data/lib/fluent/plugin/out_kafka_buffered.rb +361 -0
- data/lib/fluent/plugin/out_rdkafka.rb +301 -0
- data/test/helper.rb +27 -0
- data/test/plugin/test_out_kafka.rb +58 -0
- metadata +147 -0
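
The bulk of this release is the two output plugins reproduced in the hunks below: the new kafka2 output (out_kafka2.rb, +243 lines), built on Fluentd's v1 plugin API, and the kafka_buffered output (out_kafka_buffered.rb, +361 lines). Both route each event using per-record fields, by default topic, message_key, partition_key, and partition. As a purely illustrative example, with the default field names a record such as

{"topic":"orders","message_key":"user-123","msg":"checkout completed"}

would be produced to the orders topic with message key user-123; when the topic field is missing, the configured default_topic (or, failing that, the event's tag) is used.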
data/lib/fluent/plugin/out_kafka2.rb
@@ -0,0 +1,243 @@
+require 'fluent/plugin/output'
+require 'fluent/plugin/kafka_plugin_util'
+
+require 'kafka'
+require 'fluent/plugin/kafka_producer_ext'
+
+module Fluent::Plugin
+  class Fluent::Kafka2Output < Output
+    Fluent::Plugin.register_output('kafka2', self)
+
+    helpers :inject, :formatter, :event_emitter
+
+    config_param :brokers, :array, :value_type => :string, :default => ['localhost:9092'],
+                 :desc => <<-DESC
+Set brokers directly:
+<broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+DESC
+    config_param :topic_key, :string, :default => 'topic', :desc => "Field for kafka topic"
+    config_param :default_topic, :string, :default => nil,
+                 :desc => "Default output topic when record doesn't have topic field"
+    config_param :message_key_key, :string, :default => 'message_key', :desc => "Field for kafka message key"
+    config_param :default_message_key, :string, :default => nil
+    config_param :partition_key_key, :string, :default => 'partition_key', :desc => "Field for kafka partition key"
+    config_param :default_partition_key, :string, :default => nil
+    config_param :partition_key, :string, :default => 'partition', :desc => "Field for kafka partition"
+    config_param :default_partition, :integer, :default => nil
+    config_param :client_id, :string, :default => 'fluentd'
+    config_param :sasl_over_ssl, :bool, :default => true,
+                 :desc => <<-DESC
+Set to false to prevent SSL strict mode when using SASL authentication
+DESC
+    config_param :exclude_partition_key, :bool, :default => false,
+                 :desc => 'Set true to remove partition key from data'
+    config_param :exclude_partition, :bool, :default => false,
+                 :desc => 'Set true to remove partition from data'
+    config_param :exclude_message_key, :bool, :default => false,
+                 :desc => 'Set true to remove message key from data'
+    config_param :exclude_topic_key, :bool, :default => false,
+                 :desc => 'Set true to remove topic name key from data'
+
+    config_param :get_kafka_client_log, :bool, :default => false
+
+    config_param :ignore_exceptions, :array, :default => [], value_type: :string, :desc => "Ignorable exception list"
+    config_param :exception_backup, :bool, :default => true, :desc => "Chunk backup flag when an ignored exception occurs"
+
+    # ruby-kafka producer options
+    config_param :max_send_retries, :integer, :default => 2,
+                 :desc => "Number of times to retry sending of messages to a leader."
+    config_param :required_acks, :integer, :default => -1,
+                 :desc => "The number of acks required per request."
+    config_param :ack_timeout, :time, :default => nil,
+                 :desc => "How long the producer waits for acks."
+    config_param :compression_codec, :string, :default => nil,
+                 :desc => <<-DESC
+The codec the producer uses to compress messages.
+Supported codecs: (gzip|snappy)
+DESC
+
+    config_param :active_support_notification_regex, :string, :default => nil,
+                 :desc => <<-DESC
+Add a regular expression to capture ActiveSupport notifications from the Kafka client
+requires activesupport gem - records will be generated under fluent_kafka_stats.**
+DESC
+
+    config_section :buffer do
+      config_set_default :chunk_keys, ["topic"]
+    end
+    config_section :format do
+      config_set_default :@type, 'json'
+    end
+
+    include Fluent::KafkaPluginUtil::SSLSettings
+    include Fluent::KafkaPluginUtil::SaslSettings
+
+    def initialize
+      super
+
+      @kafka = nil
+    end
+
+    def refresh_client(raise_error = true)
+      begin
+        logger = @get_kafka_client_log ? log : nil
+        if @scram_mechanism != nil && @username != nil && @password != nil
+          @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                             ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+                             sasl_scram_username: @username, sasl_scram_password: @password, sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl)
+        elsif @username != nil && @password != nil
+          @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                             ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+                             sasl_plain_username: @username, sasl_plain_password: @password)
+        else
+          @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                             ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+                             sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
+        end
+        log.info "initialized kafka producer: #{@client_id}"
+      rescue Exception => e
+        if raise_error # During startup, error should be reported to engine and stop its phase for safety.
+          raise e
+        else
+          log.error e
+        end
+      end
+    end
+
+    def configure(conf)
+      super
+
+      if @brokers.size > 0
+        @seed_brokers = @brokers
+        log.info "brokers have been set: #{@seed_brokers}"
+      else
+        raise Fluent::ConfigError, 'No brokers specified. Need at least one broker.'
+      end
+
+      formatter_conf = conf.elements('format').first
+      unless formatter_conf
+        raise Fluent::ConfigError, "<format> section is required."
+      end
+      unless formatter_conf["@type"]
+        raise Fluent::ConfigError, "format/@type is required."
+      end
+      @formatter_proc = setup_formatter(formatter_conf)
+
+      if @default_topic.nil?
+        if @chunk_keys.include?('topic') && !@chunk_key_tag
+          log.warn "Using the 'topic' field of the event record for the topic, with no fallback. Recommend setting default_topic or adding 'tag' to buffer chunk keys, e.g. <buffer topic,tag>"
+        end
+      else
+        if @chunk_key_tag
+          log.warn "default_topic is set. Fluentd's event tag is not used for the topic"
+        end
+      end
+
+      @producer_opts = {max_retries: @max_send_retries, required_acks: @required_acks}
+      @producer_opts[:ack_timeout] = @ack_timeout if @ack_timeout
+      @producer_opts[:compression_codec] = @compression_codec.to_sym if @compression_codec
+      if @active_support_notification_regex
+        require 'active_support/notifications'
+        require 'active_support/core_ext/hash/keys'
+        ActiveSupport::Notifications.subscribe(Regexp.new(@active_support_notification_regex)) do |*args|
+          event = ActiveSupport::Notifications::Event.new(*args)
+          message = event.payload.respond_to?(:stringify_keys) ? event.payload.stringify_keys : event.payload
+          @router.emit("fluent_kafka_stats.#{event.name}", Time.now.to_i, message)
+        end
+      end
+
+      @topic_key_sym = @topic_key.to_sym
+    end
+
+    def multi_workers_ready?
+      true
+    end
+
+    def start
+      super
+      refresh_client
+    end
+
+    def close
+      super
+      @kafka.close if @kafka
+    end
+
+    def terminate
+      super
+      @kafka = nil
+    end
+
+    def setup_formatter(conf)
+      type = conf['@type']
+      case type
+      when 'json'
+        begin
+          require 'oj'
+          Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+          Proc.new { |tag, time, record| Oj.dump(record) }
+        rescue LoadError
+          require 'yajl'
+          Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
+        end
+      when 'ltsv'
+        require 'ltsv'
+        Proc.new { |tag, time, record| LTSV.dump(record) }
+      else
+        @formatter = formatter_create(usage: 'kafka-plugin', conf: conf)
+        @formatter.method(:format)
+      end
+    end
+
+    # TODO: optimize write performance
+    def write(chunk)
+      tag = chunk.metadata.tag
+      topic = chunk.metadata.variables[@topic_key_sym] || @default_topic || tag
+      producer = @kafka.topic_producer(topic, @producer_opts)
+
+      messages = 0
+      record_buf = nil
+
+      begin
+        chunk.msgpack_each { |time, record|
+          begin
+            record = inject_values_to_record(tag, time, record)
+            record.delete(@topic_key) if @exclude_topic_key
+            partition_key = (@exclude_partition_key ? record.delete(@partition_key_key) : record[@partition_key_key]) || @default_partition_key
+            partition = (@exclude_partition ? record.delete(@partition_key) : record[@partition_key]) || @default_partition
+            message_key = (@exclude_message_key ? record.delete(@message_key_key) : record[@message_key_key]) || @default_message_key
+
+            record_buf = @formatter_proc.call(tag, time, record)
+          rescue StandardError => e
+            log.warn "unexpected error during record formatting. Skipping broken event:", :error => e.to_s, :error_class => e.class.to_s, :time => time, :record => record
+            next
+          end
+
+          log.trace { "message will be sent to #{topic} with partition_key: #{partition_key}, partition: #{partition}, message_key: #{message_key} and value: #{record_buf}." }
+          messages += 1
+
+          producer.produce(record_buf, key: message_key, partition_key: partition_key, partition: partition)
+        }
+
+        if messages > 0
+          log.debug { "#{messages} messages sent." }
+          producer.deliver_messages
+        end
+      end
+    rescue Exception => e
+      ignore = @ignore_exceptions.include?(e.class.name)
+
+      log.warn "Send exception occurred: #{e}"
+      log.warn "Exception Backtrace : #{e.backtrace.join("\n")}"
+      log.warn "Exception ignored in tag : #{tag}" if ignore
+      # For safety, refresh client and its producers
+      refresh_client(false)
+      # raise UnrecoverableError to back up the chunk of an ignored exception
+      raise Fluent::UnrecoverableError if ignore && exception_backup
+      # Raise exception to retry sending messages
+      raise e unless ignore
+    ensure
+      producer.shutdown if producer
+    end
+  end
+end
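
For orientation, a minimal configuration exercising the kafka2 output above might look like the following. This is a sketch assembled from the parameters in this hunk; the match pattern, broker addresses, and topic name are placeholders.

<match app.**>
  @type kafka2
  # comma-separated seed brokers (placeholder addresses)
  brokers broker1:9092,broker2:9092
  # used when a record has no 'topic' field
  default_topic logs
  compression_codec gzip
  <format>
    @type json
  </format>
  # chunking on 'topic' is the plugin's default; each chunk then maps to one Kafka topic
  <buffer topic>
    flush_interval 10s
  </buffer>
</match>

Since every chunk carries a single topic in its metadata, write can hand the whole chunk to one topic_producer (provided by kafka_producer_ext) and deliver it as a single batch.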
data/lib/fluent/plugin/out_kafka_buffered.rb
@@ -0,0 +1,361 @@
+require 'thread'
+require 'fluent/output'
+require 'fluent/plugin/kafka_plugin_util'
+
+class Fluent::KafkaOutputBuffered < Fluent::BufferedOutput
+  Fluent::Plugin.register_output('kafka_buffered', self)
+
+  config_param :brokers, :string, :default => 'localhost:9092',
+               :desc => <<-DESC
+Set brokers directly:
+<broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+Brokers: you can choose to use either brokers or zookeeper.
+DESC
+  config_param :zookeeper, :string, :default => nil,
+               :desc => <<-DESC
+Set brokers via Zookeeper:
+<zookeeper_host>:<zookeeper_port>
+DESC
+  config_param :zookeeper_path, :string, :default => '/brokers/ids', :desc => "Path in Zookeeper for broker ids. Default: /brokers/ids"
+
+  config_param :topic_key, :string, :default => 'topic', :desc => "Field for kafka topic"
+  config_param :default_topic, :string, :default => nil, :desc => "Default output topic when record doesn't have topic field"
+  config_param :message_key_key, :string, :default => 'message_key', :desc => "Field for kafka message key"
+  config_param :default_message_key, :string, :default => nil
+  config_param :partition_key_key, :string, :default => 'partition_key', :desc => "Field for kafka partition key"
+  config_param :default_partition_key, :string, :default => nil
+  config_param :partition_key, :string, :default => 'partition', :desc => "Field for kafka partition"
+  config_param :default_partition, :integer, :default => nil
+  config_param :client_id, :string, :default => 'kafka'
+  config_param :sasl_over_ssl, :bool, :default => true,
+               :desc => <<-DESC
+Set to false to prevent SSL strict mode when using SASL authentication
+DESC
+  config_param :output_data_type, :string, :default => 'json',
+               :desc => <<-DESC
+Supported format: (json|ltsv|msgpack|attr:<record name>|<formatter name>)
+DESC
+  config_param :output_include_tag, :bool, :default => false
+  config_param :output_include_time, :bool, :default => false
+  config_param :exclude_partition_key, :bool, :default => false,
+               :desc => <<-DESC
+Set true to remove partition key from data
+DESC
+  config_param :exclude_partition, :bool, :default => false,
+               :desc => <<-DESC
+Set true to remove partition from data
+DESC
+  config_param :exclude_message_key, :bool, :default => false,
+               :desc => <<-DESC
+Set true to remove message key from data
+DESC
+  config_param :exclude_topic_key, :bool, :default => false,
+               :desc => <<-DESC
+Set true to remove topic name key from data
+DESC
+
+  config_param :kafka_agg_max_bytes, :size, :default => 4*1024 # 4k
+  config_param :kafka_agg_max_messages, :integer, :default => nil
+  config_param :get_kafka_client_log, :bool, :default => false
+
+  # ruby-kafka producer options
+  config_param :max_send_retries, :integer, :default => 2,
+               :desc => "Number of times to retry sending of messages to a leader."
+  config_param :required_acks, :integer, :default => -1,
+               :desc => "The number of acks required per request."
+  config_param :ack_timeout, :time, :default => nil,
+               :desc => "How long the producer waits for acks."
+  config_param :compression_codec, :string, :default => nil,
+               :desc => <<-DESC
+The codec the producer uses to compress messages.
+Supported codecs: (gzip|snappy)
+DESC
+  config_param :max_send_limit_bytes, :size, :default => nil
+  config_param :discard_kafka_delivery_failed, :bool, :default => false
+
+  config_param :time_format, :string, :default => nil
+
+  config_param :active_support_notification_regex, :string, :default => nil,
+               :desc => <<-DESC
+Add a regular expression to capture ActiveSupport notifications from the Kafka client
+requires activesupport gem - records will be generated under fluent_kafka_stats.**
+DESC
+
+  config_param :monitoring_list, :array, :default => [],
+               :desc => "Library to use for monitoring. statsd and datadog are supported"
+
+  include Fluent::KafkaPluginUtil::SSLSettings
+  include Fluent::KafkaPluginUtil::SaslSettings
+
+  attr_accessor :output_data_type
+  attr_accessor :field_separator
+
+  unless method_defined?(:log)
+    define_method("log") { $log }
+  end
+
+  def initialize
+    super
+
+    require 'kafka'
+    require 'fluent/plugin/kafka_producer_ext'
+
+    @kafka = nil
+    @producers = {}
+    @producers_mutex = Mutex.new
+  end
+
+  def multi_workers_ready?
+    true
+  end
+
+  def refresh_client(raise_error = true)
+    if @zookeeper
+      @seed_brokers = []
+      z = Zookeeper.new(@zookeeper)
+      z.get_children(:path => @zookeeper_path)[:children].each do |id|
+        broker = Yajl.load(z.get(:path => @zookeeper_path + "/#{id}")[:data])
+        if @ssl_client_cert
+          @seed_brokers.push(pickup_ssl_endpoint(broker))
+        else
+          @seed_brokers.push("#{broker['host']}:#{broker['port']}")
+        end
+      end
+      z.close
+      log.info "brokers have been refreshed via Zookeeper: #{@seed_brokers}"
+    end
+    begin
+      if @seed_brokers.length > 0
+        logger = @get_kafka_client_log ? log : nil
+        if @scram_mechanism != nil && @username != nil && @password != nil
+          @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                             ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+                             sasl_scram_username: @username, sasl_scram_password: @password, sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl)
+        elsif @username != nil && @password != nil
+          @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                             ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+                             sasl_plain_username: @username, sasl_plain_password: @password)
+        else
+          @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                             ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+                             sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
+        end
+        log.info "initialized kafka producer: #{@client_id}"
+      else
+        log.warn "No brokers found on Zookeeper"
+      end
+    rescue Exception => e
+      if raise_error # During startup, error should be reported to engine and stop its phase for safety.
+        raise e
+      else
+        log.error e
+      end
+    end
+  end
+
+  def configure(conf)
+    super
+
+    if @zookeeper
+      require 'zookeeper'
+    else
+      @seed_brokers = @brokers.split(",")
+      log.info "brokers have been set directly: #{@seed_brokers}"
+    end
+
+    if conf['ack_timeout_ms']
+      log.warn "'ack_timeout_ms' parameter is deprecated. Use the second-unit 'ack_timeout' instead"
+      @ack_timeout = conf['ack_timeout_ms'].to_i / 1000
+    end
+
+    @f_separator = case @field_separator
+                   when /SPACE/i then ' '
+                   when /COMMA/i then ','
+                   when /SOH/i then "\x01"
+                   else "\t"
+                   end
+
+    @formatter_proc = setup_formatter(conf)
+
+    @producer_opts = {max_retries: @max_send_retries, required_acks: @required_acks}
+    @producer_opts[:ack_timeout] = @ack_timeout if @ack_timeout
+    @producer_opts[:compression_codec] = @compression_codec.to_sym if @compression_codec
+
+    if @discard_kafka_delivery_failed
+      log.warn "'discard_kafka_delivery_failed' option discards events which cause delivery failure, e.g. an invalid topic."
+      log.warn "If this is unexpected, you need to check your configuration or data."
+    end
+
+    if @active_support_notification_regex
+      require 'active_support/notifications'
+      require 'active_support/core_ext/hash/keys'
+      ActiveSupport::Notifications.subscribe(Regexp.new(@active_support_notification_regex)) do |*args|
+        event = ActiveSupport::Notifications::Event.new(*args)
+        message = event.payload.respond_to?(:stringify_keys) ? event.payload.stringify_keys : event.payload
+        @router.emit("fluent_kafka_stats.#{event.name}", Time.now.to_i, message)
+      end
+    end
+
+    @monitoring_list.each { |m|
+      require "kafka/#{m}"
+      log.info "#{m} monitoring started"
+    }
+  end
+
+  def start
+    super
+    refresh_client
+  end
+
+  def shutdown
+    super
+    shutdown_producers
+    @kafka = nil
+  end
+
+  def emit(tag, es, chain)
+    super(tag, es, chain, tag)
+  end
+
+  def format_stream(tag, es)
+    es.to_msgpack_stream
+  end
+
+  def shutdown_producers
+    @producers_mutex.synchronize {
+      @producers.each { |key, producer|
+        producer.shutdown
+      }
+      @producers = {}
+    }
+  end
+
+  def get_producer
+    @producers_mutex.synchronize {
+      producer = @producers[Thread.current.object_id]
+      unless producer
+        producer = @kafka.producer(@producer_opts)
+        @producers[Thread.current.object_id] = producer
+      end
+      producer
+    }
+  end
+
+  def setup_formatter(conf)
+    if @output_data_type == 'json'
+      begin
+        require 'oj'
+        Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+        Proc.new { |tag, time, record| Oj.dump(record) }
+      rescue LoadError
+        require 'yajl'
+        Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
+      end
+    elsif @output_data_type == 'ltsv'
+      require 'ltsv'
+      Proc.new { |tag, time, record| LTSV.dump(record) }
+    elsif @output_data_type == 'msgpack'
+      require 'msgpack'
+      Proc.new { |tag, time, record| record.to_msgpack }
+    elsif @output_data_type =~ /^attr:(.*)$/
+      @custom_attributes = $1.split(',').map(&:strip).reject(&:empty?)
+      @custom_attributes.unshift('time') if @output_include_time
+      @custom_attributes.unshift('tag') if @output_include_tag
+      Proc.new { |tag, time, record|
+        @custom_attributes.map { |attr|
+          record[attr].nil? ? '' : record[attr].to_s
+        }.join(@f_separator)
+      }
+    else
+      @formatter = Fluent::Plugin.new_formatter(@output_data_type)
+      @formatter.configure(conf)
+      @formatter.method(:format)
+    end
+  end
+
+  def deliver_messages(producer, tag)
+    if @discard_kafka_delivery_failed
+      begin
+        producer.deliver_messages
+      rescue Kafka::DeliveryFailed => e
+        log.warn "DeliveryFailed occurred. Discarding broken event:", :error => e.to_s, :error_class => e.class.to_s, :tag => tag
+        producer.clear_buffer
+      end
+    else
+      producer.deliver_messages
+    end
+  end
+
+  def write(chunk)
+    tag = chunk.key
+    def_topic = @default_topic || tag
+    producer = get_producer
+
+    records_by_topic = {}
+    bytes_by_topic = {}
+    messages = 0
+    messages_bytes = 0
+    record_buf = nil
+    record_buf_bytes = nil
+
+    begin
+      chunk.msgpack_each { |time, record|
+        begin
+          if @output_include_time
+            if @time_format
+              record['time'.freeze] = Time.at(time).strftime(@time_format)
+            else
+              record['time'.freeze] = time
+            end
+          end
+
+          record['tag'] = tag if @output_include_tag
+          topic = (@exclude_topic_key ? record.delete(@topic_key) : record[@topic_key]) || def_topic
+          partition_key = (@exclude_partition_key ? record.delete(@partition_key_key) : record[@partition_key_key]) || @default_partition_key
+          partition = (@exclude_partition ? record.delete(@partition_key) : record[@partition_key]) || @default_partition
+          message_key = (@exclude_message_key ? record.delete(@message_key_key) : record[@message_key_key]) || @default_message_key
+
+          records_by_topic[topic] ||= 0
+          bytes_by_topic[topic] ||= 0
+
+          record_buf = @formatter_proc.call(tag, time, record)
+          record_buf_bytes = record_buf.bytesize
+          if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
+            log.warn "record size exceeds max_send_limit_bytes. Skipping event:", :time => time, :record => record
+            next
+          end
+        rescue StandardError => e
+          log.warn "unexpected error during record formatting. Skipping broken event:", :error => e.to_s, :error_class => e.class.to_s, :time => time, :record => record
+          next
+        end
+
+        if (messages > 0) and (messages_bytes + record_buf_bytes > @kafka_agg_max_bytes) or (@kafka_agg_max_messages && messages >= @kafka_agg_max_messages)
+          log.debug { "#{messages} messages sent because the batch transmission limit was reached." }
+          deliver_messages(producer, tag)
+          messages = 0
+          messages_bytes = 0
+        end
+        log.trace { "message will be sent to #{topic} with partition_key: #{partition_key}, partition: #{partition}, message_key: #{message_key} and value: #{record_buf}." }
+        messages += 1
+        producer.produce_for_buffered(record_buf, topic: topic, key: message_key, partition_key: partition_key, partition: partition)
+        messages_bytes += record_buf_bytes
+
+        records_by_topic[topic] += 1
+        bytes_by_topic[topic] += record_buf_bytes
+      }
+      if messages > 0
+        log.debug { "#{messages} messages sent." }
+        deliver_messages(producer, tag)
+      end
+      log.debug { "(records|bytes) (#{records_by_topic}|#{bytes_by_topic})" }
+    end
+  rescue Exception => e
+    log.warn "Send exception occurred: #{e}"
+    log.warn "Exception Backtrace : #{e.backtrace.join("\n")}"
+    # For safety, refresh client and its producers
+    shutdown_producers
+    refresh_client(false)
+    # Raise exception to retry sending messages
+    raise e
+  end
+end
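
And a comparable sketch for the kafka_buffered output; hosts and topic are again placeholders, and zookeeper <host>:<port> may be given instead of brokers to discover brokers from the /brokers/ids path.

<match app.**>
  @type kafka_buffered
  brokers localhost:9092
  default_topic logs
  # one of: json, ltsv, msgpack, attr:<fields>, or a formatter plugin name
  output_data_type json
  output_include_tag true
  # flush the in-flight batch once it exceeds ~4 KiB (the default)
  kafka_agg_max_bytes 4096
</match>

Unlike kafka2, this plugin mixes topics within a single chunk and flushes mid-chunk whenever the accumulated batch exceeds kafka_agg_max_bytes or, when set, kafka_agg_max_messages.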