fluent-plugin-kafka-enchanced 0.5.1
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/.travis.yml +17 -0
- data/ChangeLog +49 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +221 -0
- data/Rakefile +12 -0
- data/fluent-plugin-kafka.gemspec +23 -0
- data/lib/fluent/plugin/in_kafka.rb +308 -0
- data/lib/fluent/plugin/in_kafka_group.rb +218 -0
- data/lib/fluent/plugin/kafka_plugin_util.rb +22 -0
- data/lib/fluent/plugin/kafka_producer_ext.rb +225 -0
- data/lib/fluent/plugin/out_kafka.rb +200 -0
- data/lib/fluent/plugin/out_kafka2.rb +187 -0
- data/lib/fluent/plugin/out_kafka_buffered.rb +279 -0
- data/test/helper.rb +27 -0
- data/test/plugin/test_out_kafka.rb +52 -0
- metadata +138 -0
data/lib/fluent/plugin/out_kafka.rb
@@ -0,0 +1,200 @@
+require 'fluent/output'
+require 'fluent/plugin/kafka_plugin_util'
+
+class Fluent::KafkaOutput < Fluent::Output
+  Fluent::Plugin.register_output('kafka', self)
+
+  config_param :brokers, :string, :default => 'localhost:9092',
+               :desc => <<-DESC
+Set brokers directly
+<broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+Note that you can choose to use either brokers or zookeeper.
+DESC
+  config_param :zookeeper, :string, :default => nil,
+               :desc => "Set brokers via Zookeeper: <zookeeper_host>:<zookeeper_port>"
+  config_param :zookeeper_path, :string, :default => '/brokers/ids',
+               :desc => "Path in Zookeeper for broker ids. Defaults to /brokers/ids"
+  config_param :default_topic, :string, :default => nil,
+               :desc => "Output topic."
+  config_param :default_message_key, :string, :default => nil
+  config_param :default_partition_key, :string, :default => nil
+  config_param :default_partition, :integer, :default => nil
+  config_param :client_id, :string, :default => 'kafka'
+  config_param :output_data_type, :string, :default => 'json',
+               :desc => "Supported format: (json|ltsv|msgpack|attr:<record name>|<formatter name>)"
+  config_param :output_include_tag, :bool, :default => false
+  config_param :output_include_time, :bool, :default => false
+  config_param :exclude_partition_key, :bool, :default => false,
+               :desc => <<-DESC
+Set true to remove partition key from data
+DESC
+  config_param :exclude_partition, :bool, :default => false,
+               :desc => <<-DESC
+Set true to remove partition from data
+DESC
+
+  config_param :exclude_message_key, :bool, :default => false,
+               :desc => <<-DESC
+Set true to remove message key from data
+DESC
+  config_param :exclude_topic_key, :bool, :default => false,
+               :desc => <<-DESC
+Set true to remove topic name key from data
+DESC
+
+  # ruby-kafka producer options
+  config_param :max_send_retries, :integer, :default => 2,
+               :desc => "Number of times to retry sending of messages to a leader."
+  config_param :required_acks, :integer, :default => -1,
+               :desc => "The number of acks required per request."
+  config_param :ack_timeout, :integer, :default => nil,
+               :desc => "How long the producer waits for acks."
+  config_param :compression_codec, :string, :default => nil,
+               :desc => "The codec the producer uses to compress messages."
+
+  config_param :time_format, :string, :default => nil
+
+  include Fluent::KafkaPluginUtil::SSLSettings
+
+  attr_accessor :output_data_type
+  attr_accessor :field_separator
+
+  unless method_defined?(:log)
+    define_method("log") { $log }
+  end
+
+  def initialize
+    super
+
+    require 'kafka'
+
+    @kafka = nil
+  end
+
+  def refresh_client
+    if @zookeeper
+      @seed_brokers = []
+      z = Zookeeper.new(@zookeeper)
+      z.get_children(:path => @zookeeper_path)[:children].each do |id|
+        broker = Yajl.load(z.get(:path => @zookeeper_path + "/#{id}")[:data])
+        @seed_brokers.push("#{broker['host']}:#{broker['port']}")
+      end
+      z.close
+      log.info "brokers have been refreshed via Zookeeper: #{@seed_brokers}"
+    end
+    begin
+      if @seed_brokers.length > 0
+        @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                           ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key))
+        log.info "initialized kafka producer: #{@client_id}"
+      else
+        log.warn "No brokers found on Zookeeper"
+      end
+    rescue Exception => e
+      log.error e
+    end
+  end
+
+  def configure(conf)
+    super
+
+    if @zookeeper
+      require 'zookeeper'
+    else
+      @seed_brokers = @brokers.match(",").nil? ? [@brokers] : @brokers.split(",")
+      log.info "brokers have been set directly: #{@seed_brokers}"
+    end
+
+    if conf['ack_timeout_ms']
+      log.warn "'ack_timeout_ms' parameter is deprecated. Use 'ack_timeout' (in seconds) instead"
+      @ack_timeout = conf['ack_timeout_ms'].to_i / 1000
+    end
+
+    @f_separator = case @field_separator
+                   when /SPACE/i then ' '
+                   when /COMMA/i then ','
+                   when /SOH/i then "\x01"
+                   else "\t"
+                   end
+
+    @formatter_proc = setup_formatter(conf)
+
+    @producer_opts = {max_retries: @max_send_retries, required_acks: @required_acks}
+    @producer_opts[:ack_timeout] = @ack_timeout if @ack_timeout
+    @producer_opts[:compression_codec] = @compression_codec.to_sym if @compression_codec
+  end
+
+  def start
+    super
+    refresh_client
+  end
+
+  def shutdown
+    super
+    @kafka = nil
+  end
+
+  def setup_formatter(conf)
+    if @output_data_type == 'json'
+      require 'yajl'
+      Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
+    elsif @output_data_type == 'ltsv'
+      require 'ltsv'
+      Proc.new { |tag, time, record| LTSV.dump(record) }
+    elsif @output_data_type == 'msgpack'
+      require 'msgpack'
+      Proc.new { |tag, time, record| record.to_msgpack }
+    elsif @output_data_type =~ /^attr:(.*)$/
+      @custom_attributes = $1.split(',').map(&:strip).reject(&:empty?)
+      @custom_attributes.unshift('time') if @output_include_time
+      @custom_attributes.unshift('tag') if @output_include_tag
+      Proc.new { |tag, time, record|
+        @custom_attributes.map { |attr|
+          record[attr].nil? ? '' : record[attr].to_s
+        }.join(@f_separator)
+      }
+    else
+      @formatter = Fluent::Plugin.new_formatter(@output_data_type)
+      @formatter.configure(conf)
+      @formatter.method(:format)
+    end
+  end
+
+  def emit(tag, es, chain)
+    begin
+      chain.next
+
+      # out_kafka is mainly for testing, so it doesn't need the performance of out_kafka_buffered.
+      producer = @kafka.producer(@producer_opts)
+
+      es.each do |time, record|
+        if @output_include_time
+          if @time_format
+            record['time'] = Time.at(time).strftime(@time_format)
+          else
+            record['time'] = time
+          end
+        end
+        record['tag'] = tag if @output_include_tag
+        topic = (@exclude_topic_key ? record.delete('topic') : record['topic']) || @default_topic || tag
+        partition_key = (@exclude_partition_key ? record.delete('partition_key') : record['partition_key']) || @default_partition_key
+        partition = (@exclude_partition ? record.delete('partition'.freeze) : record['partition'.freeze]) || @default_partition
+        message_key = (@exclude_message_key ? record.delete('message_key') : record['message_key']) || @default_message_key
+
+        value = @formatter_proc.call(tag, time, record)
+
+        log.on_trace { log.trace("message will be sent to #{topic} with partition_key: #{partition_key}, partition: #{partition}, message_key: #{message_key} and value: #{value}.") }
+        producer.produce(value, topic: topic, key: message_key, partition: partition, partition_key: partition_key)
+      end
+
+      producer.deliver_messages
+      producer.shutdown
+    rescue Exception => e
+      log.warn "Send exception occurred: #{e}"
+      producer.shutdown if producer
+      refresh_client
+      raise e
+    end
+  end
+
+end
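To make the parameters above concrete, here is a minimal example configuration for this output. It is a sketch rather than part of the package: the parameter names come from the config_param declarations in the hunk above, while the match pattern, broker address, and topic name are illustrative placeholders.

  <match app.**>
    @type kafka
    brokers localhost:9092       # illustrative; a zookeeper address can be set instead
    default_topic logs           # hypothetical topic name
    output_data_type json
    output_include_tag true
    required_acks -1
    compression_codec gzip
  </match>

Note that emit creates a producer per call and delivers synchronously, which matches the in-code comment that out_kafka is mainly for testing; the buffered outputs below are the higher-throughput paths.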
data/lib/fluent/plugin/out_kafka2.rb
@@ -0,0 +1,187 @@
+require 'fluent/plugin/output'
+require 'fluent/plugin/kafka_plugin_util'
+
+require 'kafka'
+require 'fluent/plugin/kafka_producer_ext'
+
+module Fluent::Plugin
+  class Fluent::Kafka2Output < Output
+    Fluent::Plugin.register_output('kafka2', self)
+
+    helpers :inject, :formatter
+
+    config_param :brokers, :array, :value_type => :string, :default => ['localhost:9092'],
+                 :desc => <<-DESC
+Set brokers directly:
+<broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+DESC
+    config_param :default_topic, :string, :default => nil,
+                 :desc => "Default output topic when record doesn't have topic field"
+    config_param :default_message_key, :string, :default => nil
+    config_param :default_partition_key, :string, :default => nil
+    config_param :default_partition, :integer, :default => nil
+    config_param :client_id, :string, :default => 'fluentd'
+    config_param :exclude_partition_key, :bool, :default => false,
+                 :desc => 'Set true to remove partition key from data'
+    config_param :exclude_partition, :bool, :default => false,
+                 :desc => 'Set true to remove partition from data'
+    config_param :exclude_message_key, :bool, :default => false,
+                 :desc => 'Set true to remove message key from data'
+    config_param :exclude_topic_key, :bool, :default => false,
+                 :desc => 'Set true to remove topic name key from data'
+
+    config_param :get_kafka_client_log, :bool, :default => false
+
+    # ruby-kafka producer options
+    config_param :max_send_retries, :integer, :default => 2,
+                 :desc => "Number of times to retry sending of messages to a leader."
+    config_param :required_acks, :integer, :default => -1,
+                 :desc => "The number of acks required per request."
+    config_param :ack_timeout, :time, :default => nil,
+                 :desc => "How long the producer waits for acks."
+    config_param :compression_codec, :string, :default => nil,
+                 :desc => <<-DESC
+The codec the producer uses to compress messages.
+Supported codecs: (gzip|snappy)
+DESC
+
+    config_section :buffer do
+      config_set_default :chunk_keys, ["topic"]
+    end
+    config_section :format do
+      config_set_default :@type, 'json'
+    end
+
+    include Fluent::KafkaPluginUtil::SSLSettings
+
+    def initialize
+      super
+
+      @kafka = nil
+    end
+
+    def refresh_client(raise_error = true)
+      begin
+        logger = @get_kafka_client_log ? log : nil
+        @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                           ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key))
+        log.info "initialized kafka producer: #{@client_id}"
+      rescue Exception => e
+        if raise_error # During startup, error should be reported to engine and stop its phase for safety.
+          raise e
+        else
+          log.error e
+        end
+      end
+    end
+
+    def configure(conf)
+      super
+
+      if @brokers.size > 0
+        log.info "brokers have been set: #{@brokers}"
+      else
+        raise Fluent::ConfigError, 'No brokers specified. Need at least one broker.'
+      end
+
+      formatter_conf = conf.elements('format').first
+      unless formatter_conf
+        raise Fluent::ConfigError, "<format> section is required."
+      end
+      unless formatter_conf["@type"]
+        raise Fluent::ConfigError, "format/@type is required."
+      end
+      @formatter_proc = setup_formatter(formatter_conf)
+
+      @producer_opts = {max_retries: @max_send_retries, required_acks: @required_acks}
+      @producer_opts[:ack_timeout] = @ack_timeout if @ack_timeout
+      @producer_opts[:compression_codec] = @compression_codec.to_sym if @compression_codec
+    end
+
+    def multi_workers_ready?
+      true
+    end
+
+    def start
+      super
+      refresh_client
+    end
+
+    def close
+      super
+      @kafka.close if @kafka
+    end
+
+    def terminate
+      super
+      @kafka = nil
+    end
+
+    def setup_formatter(conf)
+      type = conf['@type']
+      case type
+      when 'json'
+        begin
+          require 'oj'
+          Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+          Proc.new { |tag, time, record| Oj.dump(record) }
+        rescue LoadError
+          require 'yajl'
+          Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
+        end
+      when 'ltsv'
+        require 'ltsv'
+        Proc.new { |tag, time, record| LTSV.dump(record) }
+      else
+        @formatter = formatter_create(usage: 'kafka-plugin', conf: conf)
+        @formatter.method(:format)
+      end
+    end
+
+    # TODO: optimize write performance
+    def write(chunk)
+      tag = chunk.metadata.tag
+      topic = chunk.metadata.variables[:topic] || @default_topic || tag
+      producer = @kafka.topic_producer(topic, @producer_opts)
+
+      messages = 0
+      record_buf = nil
+
+      begin
+        chunk.msgpack_each { |time, record|
+          begin
+            record = inject_values_to_record(tag, time, record)
+            record.delete('topic'.freeze) if @exclude_topic_key
+            partition_key = (@exclude_partition_key ? record.delete('partition_key'.freeze) : record['partition_key'.freeze]) || @default_partition_key
+            partition = (@exclude_partition ? record.delete('partition'.freeze) : record['partition'.freeze]) || @default_partition
+            message_key = (@exclude_message_key ? record.delete('message_key'.freeze) : record['message_key'.freeze]) || @default_message_key
+
+            record_buf = @formatter_proc.call(tag, time, record)
+          rescue StandardError => e
+            log.warn "unexpected error during record formatting. Skip broken event:", :error => e.to_s, :error_class => e.class.to_s, :time => time, :record => record
+            next
+          end
+
+          log.on_trace { log.trace("message will be sent to #{topic} with partition_key: #{partition_key}, partition: #{partition}, message_key: #{message_key} and value: #{record_buf}.") }
+          messages += 1
+
+          producer.produce(record_buf, message_key, partition, partition_key)
+        }
+
+        if messages > 0
+          log.trace { "#{messages} messages sent." }
+          producer.deliver_messages
+        end
+      end
+    rescue Exception => e
+      log.warn "Send exception occurred: #{e}"
+      log.warn "Exception Backtrace : #{e.backtrace.join("\n")}"
+      # For safety, refresh client and its producers
+      refresh_client(false)
+      # Raise exception to retry sending messages
+      raise e
+    ensure
+      producer.shutdown if producer
+    end
+  end
+end
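As with out_kafka, a hedged configuration sketch may help here. kafka2 is a v0.14-style plugin: configure above requires a <format> section, and the buffer section defaults to chunking by topic (config_set_default :chunk_keys, ["topic"]). Everything besides the declared parameter names is an illustrative assumption.

  <match app.**>
    @type kafka2
    brokers localhost:9092       # illustrative broker address
    default_topic logs           # hypothetical topic name
    <format>
      @type json
    </format>
    <buffer topic>
      flush_interval 10s         # standard Fluentd buffer parameter; value chosen arbitrarily
    </buffer>
  </match>

Because chunks are keyed by topic, each write call handles exactly one topic and hands the whole chunk to a single topic_producer.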
data/lib/fluent/plugin/out_kafka_buffered.rb
@@ -0,0 +1,279 @@
+require 'thread'
+require 'fluent/output'
+require 'fluent/plugin/kafka_plugin_util'
+
+class Fluent::KafkaOutputBuffered < Fluent::BufferedOutput
+  Fluent::Plugin.register_output('kafka_buffered', self)
+
+  config_param :brokers, :string, :default => 'localhost:9092',
+               :desc => <<-DESC
+Set brokers directly:
+<broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+Brokers: you can choose to use either brokers or zookeeper.
+DESC
+  config_param :zookeeper, :string, :default => nil,
+               :desc => <<-DESC
+Set brokers via Zookeeper:
+<zookeeper_host>:<zookeeper_port>
+DESC
+  config_param :zookeeper_path, :string, :default => '/brokers/ids',
+               :desc => "Path in Zookeeper for broker ids. Defaults to /brokers/ids"
+  config_param :default_topic, :string, :default => nil,
+               :desc => "Output topic"
+  config_param :default_message_key, :string, :default => nil
+  config_param :default_partition_key, :string, :default => nil
+  config_param :default_partition, :integer, :default => nil
+  config_param :client_id, :string, :default => 'kafka'
+  config_param :output_data_type, :string, :default => 'json',
+               :desc => <<-DESC
+Supported format: (json|ltsv|msgpack|attr:<record name>|<formatter name>)
+DESC
+  config_param :output_include_tag, :bool, :default => false
+  config_param :output_include_time, :bool, :default => false
+  config_param :exclude_partition_key, :bool, :default => false,
+               :desc => <<-DESC
+Set true to remove partition key from data
+DESC
+  config_param :exclude_partition, :bool, :default => false,
+               :desc => <<-DESC
+Set true to remove partition from data
+DESC
+  config_param :exclude_message_key, :bool, :default => false,
+               :desc => <<-DESC
+Set true to remove message key from data
+DESC
+  config_param :exclude_topic_key, :bool, :default => false,
+               :desc => <<-DESC
+Set true to remove topic name key from data
+DESC
+
+  config_param :kafka_agg_max_bytes, :size, :default => 4*1024 # 4KB
+  config_param :get_kafka_client_log, :bool, :default => false
+
+  # ruby-kafka producer options
+  config_param :max_send_retries, :integer, :default => 2,
+               :desc => "Number of times to retry sending of messages to a leader."
+  config_param :required_acks, :integer, :default => -1,
+               :desc => "The number of acks required per request."
+  config_param :ack_timeout, :time, :default => nil,
+               :desc => "How long the producer waits for acks."
+  config_param :compression_codec, :string, :default => nil,
+               :desc => <<-DESC
+The codec the producer uses to compress messages.
+Supported codecs: (gzip|snappy)
+DESC
+
+  config_param :time_format, :string, :default => nil
+
+  include Fluent::KafkaPluginUtil::SSLSettings
+
+  attr_accessor :output_data_type
+  attr_accessor :field_separator
+
+  unless method_defined?(:log)
+    define_method("log") { $log }
+  end
+
+  def initialize
+    super
+
+    require 'kafka'
+    require 'fluent/plugin/kafka_producer_ext'
+
+    @kafka = nil
+    @producers = {}
+    @producers_mutex = Mutex.new
+  end
+
+  def refresh_client(raise_error = true)
+    if @zookeeper
+      @seed_brokers = []
+      z = Zookeeper.new(@zookeeper)
+      z.get_children(:path => @zookeeper_path)[:children].each do |id|
+        broker = Yajl.load(z.get(:path => @zookeeper_path + "/#{id}")[:data])
+        @seed_brokers.push("#{broker['host']}:#{broker['port']}")
+      end
+      z.close
+      log.info "brokers have been refreshed via Zookeeper: #{@seed_brokers}"
+    end
+    begin
+      if @seed_brokers.length > 0
+        logger = @get_kafka_client_log ? log : nil
+        @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                           ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key))
+        log.info "initialized kafka producer: #{@client_id}"
+      else
+        log.warn "No brokers found on Zookeeper"
+      end
+    rescue Exception => e
+      if raise_error # During startup, error should be reported to engine and stop its phase for safety.
+        raise e
+      else
+        log.error e
+      end
+    end
+  end
+
+  def configure(conf)
+    super
+
+    if @zookeeper
+      require 'zookeeper'
+    else
+      @seed_brokers = @brokers.match(",").nil? ? [@brokers] : @brokers.split(",")
+      log.info "brokers have been set directly: #{@seed_brokers}"
+    end
+
+    if conf['ack_timeout_ms']
+      log.warn "'ack_timeout_ms' parameter is deprecated. Use 'ack_timeout' (in seconds) instead"
+      @ack_timeout = conf['ack_timeout_ms'].to_i / 1000
+    end
+
+    @f_separator = case @field_separator
+                   when /SPACE/i then ' '
+                   when /COMMA/i then ','
+                   when /SOH/i then "\x01"
+                   else "\t"
+                   end
+
+    @formatter_proc = setup_formatter(conf)
+
+    @producer_opts = {max_retries: @max_send_retries, required_acks: @required_acks}
+    @producer_opts[:ack_timeout] = @ack_timeout if @ack_timeout
+    @producer_opts[:compression_codec] = @compression_codec.to_sym if @compression_codec
+  end
+
+  def start
+    super
+    refresh_client
+  end
+
+  def shutdown
+    super
+    shutdown_producers
+    @kafka = nil
+  end
+
+  def emit(tag, es, chain)
+    super(tag, es, chain, tag)
+  end
+
+  def format_stream(tag, es)
+    es.to_msgpack_stream
+  end
+
+  def shutdown_producers
+    @producers_mutex.synchronize {
+      @producers.each { |key, producer|
+        producer.shutdown
+      }
+      @producers = {}
+    }
+  end
+
+  def get_producer
+    @producers_mutex.synchronize {
+      producer = @producers[Thread.current.object_id]
+      unless producer
+        producer = @kafka.producer(@producer_opts)
+        @producers[Thread.current.object_id] = producer
+      end
+      producer
+    }
+  end
+
+  def setup_formatter(conf)
+    if @output_data_type == 'json'
+      require 'yajl'
+      Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
+    elsif @output_data_type == 'ltsv'
+      require 'ltsv'
+      Proc.new { |tag, time, record| LTSV.dump(record) }
+    elsif @output_data_type == 'msgpack'
+      require 'msgpack'
+      Proc.new { |tag, time, record| record.to_msgpack }
+    elsif @output_data_type =~ /^attr:(.*)$/
+      @custom_attributes = $1.split(',').map(&:strip).reject(&:empty?)
+      @custom_attributes.unshift('time') if @output_include_time
+      @custom_attributes.unshift('tag') if @output_include_tag
+      Proc.new { |tag, time, record|
+        @custom_attributes.map { |attr|
+          record[attr].nil? ? '' : record[attr].to_s
+        }.join(@f_separator)
+      }
+    else
+      @formatter = Fluent::Plugin.new_formatter(@output_data_type)
+      @formatter.configure(conf)
+      @formatter.method(:format)
+    end
+  end
+
+  def write(chunk)
+    tag = chunk.key
+    def_topic = @default_topic || tag
+    producer = get_producer
+
+    records_by_topic = {}
+    bytes_by_topic = {}
+    messages = 0
+    messages_bytes = 0
+    record_buf = nil
+    record_buf_bytes = nil
+
+    begin
+      chunk.msgpack_each { |time, record|
+        begin
+          if @output_include_time
+            if @time_format
+              record['time'.freeze] = Time.at(time).strftime(@time_format)
+            else
+              record['time'.freeze] = time
+            end
+          end
+
+          record['tag'] = tag if @output_include_tag
+          topic = (@exclude_topic_key ? record.delete('topic'.freeze) : record['topic'.freeze]) || def_topic
+          partition_key = (@exclude_partition_key ? record.delete('partition_key'.freeze) : record['partition_key'.freeze]) || @default_partition_key
+          partition = (@exclude_partition ? record.delete('partition'.freeze) : record['partition'.freeze]) || @default_partition
+          message_key = (@exclude_message_key ? record.delete('message_key'.freeze) : record['message_key'.freeze]) || @default_message_key
+
+          records_by_topic[topic] ||= 0
+          bytes_by_topic[topic] ||= 0
+
+          record_buf = @formatter_proc.call(tag, time, record)
+          record_buf_bytes = record_buf.bytesize
+        rescue StandardError => e
+          log.warn "unexpected error during record formatting. Skip broken event:", :error => e.to_s, :error_class => e.class.to_s, :time => time, :record => record
+          next
+        end
+
+        if (messages > 0) and (messages_bytes + record_buf_bytes > @kafka_agg_max_bytes)
+          log.on_trace { log.trace("#{messages} messages sent.") }
+          producer.deliver_messages
+          messages = 0
+          messages_bytes = 0
+        end
+        log.on_trace { log.trace("message will be sent to #{topic} with partition_key: #{partition_key}, partition: #{partition}, message_key: #{message_key} and value: #{record_buf}.") }
+        messages += 1
+        producer.produce2(record_buf, topic: topic, key: message_key, partition_key: partition_key, partition: partition)
+        messages_bytes += record_buf_bytes
+
+        records_by_topic[topic] += 1
+        bytes_by_topic[topic] += record_buf_bytes
+      }
+      if messages > 0
+        log.trace { "#{messages} messages sent." }
+        producer.deliver_messages
+      end
+      log.debug { "(records|bytes) (#{records_by_topic}|#{bytes_by_topic})" }
+    end
+  rescue Exception => e
+    log.warn "Send exception occurred: #{e}"
+    log.warn "Exception Backtrace : #{e.backtrace.join("\n")}"
+    # For safety, refresh client and its producers
+    shutdown_producers
+    refresh_client(false)
+    # Raise exception to retry sending messages
+    raise e
+  end
+end
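Finally, a configuration sketch for kafka_buffered, again using only parameters declared above plus standard v0.12 buffered-output options; the broker address, topic, and buffer values are illustrative assumptions.

  <match app.**>
    @type kafka_buffered
    brokers localhost:9092       # illustrative; zookeeper can be set instead
    default_topic logs           # hypothetical topic name
    output_data_type json
    kafka_agg_max_bytes 4096     # producer batch is flushed once aggregated bytes exceed this
    buffer_type memory           # standard v0.12 buffer option
    flush_interval 10s
  </match>

Unlike out_kafka, this output keeps one producer per Fluentd thread (see get_producer) and aggregates messages up to kafka_agg_max_bytes before each deliver_messages call, which is what makes it the production-oriented variant.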