fluent-plugin-kafka 0.4.2 → 0.5.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 3022fed18061233c956a0b13292cbebb9da8a79f
- data.tar.gz: 5e8f4ecf691f72b620b21358a60f8d8994b9e359
+ metadata.gz: a09a4933e7d0f7a30094cd98900a03a80dac3c9a
+ data.tar.gz: 9421658f52091e37e39e32ae10e7e93132d9394f
  SHA512:
- metadata.gz: fb824c849ef4feb963a1c6675717aadcaea78969ed30285b1d871f9e2a8f62d026489ca660a849a79c4ac8040673ec816b870037647fdde1dc19b7cf772c4f4f
- data.tar.gz: e1c6ff1982adcac8ce67f0c7f87f94c6b1b497b92f994a4c5c50ac8d55151c01aa2f798dcf7c873ea5158069b835ea92255bc177da972df83b07c641a67c1aaf
+ metadata.gz: e3a72bb6fecbe2dd0204e8bc84234d3d11c44017699eb7ce88ee4b154d04966f939487d85ef90b4811510c490fb733a922ba1f70012dc6fbf9490afebf843ed7
+ data.tar.gz: 5b633c21eadd8797a5a6672191a391468a5b8c8e2b4c7968f74b67d7e3edc2978f3db0df2c157b835a7dcd75e899f4b26baf1d62c02ac7cf47f55943dd72dba8
data/ChangeLog CHANGED
@@ -1,4 +1,8 @@
- Release 0.4.2 - 2016012/10
+ Release 0.5.0 - 2017/01/17
+
+   * output: Add out_kafka2 plugin with v0.14 API
+
+ Release 0.4.2 - 2016/12/10
 
    * input: Add use_record_time and time_format parameters
    * Update ruby-kafka dependency to 0.3.16.beta2
fluent-plugin-kafka.gemspec CHANGED
@@ -12,7 +12,7 @@ Gem::Specification.new do |gem|
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
  gem.name = "fluent-plugin-kafka"
  gem.require_paths = ["lib"]
- gem.version = '0.4.2'
+ gem.version = '0.5.0'
  gem.required_ruby_version = ">= 2.1.0"
 
  gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
lib/fluent/plugin/kafka_producer_ext.rb CHANGED
@@ -1,5 +1,13 @@
+ require "set"
+ require "kafka/partitioner"
+ require "kafka/message_buffer"
+ require "kafka/produce_operation"
+ require "kafka/pending_message_queue"
+ require "kafka/pending_message"
+ require "kafka/compressor"
  require 'kafka/producer'
 
+ # for out_kafka_buffered
  module Kafka
    class Producer
      def produce2(value, key: nil, topic:, partition: nil, partition_key: nil)
@@ -22,3 +30,196 @@ module Kafka
      end
    end
  end
+
+ # for out_kafka2
+ module Kafka
+   class Client
+     def topic_producer(topic, compression_codec: nil, compression_threshold: 1, ack_timeout: 5, required_acks: :all, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000, max_buffer_bytesize: 10_000_000)
+       compressor = Compressor.new(
+         codec_name: compression_codec,
+         threshold: compression_threshold,
+         instrumenter: @instrumenter,
+       )
+
+       TopicProducer.new(topic,
+         cluster: initialize_cluster,
+         logger: @logger,
+         instrumenter: @instrumenter,
+         compressor: compressor,
+         ack_timeout: ack_timeout,
+         required_acks: required_acks,
+         max_retries: max_retries,
+         retry_backoff: retry_backoff,
+         max_buffer_size: max_buffer_size,
+         max_buffer_bytesize: max_buffer_bytesize,
+       )
+     end
+   end
+
+   class TopicProducer
+     def initialize(topic, cluster:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:)
+       @cluster = cluster
+       @logger = logger
+       @instrumenter = instrumenter
+       @required_acks = required_acks == :all ? -1 : required_acks
+       @ack_timeout = ack_timeout
+       @max_retries = max_retries
+       @retry_backoff = retry_backoff
+       @max_buffer_size = max_buffer_size
+       @max_buffer_bytesize = max_buffer_bytesize
+       @compressor = compressor
+
+       @topic = topic
+       @cluster.add_target_topics(Set.new([topic]))
+
+       # A buffer organized by topic/partition.
+       @buffer = MessageBuffer.new
+
+       # Messages added by `#produce` but not yet assigned a partition.
+       @pending_message_queue = PendingMessageQueue.new
+     end
+
+     def produce(value, key, partition, partition_key)
+       create_time = Time.now
+
+       message = PendingMessage.new(
+         value,
+         key,
+         @topic,
+         partition,
+         partition_key,
+         create_time,
+         key.to_s.bytesize + value.to_s.bytesize
+       )
+
+       @pending_message_queue.write(message)
+
+       nil
+     end
+
+     def deliver_messages
+       # There's no need to do anything if the buffer is empty.
+       return if buffer_size == 0
+
+       deliver_messages_with_retries
+     end
+
+     # Returns the number of messages currently held in the buffer.
+     #
+     # @return [Integer] buffer size.
+     def buffer_size
+       @pending_message_queue.size + @buffer.size
+     end
+
+     def buffer_bytesize
+       @pending_message_queue.bytesize + @buffer.bytesize
+     end
+
+     # Deletes all buffered messages.
+     #
+     # @return [nil]
+     def clear_buffer
+       @buffer.clear
+       @pending_message_queue.clear
+     end
+
+     # Closes all connections to the brokers.
+     #
+     # @return [nil]
+     def shutdown
+       @cluster.disconnect
+     end
+
+     private
+
+     def deliver_messages_with_retries
+       attempt = 0
+
+       #@cluster.add_target_topics(@target_topics)
+
+       operation = ProduceOperation.new(
+         cluster: @cluster,
+         buffer: @buffer,
+         required_acks: @required_acks,
+         ack_timeout: @ack_timeout,
+         compressor: @compressor,
+         logger: @logger,
+         instrumenter: @instrumenter,
+       )
+
+       loop do
+         attempt += 1
+
+         @cluster.refresh_metadata_if_necessary!
+
+         assign_partitions!
+         operation.execute
+
+         if @required_acks.zero?
+           # No response is returned by the brokers, so we can't know which messages
+           # have been successfully written. Our only option is to assume that they all
+           # have.
+           @buffer.clear
+         end
+
+         if buffer_size.zero?
+           break
+         elsif attempt <= @max_retries
+           @logger.warn "Failed to send all messages; attempting retry #{attempt} of #{@max_retries} after #{@retry_backoff}s"
+
+           sleep @retry_backoff
+         else
+           @logger.error "Failed to send all messages; keeping remaining messages in buffer"
+           break
+         end
+       end
+
+       unless @pending_message_queue.empty?
+         # Mark the cluster as stale in order to force a cluster metadata refresh.
+         @cluster.mark_as_stale!
+         raise DeliveryFailed, "Failed to assign partitions to #{@pending_message_queue.size} messages"
+       end
+
+       unless @buffer.empty?
+         partitions = @buffer.map {|topic, partition, _| "#{topic}/#{partition}" }.join(", ")
+
+         raise DeliveryFailed, "Failed to send messages to #{partitions}"
+       end
+     end
+
+     def assign_partitions!
+       failed_messages = []
+       partition_count = @cluster.partitions_for(@topic).count
+
+       @pending_message_queue.each do |message|
+         partition = message.partition
+
+         begin
+           if partition.nil?
+             partition = Partitioner.partition_for_key(partition_count, message)
+           end
+
+           @buffer.write(
+             value: message.value,
+             key: message.key,
+             topic: message.topic,
+             partition: partition,
+             create_time: message.create_time,
+           )
+         rescue Kafka::Error => e
+           failed_messages << message
+         end
+       end
+
+       if failed_messages.any?
+         failed_messages.group_by(&:topic).each do |topic, messages|
+           @logger.error "Failed to assign partitions to #{messages.count} messages in #{topic}"
+         end
+
+         @cluster.mark_as_stale!
+       end
+
+       @pending_message_queue.replace(failed_messages)
+     end
+   end
+ end
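
For reference, a minimal usage sketch of how this TopicProducer extension is meant to be driven (not part of the diff; the broker address, topic name, and payload are placeholder values): out_kafka2 creates one producer per topic via Client#topic_producer, queues messages with produce, flushes them with deliver_messages, and finally calls shutdown.

  require "kafka"
  require "fluent/plugin/kafka_producer_ext"

  # Placeholder broker and topic; adjust for a real cluster.
  kafka = Kafka.new(seed_brokers: ["localhost:9092"], client_id: "fluentd")
  producer = kafka.topic_producer("logs", required_acks: -1, max_retries: 2)

  # produce(value, key, partition, partition_key) only buffers the message.
  producer.produce('{"message":"hello"}', "key-1", nil, nil)

  # deliver_messages assigns partitions, sends the buffer, and retries on failure.
  producer.deliver_messages
  producer.shutdown
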
lib/fluent/plugin/out_kafka2.rb ADDED
@@ -0,0 +1,187 @@
+ require 'fluent/plugin/output'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ require 'kafka'
+ require 'fluent/plugin/kafka_producer_ext'
+
+ module Fluent::Plugin
+   class Fluent::Kafka2Output < Output
+     Fluent::Plugin.register_output('kafka2', self)
+
+     helpers :inject, :formatter
+
+     config_param :brokers, :array, :value_type => :string, :default => ['localhost:9092'],
+                  :desc => <<-DESC
+ Set brokers directly:
+ <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+ DESC
+     config_param :default_topic, :string, :default => nil,
+                  :desc => "Default output topic when record doesn't have topic field"
+     config_param :default_message_key, :string, :default => nil
+     config_param :default_partition_key, :string, :default => nil
+     config_param :default_partition, :integer, :default => nil
+     config_param :client_id, :string, :default => 'fluentd'
+     config_param :exclude_partition_key, :bool, :default => false,
+                  :desc => 'Set true to remove partition key from data'
+     config_param :exclude_partition, :bool, :default => false,
+                  :desc => 'Set true to remove partition from data'
+     config_param :exclude_message_key, :bool, :default => false,
+                  :desc => 'Set true to remove message key from data'
+     config_param :exclude_topic_key, :bool, :default => false,
+                  :desc => 'Set true to remove topic name key from data'
+
+     config_param :get_kafka_client_log, :bool, :default => false
+
+     # ruby-kafka producer options
+     config_param :max_send_retries, :integer, :default => 2,
+                  :desc => "Number of times to retry sending of messages to a leader."
+     config_param :required_acks, :integer, :default => -1,
+                  :desc => "The number of acks required per request."
+     config_param :ack_timeout, :time, :default => nil,
+                  :desc => "How long the producer waits for acks."
+     config_param :compression_codec, :string, :default => nil,
+                  :desc => <<-DESC
+ The codec the producer uses to compress messages.
+ Supported codecs: (gzip|snappy)
+ DESC
+
+     config_section :buffer do
+       config_set_default :chunk_keys, ["topic"]
+     end
+     config_section :format do
+       config_set_default :@type, 'json'
+     end
+
+     include Fluent::KafkaPluginUtil::SSLSettings
+
+     def initialize
+       super
+
+       @kafka = nil
+     end
+
+     def refresh_client(raise_error = true)
+       begin
+         logger = @get_kafka_client_log ? log : nil
+         @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                            ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key))
+         log.info "initialized kafka producer: #{@client_id}"
+       rescue Exception => e
+         if raise_error # During startup, error should be reported to engine and stop its phase for safety.
+           raise e
+         else
+           log.error e
+         end
+       end
+     end
+
+     def configure(conf)
+       super
+
+       if @brokers.size > 0
+         log.info "brokers have been set: #{@brokers}"
+       else
+         raise Fluent::ConfigError, 'No brokers specified. Need at least one broker.'
+       end
+
+       formatter_conf = conf.elements('format').first
+       unless formatter_conf
+         raise Fluent::ConfigError, "<format> section is required."
+       end
+       unless formatter_conf["@type"]
+         raise Fluent::ConfigError, "format/@type is required."
+       end
+       @formatter_proc = setup_formatter(formatter_conf)
+
+       @producer_opts = {max_retries: @max_send_retries, required_acks: @required_acks}
+       @producer_opts[:ack_timeout] = @ack_timeout if @ack_timeout
+       @producer_opts[:compression_codec] = @compression_codec.to_sym if @compression_codec
+     end
+
+     def multi_workers_ready?
+       true
+     end
+
+     def start
+       super
+       refresh_client
+     end
+
+     def close
+       super
+       @kafka.close if @kafka
+     end
+
+     def terminate
+       super
+       @kafka = nil
+     end
+
+     def setup_formatter(conf)
+       type = conf['@type']
+       case type
+       when 'json'
+         begin
+           require 'oj'
+           Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+           Proc.new { |tag, time, record| Oj.dump(record) }
+         rescue LoadError
+           require 'yajl'
+           Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
+         end
+       when 'ltsv'
+         require 'ltsv'
+         Proc.new { |tag, time, record| LTSV.dump(record) }
+       else
+         @formatter = formatter_create(usage: 'kafka-plugin', conf: conf)
+         @formatter.method(:format)
+       end
+     end
+
+     # TODO: optimize write performance
+     def write(chunk)
+       tag = chunk.metadata.tag
+       topic = chunk.metadata.variables[:topic] || @default_topic || tag
+       producer = @kafka.topic_producer(topic, @producer_opts)
+
+       messages = 0
+       record_buf = nil
+
+       begin
+         chunk.msgpack_each { |time, record|
+           begin
+             record = inject_values_to_record(tag, time, record)
+             record.delete('topic'.freeze) if @exclude_topic_key
+             partition_key = (@exclude_partition_key ? record.delete('partition_key'.freeze) : record['partition_key'.freeze]) || @default_partition_key
+             partition = (@exclude_partition ? record.delete('partition'.freeze) : record['partition'.freeze]) || @default_partition
+             message_key = (@exclude_message_key ? record.delete('message_key'.freeze) : record['message_key'.freeze]) || @default_message_key
+
+             record_buf = @formatter_proc.call(tag, time, record)
+           rescue StandardError => e
+             log.warn "unexpected error during record formatting. Skipping broken event:", :error => e.to_s, :error_class => e.class.to_s, :time => time, :record => record
+             next
+           end
+
+           log.on_trace { "message will be sent to #{topic} with partition_key: #{partition_key}, partition: #{partition}, message_key: #{message_key} and value: #{record_buf}." }
+           messages += 1
+
+           producer.produce(record_buf, message_key, partition, partition_key)
+         }
+
+         if messages > 0
+           log.trace { "#{messages} messages sent." }
+           producer.deliver_messages
+         end
+       end
+     rescue Exception => e
+       log.warn "Send exception occurred: #{e}"
+       log.warn "Exception backtrace: #{e.backtrace.join("\n")}"
+       # For safety, refresh client and its producers
+       refresh_client(false)
+       # Raise exception to retry sending messages
+       raise e
+     ensure
+       producer.shutdown if producer
+     end
+   end
+ end
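
For reference, a minimal sketch of a configuration that exercises the parameters above (not part of the diff; the tag pattern, broker address, topic name, and flush interval are placeholder values). The <format> section is required by configure, and chunks are keyed by topic by default, so each buffer chunk maps to a single Kafka topic.

  <match app.**>
    @type kafka2
    brokers localhost:9092
    default_topic logs
    compression_codec gzip

    <format>
      @type json
    </format>
    <buffer topic>
      flush_interval 10s
    </buffer>
  </match>
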
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-kafka
  version: !ruby/object:Gem::Version
- version: 0.4.2
+ version: 0.5.0
  platform: ruby
  authors:
  - Hidemasa Togashi
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2017-01-10 00:00:00.000000000 Z
+ date: 2017-01-17 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: fluentd
@@ -108,6 +108,7 @@ files:
  - lib/fluent/plugin/kafka_plugin_util.rb
  - lib/fluent/plugin/kafka_producer_ext.rb
  - lib/fluent/plugin/out_kafka.rb
+ - lib/fluent/plugin/out_kafka2.rb
  - lib/fluent/plugin/out_kafka_buffered.rb
  - test/helper.rb
  - test/plugin/test_out_kafka.rb
@@ -130,7 +131,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  version: '0'
  requirements: []
  rubyforge_project:
- rubygems_version: 2.5.1
+ rubygems_version: 2.6.8
  signing_key:
  specification_version: 4
  summary: Fluentd plugin for Apache Kafka > 0.8