fluent-plugin-kafka 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3022fed18061233c956a0b13292cbebb9da8a79f
4
- data.tar.gz: 5e8f4ecf691f72b620b21358a60f8d8994b9e359
3
+ metadata.gz: a09a4933e7d0f7a30094cd98900a03a80dac3c9a
4
+ data.tar.gz: 9421658f52091e37e39e32ae10e7e93132d9394f
5
5
  SHA512:
6
- metadata.gz: fb824c849ef4feb963a1c6675717aadcaea78969ed30285b1d871f9e2a8f62d026489ca660a849a79c4ac8040673ec816b870037647fdde1dc19b7cf772c4f4f
7
- data.tar.gz: e1c6ff1982adcac8ce67f0c7f87f94c6b1b497b92f994a4c5c50ac8d55151c01aa2f798dcf7c873ea5158069b835ea92255bc177da972df83b07c641a67c1aaf
6
+ metadata.gz: e3a72bb6fecbe2dd0204e8bc84234d3d11c44017699eb7ce88ee4b154d04966f939487d85ef90b4811510c490fb733a922ba1f70012dc6fbf9490afebf843ed7
7
+ data.tar.gz: 5b633c21eadd8797a5a6672191a391468a5b8c8e2b4c7968f74b67d7e3edc2978f3db0df2c157b835a7dcd75e899f4b26baf1d62c02ac7cf47f55943dd72dba8
data/ChangeLog CHANGED
@@ -1,4 +1,8 @@
1
- Release 0.4.2 - 2016012/10
1
+ Release 0.5.0 - 2017/01/17
2
+
3
+ * output: Add out_kafka2 plugin with v0.14 API
4
+
5
+ Release 0.4.2 - 2016/12/10
2
6
 
3
7
  * input: Add use_record_time and time_format parameters
4
8
  * Update ruby-kafka dependency to 0.3.16.beta2
@@ -12,7 +12,7 @@ Gem::Specification.new do |gem|
12
12
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
13
13
  gem.name = "fluent-plugin-kafka"
14
14
  gem.require_paths = ["lib"]
15
- gem.version = '0.4.2'
15
+ gem.version = '0.5.0'
16
16
  gem.required_ruby_version = ">= 2.1.0"
17
17
 
18
18
  gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
@@ -1,5 +1,13 @@
1
+ require "set"
2
+ require "kafka/partitioner"
3
+ require "kafka/message_buffer"
4
+ require "kafka/produce_operation"
5
+ require "kafka/pending_message_queue"
6
+ require "kafka/pending_message"
7
+ require "kafka/compressor"
1
8
  require 'kafka/producer'
2
9
 
10
+ # for out_kafka_buffered
3
11
  module Kafka
4
12
  class Producer
5
13
  def produce2(value, key: nil, topic:, partition: nil, partition_key: nil)
@@ -22,3 +30,196 @@ module Kafka
22
30
  end
23
31
  end
24
32
  end
33
+
34
+ # for out_kafka2
35
+ module Kafka
36
+ class Client
37
+ def topic_producer(topic, compression_codec: nil, compression_threshold: 1, ack_timeout: 5, required_acks: :all, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000, max_buffer_bytesize: 10_000_000)
38
+ compressor = Compressor.new(
39
+ codec_name: compression_codec,
40
+ threshold: compression_threshold,
41
+ instrumenter: @instrumenter,
42
+ )
43
+
44
+ TopicProducer.new(topic,
45
+ cluster: initialize_cluster,
46
+ logger: @logger,
47
+ instrumenter: @instrumenter,
48
+ compressor: compressor,
49
+ ack_timeout: ack_timeout,
50
+ required_acks: required_acks,
51
+ max_retries: max_retries,
52
+ retry_backoff: retry_backoff,
53
+ max_buffer_size: max_buffer_size,
54
+ max_buffer_bytesize: max_buffer_bytesize,
55
+ )
56
+ end
57
+ end
58
+
59
+ class TopicProducer
60
+ def initialize(topic, cluster:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:)
61
+ @cluster = cluster
62
+ @logger = logger
63
+ @instrumenter = instrumenter
64
+ @required_acks = required_acks == :all ? -1 : required_acks
65
+ @ack_timeout = ack_timeout
66
+ @max_retries = max_retries
67
+ @retry_backoff = retry_backoff
68
+ @max_buffer_size = max_buffer_size
69
+ @max_buffer_bytesize = max_buffer_bytesize
70
+ @compressor = compressor
71
+
72
+ @topic = topic
73
+ @cluster.add_target_topics(Set.new([topic]))
74
+
75
+ # A buffer organized by topic/partition.
76
+ @buffer = MessageBuffer.new
77
+
78
+ # Messages added by `#produce` but not yet assigned a partition.
79
+ @pending_message_queue = PendingMessageQueue.new
80
+ end
81
+
82
+ def produce(value, key, partition, partition_key)
83
+ create_time = Time.now
84
+
85
+ message = PendingMessage.new(
86
+ value,
87
+ key,
88
+ @topic,
89
+ partition,
90
+ partition_key,
91
+ create_time,
92
+ key.to_s.bytesize + value.to_s.bytesize
93
+ )
94
+
95
+ @pending_message_queue.write(message)
96
+
97
+ nil
98
+ end
99
+
100
+ def deliver_messages
101
+ # There's no need to do anything if the buffer is empty.
102
+ return if buffer_size == 0
103
+
104
+ deliver_messages_with_retries
105
+ end
106
+
107
+ # Returns the number of messages currently held in the buffer.
108
+ #
109
+ # @return [Integer] buffer size.
110
+ def buffer_size
111
+ @pending_message_queue.size + @buffer.size
112
+ end
113
+
114
+ def buffer_bytesize
115
+ @pending_message_queue.bytesize + @buffer.bytesize
116
+ end
117
+
118
+ # Deletes all buffered messages.
119
+ #
120
+ # @return [nil]
121
+ def clear_buffer
122
+ @buffer.clear
123
+ @pending_message_queue.clear
124
+ end
125
+
126
+ # Closes all connections to the brokers.
127
+ #
128
+ # @return [nil]
129
+ def shutdown
130
+ @cluster.disconnect
131
+ end
132
+
133
+ private
134
+
135
+ def deliver_messages_with_retries
136
+ attempt = 0
137
+
138
+ #@cluster.add_target_topics(@target_topics)
139
+
140
+ operation = ProduceOperation.new(
141
+ cluster: @cluster,
142
+ buffer: @buffer,
143
+ required_acks: @required_acks,
144
+ ack_timeout: @ack_timeout,
145
+ compressor: @compressor,
146
+ logger: @logger,
147
+ instrumenter: @instrumenter,
148
+ )
149
+
150
+ loop do
151
+ attempt += 1
152
+
153
+ @cluster.refresh_metadata_if_necessary!
154
+
155
+ assign_partitions!
156
+ operation.execute
157
+
158
+ if @required_acks.zero?
159
+ # No response is returned by the brokers, so we can't know which messages
160
+ # have been successfully written. Our only option is to assume that they all
161
+ # have.
162
+ @buffer.clear
163
+ end
164
+
165
+ if buffer_size.zero?
166
+ break
167
+ elsif attempt <= @max_retries
168
+ @logger.warn "Failed to send all messages; attempting retry #{attempt} of #{@max_retries} after #{@retry_backoff}s"
169
+
170
+ sleep @retry_backoff
171
+ else
172
+ @logger.error "Failed to send all messages; keeping remaining messages in buffer"
173
+ break
174
+ end
175
+ end
176
+
177
+ unless @pending_message_queue.empty?
178
+ # Mark the cluster as stale in order to force a cluster metadata refresh.
179
+ @cluster.mark_as_stale!
180
+ raise DeliveryFailed, "Failed to assign partitions to #{@pending_message_queue.size} messages"
181
+ end
182
+
183
+ unless @buffer.empty?
184
+ partitions = @buffer.map {|topic, partition, _| "#{topic}/#{partition}" }.join(", ")
185
+
186
+ raise DeliveryFailed, "Failed to send messages to #{partitions}"
187
+ end
188
+ end
189
+
190
+ def assign_partitions!
191
+ failed_messages = []
192
+ partition_count = @cluster.partitions_for(@topic).count
193
+
194
+ @pending_message_queue.each do |message|
195
+ partition = message.partition
196
+
197
+ begin
198
+ if partition.nil?
199
+ partition = Partitioner.partition_for_key(partition_count, message)
200
+ end
201
+
202
+ @buffer.write(
203
+ value: message.value,
204
+ key: message.key,
205
+ topic: message.topic,
206
+ partition: partition,
207
+ create_time: message.create_time,
208
+ )
209
+ rescue Kafka::Error => e
210
+ failed_messages << message
211
+ end
212
+ end
213
+
214
+ if failed_messages.any?
215
+ failed_messages.group_by(&:topic).each do |topic, messages|
216
+ @logger.error "Failed to assign partitions to #{messages.count} messages in #{topic}"
217
+ end
218
+
219
+ @cluster.mark_as_stale!
220
+ end
221
+
222
+ @pending_message_queue.replace(failed_messages)
223
+ end
224
+ end
225
+ end
@@ -0,0 +1,187 @@
1
+ require 'fluent/plugin/output'
2
+ require 'fluent/plugin/kafka_plugin_util'
3
+
4
+ require 'kafka'
5
+ require 'fluent/plugin/kafka_producer_ext'
6
+
7
+ module Fluent::Plugin
8
+ class Fluent::Kafka2Output < Output
9
+ Fluent::Plugin.register_output('kafka2', self)
10
+
11
+ helpers :inject, :formatter
12
+
13
+ config_param :brokers, :array, :value_type => :string, :default => ['localhost:9092'],
14
+ :desc => <<-DESC
15
+ Set brokers directly:
16
+ <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
17
+ DESC
18
+ config_param :default_topic, :string, :default => nil,
19
+ :desc => "Default output topic when record doesn't have topic field"
20
+ config_param :default_message_key, :string, :default => nil
21
+ config_param :default_partition_key, :string, :default => nil
22
+ config_param :default_partition, :integer, :default => nil
23
+ config_param :client_id, :string, :default => 'fluentd'
24
+ config_param :exclude_partition_key, :bool, :default => false,
25
+ :desc => 'Set true to remove partition key from data'
26
+ config_param :exclude_partition, :bool, :default => false,
27
+ :desc => 'Set true to remove partition from data'
28
+ config_param :exclude_message_key, :bool, :default => false,
29
+ :desc => 'Set true to remove partition key from data'
30
+ config_param :exclude_topic_key, :bool, :default => false,
31
+ :desc => 'Set true to remove topic name key from data'
32
+
33
+ config_param :get_kafka_client_log, :bool, :default => false
34
+
35
+ # ruby-kafka producer options
36
+ config_param :max_send_retries, :integer, :default => 2,
37
+ :desc => "Number of times to retry sending of messages to a leader."
38
+ config_param :required_acks, :integer, :default => -1,
39
+ :desc => "The number of acks required per request."
40
+ config_param :ack_timeout, :time, :default => nil,
41
+ :desc => "How long the producer waits for acks."
42
+ config_param :compression_codec, :string, :default => nil,
43
+ :desc => <<-DESC
44
+ The codec the producer uses to compress messages.
45
+ Supported codecs: (gzip|snappy)
46
+ DESC
47
+
48
+ config_section :buffer do
49
+ config_set_default :chunk_keys, ["topic"]
50
+ end
51
+ config_section :format do
52
+ config_set_default :@type, 'json'
53
+ end
54
+
55
+ include Fluent::KafkaPluginUtil::SSLSettings
56
+
57
+ def initialize
58
+ super
59
+
60
+ @kafka = nil
61
+ end
62
+
63
+ def refresh_client(raise_error = true)
64
+ begin
65
+ logger = @get_kafka_client_log ? log : nil
66
+ @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
67
+ ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key))
68
+ log.info "initialized kafka producer: #{@client_id}"
69
+ rescue Exception => e
70
+ if raise_error # During startup, error should be reported to engine and stop its phase for safety.
71
+ raise e
72
+ else
73
+ log.error e
74
+ end
75
+ end
76
+ end
77
+
78
+ def configure(conf)
79
+ super
80
+
81
+ if @brokers.size > 0
82
+ log.info "brokers has been set: #{@brokers}"
83
+ else
84
+ raise Fluent::Config, 'No brokers specified. Need one broker at least.'
85
+ end
86
+
87
+ formatter_conf = conf.elements('format').first
88
+ unless formatter_conf
89
+ raise Fluent::ConfigError, "<format> section is required."
90
+ end
91
+ unless formatter_conf["@type"]
92
+ raise Fluent::ConfigError, "format/@type is required."
93
+ end
94
+ @formatter_proc = setup_formatter(formatter_conf)
95
+
96
+ @producer_opts = {max_retries: @max_send_retries, required_acks: @required_acks}
97
+ @producer_opts[:ack_timeout] = @ack_timeout if @ack_timeout
98
+ @producer_opts[:compression_codec] = @compression_codec.to_sym if @compression_codec
99
+ end
100
+
101
+ def multi_workers_ready?
102
+ true
103
+ end
104
+
105
+ def start
106
+ super
107
+ refresh_client
108
+ end
109
+
110
+ def close
111
+ super
112
+ @kafka.close if @kafka
113
+ end
114
+
115
+ def terminate
116
+ super
117
+ @kafka = nil
118
+ end
119
+
120
+ def setup_formatter(conf)
121
+ type = conf['@type']
122
+ case type
123
+ when 'json'
124
+ begin
125
+ require 'oj'
126
+ Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
127
+ Proc.new { |tag, time, record| Oj.dump(record) }
128
+ rescue LoadError
129
+ require 'yajl'
130
+ Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
131
+ end
132
+ when 'ltsv'
133
+ require 'ltsv'
134
+ Proc.new { |tag, time, record| LTSV.dump(record) }
135
+ else
136
+ @formatter = formatter_create(usage: 'kafka-plugin', conf: conf)
137
+ @formatter.method(:format)
138
+ end
139
+ end
140
+
141
+ # TODO: optimize write performance
142
+ def write(chunk)
143
+ tag = chunk.metadata.tag
144
+ topic = chunk.metadata.variables[:topic] || @default_topic || tag
145
+ producer = @kafka.topic_producer(topic, @producer_opts)
146
+
147
+ messages = 0
148
+ record_buf = nil
149
+
150
+ begin
151
+ chunk.msgpack_each { |time, record|
152
+ begin
153
+ record = inject_values_to_record(tag, time, record)
154
+ record.delete('topic'.freeze) if @exclude_topic_key
155
+ partition_key = (@exclude_partition_key ? record.delete('partition_key'.freeze) : record['partition_key'.freeze]) || @default_partition_key
156
+ partition = (@exclude_partition ? record.delete('partition'.freeze) : record['partition'.freeze]) || @default_partition
157
+ message_key = (@exclude_message_key ? record.delete('message_key'.freeze) : record['message_key'.freeze]) || @default_message_key
158
+
159
+ record_buf = @formatter_proc.call(tag, time, record)
160
+ rescue StandardError => e
161
+ log.warn "unexpected error during format record. Skip broken event:", :error => e.to_s, :error_class => e.class.to_s, :time => time, :record => record
162
+ next
163
+ end
164
+
165
+ log.on_trace { "message will send to #{topic} with partition_key: #{partition_key}, partition: #{partition}, message_key: #{message_key} and value: #{record_buf}." }
166
+ messages += 1
167
+
168
+ producer.produce(record_buf, message_key, partition, partition_key)
169
+ }
170
+
171
+ if messages > 0
172
+ log.trace { "#{messages} messages send." }
173
+ producer.deliver_messages
174
+ end
175
+ end
176
+ rescue Exception => e
177
+ log.warn "Send exception occurred: #{e}"
178
+ log.warn "Exception Backtrace : #{e.backtrace.join("\n")}"
179
+ # For safety, refresh client and its producers
180
+ refresh_client(false)
181
+ # Raise exception to retry sending messages
182
+ raise e
183
+ ensure
184
+ producer.shutdown if producer
185
+ end
186
+ end
187
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-kafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hidemasa Togashi
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2017-01-10 00:00:00.000000000 Z
12
+ date: 2017-01-17 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: fluentd
@@ -108,6 +108,7 @@ files:
108
108
  - lib/fluent/plugin/kafka_plugin_util.rb
109
109
  - lib/fluent/plugin/kafka_producer_ext.rb
110
110
  - lib/fluent/plugin/out_kafka.rb
111
+ - lib/fluent/plugin/out_kafka2.rb
111
112
  - lib/fluent/plugin/out_kafka_buffered.rb
112
113
  - test/helper.rb
113
114
  - test/plugin/test_out_kafka.rb
@@ -130,7 +131,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
130
131
  version: '0'
131
132
  requirements: []
132
133
  rubyforge_project:
133
- rubygems_version: 2.5.1
134
+ rubygems_version: 2.6.8
134
135
  signing_key:
135
136
  specification_version: 4
136
137
  summary: Fluentd plugin for Apache Kafka > 0.8