fluent-plugin-kinesis 1.3.0 → 2.0.0

This diff compares publicly available package versions as they were released to their public registry. It is provided for informational purposes only.
Files changed (44)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +2 -23
  3. data/CHANGELOG.md +13 -0
  4. data/Gemfile +9 -9
  5. data/LICENSE.txt +201 -40
  6. data/Makefile +24 -31
  7. data/README.md +179 -308
  8. data/Rakefile +9 -13
  9. data/benchmark/task.rake +96 -58
  10. data/fluent-plugin-kinesis.gemspec +15 -19
  11. data/gemfiles/Gemfile.fluentd-0.12 +10 -10
  12. data/lib/fluent/plugin/kinesis.rb +166 -0
  13. data/lib/fluent/plugin/kinesis_helper/aggregator.rb +99 -0
  14. data/lib/fluent/plugin/kinesis_helper/api.rb +152 -121
  15. data/lib/fluent/plugin/kinesis_helper/client.rb +125 -12
  16. data/lib/fluent/plugin/out_kinesis_firehose.rb +40 -27
  17. data/lib/fluent/plugin/out_kinesis_streams.rb +51 -30
  18. data/lib/fluent/plugin/out_kinesis_streams_aggregated.rb +76 -0
  19. data/lib/fluent_plugin_kinesis/version.rb +10 -10
  20. metadata +18 -70
  21. data/README-v0.4.md +0 -348
  22. data/benchmark/dummy.conf +0 -0
  23. data/gemfiles/Gemfile.aws-sdk-2.4 +0 -20
  24. data/gemfiles/Gemfile.fluentd-0.10.58 +0 -20
  25. data/gemfiles/Gemfile.fluentd-0.14.11 +0 -20
  26. data/gemfiles/Gemfile.ruby-2.0 +0 -21
  27. data/gemfiles/Gemfile.ruby-2.1 +0 -21
  28. data/lib/fluent/plugin/kinesis_helper.rb +0 -36
  29. data/lib/fluent/plugin/kinesis_helper/class_methods.rb +0 -123
  30. data/lib/fluent/plugin/kinesis_helper/credentials.rb +0 -51
  31. data/lib/fluent/plugin/kinesis_helper/error.rb +0 -43
  32. data/lib/fluent/plugin/kinesis_helper/format.rb +0 -85
  33. data/lib/fluent/plugin/kinesis_helper/initialize.rb +0 -59
  34. data/lib/fluent/plugin/kinesis_helper/kpl.rb +0 -82
  35. data/lib/fluent/plugin/out_kinesis.rb +0 -323
  36. data/lib/fluent/plugin/out_kinesis_producer.rb +0 -48
  37. data/lib/fluent/plugin/patched_detach_process_impl.rb +0 -103
  38. data/lib/kinesis_producer.rb +0 -24
  39. data/lib/kinesis_producer/binary.rb +0 -10
  40. data/lib/kinesis_producer/daemon.rb +0 -270
  41. data/lib/kinesis_producer/library.rb +0 -122
  42. data/lib/kinesis_producer/protobuf/config.pb.rb +0 -66
  43. data/lib/kinesis_producer/protobuf/messages.pb.rb +0 -151
  44. data/lib/kinesis_producer/tasks/binary.rake +0 -73
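Taken together, the list shows the shape of the 2.0 rewrite: a shared data/lib/fluent/plugin/kinesis.rb base class, an aggregator helper, and a new kinesis_streams_aggregated output are added, while the deprecated out_kinesis output, the KPL-based kinesis_producer tree, and the fluentd detach-process patch are deleted outright (the three full deletions appear below). For anyone still on the removed out_kinesis output, here is a minimal migration sketch; the v2.0 parameter set shown is an assumption inferred from the renamed plugin files, so verify it against the updated README.md before relying on it:

    # v1.x (removed in 2.0): the deprecated out_kinesis output
    <match app.**>
      @type kinesis
      stream_name my-stream
      region us-east-1
      random_partition_key true
    </match>

    # v2.0 replacement: out_kinesis_streams (assumed equivalent configuration;
    # a random partition key is used when none is configured)
    <match app.**>
      @type kinesis_streams
      stream_name my-stream
      region us-east-1
    </match>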
--- a/data/lib/fluent/plugin/out_kinesis.rb
+++ /dev/null
@@ -1,323 +0,0 @@
- #
- # Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
- #
- # Licensed under the Amazon Software License (the "License").
- # You may not use this file except in compliance with the License.
- # A copy of the License is located at
- #
- # http://aws.amazon.com/asl/
- #
- # or in the "license" file accompanying this file. This file is distributed
- # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
- # express or implied. See the License for the specific language governing
- # permissions and limitations under the License.
-
- require 'aws-sdk'
- require 'yajl'
- require 'logger'
- require 'securerandom'
- require 'zlib'
- require 'fluent_plugin_kinesis/version'
-
- module FluentPluginKinesis
-   class OutputFilter < Fluent::BufferedOutput
-
-     # detach_multi_process has been deleted at 0.14.12
-     # https://github.com/fluent/fluentd/commit/fcd8cc18e1f3a95710a80f982b91a1414fadc432
-     require 'fluent/version'
-     if Gem::Version.new(Fluent::VERSION) < Gem::Version.new('0.14.12')
-       require 'fluent/process'
-       include Fluent::DetachMultiProcessMixin
-     end
-     include Fluent::SetTimeKeyMixin
-     include Fluent::SetTagKeyMixin
-
-     USER_AGENT_NAME = 'fluent-plugin-kinesis-output-filter'
-     PROC_BASE_STR = 'proc {|record| %s }'
-     PUT_RECORDS_MAX_COUNT = 500
-     PUT_RECORD_MAX_DATA_SIZE = 1024 * 1024
-     PUT_RECORDS_MAX_DATA_SIZE = 1024 * 1024 * 5
-
-     Fluent::Plugin.register_output('kinesis',self)
-
-     config_set_default :include_time_key, true
-     config_set_default :include_tag_key, true
-
-     config_param :aws_key_id, :string, default: nil, :secret => true
-     config_param :aws_sec_key, :string, default: nil, :secret => true
-     # The 'region' parameter is optional because
-     # it may be set as an environment variable.
-     config_param :region, :string, default: nil
-     config_param :ensure_stream_connection, :bool, default: true
-
-     config_param :profile, :string, :default => nil
-     config_param :credentials_path, :string, :default => nil
-     config_param :role_arn, :string, :default => nil
-     config_param :external_id, :string, :default => nil
-
-     config_param :stream_name, :string
-     config_param :random_partition_key, :bool, default: false
-     config_param :partition_key, :string, default: nil
-     config_param :partition_key_expr, :string, default: nil
-     config_param :explicit_hash_key, :string, default: nil
-     config_param :explicit_hash_key_expr, :string, default: nil
-     config_param :order_events, :bool, default: false
-     config_param :retries_on_putrecords, :integer, default: 3
-     config_param :use_yajl, :bool, default: false
-     config_param :zlib_compression, :bool, default: false
-
-     config_param :debug, :bool, default: false
-
-     config_param :http_proxy, :string, default: nil
-
-     def configure(conf)
-       log.warn("Deprecated warning: out_kinesis is no longer supported after v1.0.0. Please check out_kinesis_streams out.")
-       super
-       validate_params
-
-       if @detach_process or (@num_threads > 1)
-         @parallel_mode = true
-         if @detach_process
-           @use_detach_multi_process_mixin = true
-         end
-       else
-         @parallel_mode = false
-       end
-
-       if @parallel_mode
-         if @order_events
-           log.warn 'You have set "order_events" to true, however this configuration will be ignored due to "detach_process" and/or "num_threads".'
-         end
-         @order_events = false
-       end
-
-       if @partition_key_expr
-         partition_key_proc_str = sprintf(
-           PROC_BASE_STR, @partition_key_expr
-         )
-         @partition_key_proc = eval(partition_key_proc_str)
-       end
-
-       if @explicit_hash_key_expr
-         explicit_hash_key_proc_str = sprintf(
-           PROC_BASE_STR, @explicit_hash_key_expr
-         )
-         @explicit_hash_key_proc = eval(explicit_hash_key_proc_str)
-       end
-
-       @dump_class = @use_yajl ? Yajl : JSON
-     end
-
-     def start
-       detach_multi_process do
-         super
-         load_client
-         if @ensure_stream_connection
-           check_connection_to_stream
-         end
-       end
-     end
-
-     def format(tag, time, record)
-       data = {
-         data: @dump_class.dump(record),
-         partition_key: get_key(:partition_key,record)
-       }
-
-       if @explicit_hash_key or @explicit_hash_key_proc
-         data[:explicit_hash_key] = get_key(:explicit_hash_key,record)
-       end
-
-       data.to_msgpack
-     end
-
-     def write(chunk)
-       data_list = chunk.to_enum(:msgpack_each).map{|record|
-         build_data_to_put(record)
-       }.find_all{|record|
-         unless record_exceeds_max_size?(record[:data])
-           true
-         else
-           log.error sprintf('Record exceeds the %.3f KB(s) per-record size limit and will not be delivered: %s', PUT_RECORD_MAX_DATA_SIZE / 1024.0, record[:data])
-           false
-         end
-       }
-
-       if @order_events
-         put_record_for_order_events(data_list)
-       else
-         records_array = build_records_array_to_put(data_list)
-         records_array.each{|records|
-           put_records_with_retry(records)
-         }
-       end
-     end
-
-     private
-     def validate_params
-       unless @random_partition_key or @partition_key or @partition_key_expr
-         raise Fluent::ConfigError, "'random_partition_key' or 'partition_key' or 'partition_key_expr' is required"
-       end
-     end
-
-     def load_client
-
-       user_agent_suffix = "#{USER_AGENT_NAME}/#{FluentPluginKinesis::VERSION}"
-
-       options = {
-         user_agent_suffix: user_agent_suffix
-       }
-
-       if @region
-         options[:region] = @region
-       end
-
-       if @aws_key_id && @aws_sec_key
-         options.update(
-           access_key_id: @aws_key_id,
-           secret_access_key: @aws_sec_key,
-         )
-       elsif @profile
-         credentials_opts = {:profile_name => @profile}
-         credentials_opts[:path] = @credentials_path if @credentials_path
-         credentials = Aws::SharedCredentials.new(credentials_opts)
-         options[:credentials] = credentials
-       elsif @role_arn
-         credentials = Aws::AssumeRoleCredentials.new(
-           client: Aws::STS::Client.new(options),
-           role_arn: @role_arn,
-           role_session_name: "aws-fluent-plugin-kinesis",
-           external_id: @external_id,
-           duration_seconds: 60 * 60
-         )
-         options[:credentials] = credentials
-       end
-
-       if @debug
-         options.update(
-           logger: Logger.new(log.out),
-           log_level: :debug
-         )
-         # XXX: Add the following options, if necessary
-         # :http_wire_trace => true
-       end
-
-       if @http_proxy
-         options[:http_proxy] = @http_proxy
-       end
-
-       @client = Aws::Kinesis::Client.new(options)
-
-     end
-
-     def check_connection_to_stream
-       @client.describe_stream(stream_name: @stream_name)
-     end
-
-     def get_key(name, record)
-       if @random_partition_key
-         SecureRandom.uuid
-       else
-         key = instance_variable_get("@#{name}")
-         key_proc = instance_variable_get("@#{name}_proc")
-
-         value = key ? record[key] : record
-
-         if key_proc
-           value = key_proc.call(value)
-         end
-
-         value.to_s
-       end
-     end
-
-     def build_data_to_put(data)
-       if @zlib_compression
-         Hash[data.map{|k, v| [k.to_sym, k=="data" ? Zlib::Deflate.deflate(v) : v] }]
-       else
-         Hash[data.map{|k, v| [k.to_sym, v] }]
-       end
-     end
-
-     def put_record_for_order_events(data_list)
-       sequence_number_for_ordering = nil
-       data_list.each do |data_to_put|
-         if sequence_number_for_ordering
-           data_to_put.update(
-             sequence_number_for_ordering: sequence_number_for_ordering
-           )
-         end
-         data_to_put.update(
-           stream_name: @stream_name
-         )
-         result = @client.put_record(data_to_put)
-         sequence_number_for_ordering = result[:sequence_number]
-       end
-     end
-
-     def build_records_array_to_put(data_list)
-       records_array = []
-       records = []
-       records_payload_length = 0
-       data_list.each{|data_to_put|
-         payload = data_to_put[:data]
-         partition_key = data_to_put[:partition_key]
-         if records.length >= PUT_RECORDS_MAX_COUNT or (records_payload_length + payload.length + partition_key.length) >= PUT_RECORDS_MAX_DATA_SIZE
-           records_array.push(records)
-           records = []
-           records_payload_length = 0
-         end
-         records.push(data_to_put)
-         records_payload_length += (payload.length + partition_key.length)
-       }
-       records_array.push(records) unless records.empty?
-       records_array
-     end
-
-     def put_records_with_retry(records,retry_count=0)
-       response = @client.put_records(
-         records: records,
-         stream_name: @stream_name
-       )
-
-       if response[:failed_record_count] && response[:failed_record_count] > 0
-         failed_records = []
-         response[:records].each_with_index{|record,index|
-           if record[:error_code]
-             failed_records.push({body: records[index], error_code: record[:error_code]})
-           end
-         }
-
-         if(retry_count < @retries_on_putrecords)
-           sleep(calculate_sleep_duration(retry_count))
-           retry_count += 1
-           log.warn sprintf('Retrying to put records. Retry count: %d', retry_count)
-           put_records_with_retry(
-             failed_records.map{|record| record[:body]},
-             retry_count
-           )
-         else
-           failed_records.each{|record|
-             log.error sprintf(
-               'Could not put record, Error: %s, Record: %s',
-               record[:error_code],
-               @dump_class.dump(record[:body])
-             )
-           }
-         end
-       end
-     end
-
-     def calculate_sleep_duration(current_retry)
-       Array.new(@retries_on_putrecords){|n| ((2 ** n) * scaling_factor)}[current_retry]
-     end
-
-     def scaling_factor
-       0.5 + Kernel.rand * 0.1
-     end
-
-     def record_exceeds_max_size?(record_string)
-       return record_string.length > PUT_RECORD_MAX_DATA_SIZE
-     end
-   end
- end
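A note on the retry arithmetic deleted above: put_records_with_retry re-sends only the failed subset of a PutRecords batch (the API's limits of 500 records and 5 MB per call are enforced earlier by build_records_array_to_put), sleeping calculate_sleep_duration(retry_count) between attempts. That method implements jittered exponential backoff. A standalone Ruby sketch of the same calculation, with illustrative variable names rather than plugin API:

    # Jittered exponential backoff, as in the deleted calculate_sleep_duration.
    retries_on_putrecords = 3  # the plugin's default retry budget
    delays = Array.new(retries_on_putrecords) do |n|
      (2 ** n) * (0.5 + Kernel.rand * 0.1)  # fresh random factor in [0.5, 0.6)
    end
    # => roughly [0.5-0.6, 1.0-1.2, 2.0-2.4] seconds for retries 0, 1 and 2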
--- a/data/lib/fluent/plugin/out_kinesis_producer.rb
+++ /dev/null
@@ -1,48 +0,0 @@
- #
- # Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
- #
- # Licensed under the Amazon Software License (the "License").
- # You may not use this file except in compliance with the License.
- # A copy of the License is located at
- #
- # http://aws.amazon.com/asl/
- #
- # or in the "license" file accompanying this file. This file is distributed
- # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
- # express or implied. See the License for the specific language governing
- # permissions and limitations under the License.
-
- require 'fluent/plugin/kinesis_helper'
-
- module Fluent
-   class KinesisProducerOutput < BufferedOutput
-     include KinesisHelper
-     Fluent::Plugin.register_output('kinesis_producer', self)
-     config_param_for_producer
-
-     def configure(conf)
-       super
-       unless @stream_name or @stream_name_prefix
-         raise Fluent::ConfigError, "'stream_name' or 'stream_name_prefix' is required"
-       end
-       if @stream_name and @stream_name_prefix
-         raise Fluent::ConfigError, "Only one of 'stream_name' or 'stream_name_prefix' is allowed"
-       end
-     end
-
-     def write(chunk)
-       records = convert_to_records(chunk)
-       wait_futures(write_chunk_to_kpl(records))
-     end
-
-     private
-
-     def convert_format(tag, time, record)
-       {
-         data: data_format(tag, time, record),
-         partition_key: key(record),
-         stream_name: @stream_name ? @stream_name : @stream_name_prefix + tag,
-       }
-     end
-   end
- end
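convert_format above is where the deleted KPL output chose a destination stream: a fixed stream_name, or stream_name_prefix plus the event tag (the two options are mutually exclusive, as configure enforces). A toy Ruby illustration of that routing rule, using hypothetical values:

    # Hypothetical values illustrating the stream_name_prefix routing above.
    stream_name        = nil        # not configured, so the prefix path is taken
    stream_name_prefix = 'logs-'
    tag                = 'app.web'
    destination = stream_name ? stream_name : stream_name_prefix + tag
    # => "logs-app.web": each Fluentd tag gets its own Kinesis stream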
--- a/data/lib/fluent/plugin/patched_detach_process_impl.rb
+++ /dev/null
@@ -1,103 +0,0 @@
- #
- # Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
- #
- # Licensed under the Amazon Software License (the "License").
- # You may not use this file except in compliance with the License.
- # A copy of the License is located at
- #
- # http://aws.amazon.com/asl/
- #
- # or in the "license" file accompanying this file. This file is distributed
- # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
- # express or implied. See the License for the specific language governing
- # permissions and limitations under the License.
-
- module Fluent
-   module PatchedDetachProcessImpl
-     def on_detach_process(i)
-     end
-
-     def on_exit_process(i)
-     end
-
-     private
-
-     def detach_process_impl(num, &block)
-       children = []
-
-       num.times do |i|
-         pid, forward_thread = DetachProcessManager.instance.fork(self)
-
-         if pid
-           # parent process
-           $log.info "detached process", :class=>self.class, :pid=>pid
-           children << [pid, forward_thread]
-           next
-         end
-
-         # child process
-         begin
-           on_detach_process(i)
-
-           block.call
-
-           # disable Engine.stop called by signal handler
-           Engine.define_singleton_method(:stop) do
-             # do nothing
-           end
-           # override signal handlers called by parent process
-           fin = ::Fluent::DetachProcessImpl::FinishWait.new
-           trap :INT do
-             fin.stop
-           end
-           trap :TERM do
-             fin.stop
-           end
-           #forward_thread.join # TODO this thread won't stop because parent doesn't close pipe
-           fin.wait
-
-           on_exit_process(i)
-           exit! 0
-         ensure
-           $log.error "unknown error while shutting down this child process", :error=>$!.to_s, :pid=>Process.pid
-           $log.error_backtrace
-         end
-
-         exit! 1
-       end
-
-       # parent process
-       # override shutdown method to kill child processes
-       define_singleton_method(:shutdown) do
-         children.each {|pair|
-           begin
-             pid = pair[0]
-             forward_thread = pair[1]
-             if pid
-               Process.kill(:TERM, pid)
-               forward_thread.join # wait until child closes pipe
-               Process.waitpid(pid)
-               pair[0] = nil
-             end
-           rescue
-             $log.error "unknown error while shutting down remote child process", :error=>$!.to_s
-             $log.error_backtrace
-           end
-         }
-       end
-
-       # override target.emit and write event stream to the pipe
-       forwarders = children.map {|pair| pair[1].forwarder }
-       if forwarders.length > 1
-         # use roundrobin
-         fwd = DetachProcessManager::MultiForwarder.new(forwarders)
-       else
-         fwd = forwarders[0]
-       end
-       define_singleton_method(:emit) do |tag,es,chain|
-         chain.next
-         fwd.emit(tag, es)
-       end
-     end
-   end
- end
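The deleted patch above exists to make detached child processes shut down cleanly: each child swaps fluentd's default signal handlers for a simple wait-for-signal, and the parent's overridden shutdown sends TERM, joins the forward thread, and reaps the child. A minimal plain-Ruby sketch of that parent/child handshake (no fluentd involved; the pipe stands in for FinishWait, and the sleep is a crude stand-in for real synchronization):

    # Parent/child TERM handshake, as in detach_process_impl (plain-Ruby sketch).
    pid = Process.fork do
      r, w = IO.pipe                # self-pipe: a signal-safe wakeup channel
      trap(:TERM) { w.write('x') }  # parent's TERM wakes the child up
      r.read(1)                     # block until signalled (like fin.wait)
      exit!(0)                      # exit immediately, as the patch does
    end

    sleep 0.1                       # crude: let the child install its trap first
    Process.kill(:TERM, pid)        # parent: ask the child to stop...
    Process.waitpid(pid)            # ...and reap it so no zombie remains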