fluent-plugin-kinesis 1.3.0 → 2.0.0

Sign up to get free protection for your applications and get access to all the features.
Files changed (44):
  1. checksums.yaml +4 -4
  2. data/.travis.yml +2 -23
  3. data/CHANGELOG.md +13 -0
  4. data/Gemfile +9 -9
  5. data/LICENSE.txt +201 -40
  6. data/Makefile +24 -31
  7. data/README.md +179 -308
  8. data/Rakefile +9 -13
  9. data/benchmark/task.rake +96 -58
  10. data/fluent-plugin-kinesis.gemspec +15 -19
  11. data/gemfiles/Gemfile.fluentd-0.12 +10 -10
  12. data/lib/fluent/plugin/kinesis.rb +166 -0
  13. data/lib/fluent/plugin/kinesis_helper/aggregator.rb +99 -0
  14. data/lib/fluent/plugin/kinesis_helper/api.rb +152 -121
  15. data/lib/fluent/plugin/kinesis_helper/client.rb +125 -12
  16. data/lib/fluent/plugin/out_kinesis_firehose.rb +40 -27
  17. data/lib/fluent/plugin/out_kinesis_streams.rb +51 -30
  18. data/lib/fluent/plugin/out_kinesis_streams_aggregated.rb +76 -0
  19. data/lib/fluent_plugin_kinesis/version.rb +10 -10
  20. metadata +18 -70
  21. data/README-v0.4.md +0 -348
  22. data/benchmark/dummy.conf +0 -0
  23. data/gemfiles/Gemfile.aws-sdk-2.4 +0 -20
  24. data/gemfiles/Gemfile.fluentd-0.10.58 +0 -20
  25. data/gemfiles/Gemfile.fluentd-0.14.11 +0 -20
  26. data/gemfiles/Gemfile.ruby-2.0 +0 -21
  27. data/gemfiles/Gemfile.ruby-2.1 +0 -21
  28. data/lib/fluent/plugin/kinesis_helper.rb +0 -36
  29. data/lib/fluent/plugin/kinesis_helper/class_methods.rb +0 -123
  30. data/lib/fluent/plugin/kinesis_helper/credentials.rb +0 -51
  31. data/lib/fluent/plugin/kinesis_helper/error.rb +0 -43
  32. data/lib/fluent/plugin/kinesis_helper/format.rb +0 -85
  33. data/lib/fluent/plugin/kinesis_helper/initialize.rb +0 -59
  34. data/lib/fluent/plugin/kinesis_helper/kpl.rb +0 -82
  35. data/lib/fluent/plugin/out_kinesis.rb +0 -323
  36. data/lib/fluent/plugin/out_kinesis_producer.rb +0 -48
  37. data/lib/fluent/plugin/patched_detach_process_impl.rb +0 -103
  38. data/lib/kinesis_producer.rb +0 -24
  39. data/lib/kinesis_producer/binary.rb +0 -10
  40. data/lib/kinesis_producer/daemon.rb +0 -270
  41. data/lib/kinesis_producer/library.rb +0 -122
  42. data/lib/kinesis_producer/protobuf/config.pb.rb +0 -66
  43. data/lib/kinesis_producer/protobuf/messages.pb.rb +0 -151
  44. data/lib/kinesis_producer/tasks/binary.rake +0 -73
@@ -1,323 +0,0 @@
1
- #
2
- # Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
- #
4
- # Licensed under the Amazon Software License (the "License").
5
- # You may not use this file except in compliance with the License.
6
- # A copy of the License is located at
7
- #
8
- # http://aws.amazon.com/asl/
9
- #
10
- # or in the "license" file accompanying this file. This file is distributed
11
- # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
12
- # express or implied. See the License for the specific language governing
13
- # permissions and limitations under the License.
14
-
15
- require 'aws-sdk'
16
- require 'yajl'
17
- require 'logger'
18
- require 'securerandom'
19
- require 'zlib'
20
- require 'fluent_plugin_kinesis/version'
21
-
22
- module FluentPluginKinesis
23
- class OutputFilter < Fluent::BufferedOutput
24
-
25
- # detach_multi_process has been deleted at 0.14.12
26
- # https://github.com/fluent/fluentd/commit/fcd8cc18e1f3a95710a80f982b91a1414fadc432
27
- require 'fluent/version'
28
- if Gem::Version.new(Fluent::VERSION) < Gem::Version.new('0.14.12')
29
- require 'fluent/process'
30
- include Fluent::DetachMultiProcessMixin
31
- end
32
- include Fluent::SetTimeKeyMixin
33
- include Fluent::SetTagKeyMixin
34
-
35
- USER_AGENT_NAME = 'fluent-plugin-kinesis-output-filter'
36
- PROC_BASE_STR = 'proc {|record| %s }'
37
- PUT_RECORDS_MAX_COUNT = 500
38
- PUT_RECORD_MAX_DATA_SIZE = 1024 * 1024
39
- PUT_RECORDS_MAX_DATA_SIZE = 1024 * 1024 * 5
40
-
41
- Fluent::Plugin.register_output('kinesis',self)
42
-
43
- config_set_default :include_time_key, true
44
- config_set_default :include_tag_key, true
45
-
46
- config_param :aws_key_id, :string, default: nil, :secret => true
47
- config_param :aws_sec_key, :string, default: nil, :secret => true
48
- # The 'region' parameter is optional because
49
- # it may be set as an environment variable.
50
- config_param :region, :string, default: nil
51
- config_param :ensure_stream_connection, :bool, default: true
52
-
53
- config_param :profile, :string, :default => nil
54
- config_param :credentials_path, :string, :default => nil
55
- config_param :role_arn, :string, :default => nil
56
- config_param :external_id, :string, :default => nil
57
-
58
- config_param :stream_name, :string
59
- config_param :random_partition_key, :bool, default: false
60
- config_param :partition_key, :string, default: nil
61
- config_param :partition_key_expr, :string, default: nil
62
- config_param :explicit_hash_key, :string, default: nil
63
- config_param :explicit_hash_key_expr, :string, default: nil
64
- config_param :order_events, :bool, default: false
65
- config_param :retries_on_putrecords, :integer, default: 3
66
- config_param :use_yajl, :bool, default: false
67
- config_param :zlib_compression, :bool, default: false
68
-
69
- config_param :debug, :bool, default: false
70
-
71
- config_param :http_proxy, :string, default: nil
72
-
73
- def configure(conf)
74
- log.warn("Deprecated warning: out_kinesis is no longer supported after v1.0.0. Please check out_kinesis_streams out.")
75
- super
76
- validate_params
77
-
78
- if @detach_process or (@num_threads > 1)
79
- @parallel_mode = true
80
- if @detach_process
81
- @use_detach_multi_process_mixin = true
82
- end
83
- else
84
- @parallel_mode = false
85
- end
86
-
87
- if @parallel_mode
88
- if @order_events
89
- log.warn 'You have set "order_events" to true, however this configuration will be ignored due to "detach_process" and/or "num_threads".'
90
- end
91
- @order_events = false
92
- end
93
-
94
- if @partition_key_expr
95
- partition_key_proc_str = sprintf(
96
- PROC_BASE_STR, @partition_key_expr
97
- )
98
- @partition_key_proc = eval(partition_key_proc_str)
99
- end
100
-
101
- if @explicit_hash_key_expr
102
- explicit_hash_key_proc_str = sprintf(
103
- PROC_BASE_STR, @explicit_hash_key_expr
104
- )
105
- @explicit_hash_key_proc = eval(explicit_hash_key_proc_str)
106
- end
107
-
108
- @dump_class = @use_yajl ? Yajl : JSON
109
- end
110
-
111
- def start
112
- detach_multi_process do
113
- super
114
- load_client
115
- if @ensure_stream_connection
116
- check_connection_to_stream
117
- end
118
- end
119
- end
120
-
121
- def format(tag, time, record)
122
- data = {
123
- data: @dump_class.dump(record),
124
- partition_key: get_key(:partition_key,record)
125
- }
126
-
127
- if @explicit_hash_key or @explicit_hash_key_proc
128
- data[:explicit_hash_key] = get_key(:explicit_hash_key,record)
129
- end
130
-
131
- data.to_msgpack
132
- end
133
-
134
- def write(chunk)
135
- data_list = chunk.to_enum(:msgpack_each).map{|record|
136
- build_data_to_put(record)
137
- }.find_all{|record|
138
- unless record_exceeds_max_size?(record[:data])
139
- true
140
- else
141
- log.error sprintf('Record exceeds the %.3f KB(s) per-record size limit and will not be delivered: %s', PUT_RECORD_MAX_DATA_SIZE / 1024.0, record[:data])
142
- false
143
- end
144
- }
145
-
146
- if @order_events
147
- put_record_for_order_events(data_list)
148
- else
149
- records_array = build_records_array_to_put(data_list)
150
- records_array.each{|records|
151
- put_records_with_retry(records)
152
- }
153
- end
154
- end
155
-
156
- private
157
- def validate_params
158
- unless @random_partition_key or @partition_key or @partition_key_expr
159
- raise Fluent::ConfigError, "'random_partition_key' or 'partition_key' or 'partition_key_expr' is required"
160
- end
161
- end
162
-
163
- def load_client
164
-
165
- user_agent_suffix = "#{USER_AGENT_NAME}/#{FluentPluginKinesis::VERSION}"
166
-
167
- options = {
168
- user_agent_suffix: user_agent_suffix
169
- }
170
-
171
- if @region
172
- options[:region] = @region
173
- end
174
-
175
- if @aws_key_id && @aws_sec_key
176
- options.update(
177
- access_key_id: @aws_key_id,
178
- secret_access_key: @aws_sec_key,
179
- )
180
- elsif @profile
181
- credentials_opts = {:profile_name => @profile}
182
- credentials_opts[:path] = @credentials_path if @credentials_path
183
- credentials = Aws::SharedCredentials.new(credentials_opts)
184
- options[:credentials] = credentials
185
- elsif @role_arn
186
- credentials = Aws::AssumeRoleCredentials.new(
187
- client: Aws::STS::Client.new(options),
188
- role_arn: @role_arn,
189
- role_session_name: "aws-fluent-plugin-kinesis",
190
- external_id: @external_id,
191
- duration_seconds: 60 * 60
192
- )
193
- options[:credentials] = credentials
194
- end
195
-
196
- if @debug
197
- options.update(
198
- logger: Logger.new(log.out),
199
- log_level: :debug
200
- )
201
- # XXX: Add the following options, if necessary
202
- # :http_wire_trace => true
203
- end
204
-
205
- if @http_proxy
206
- options[:http_proxy] = @http_proxy
207
- end
208
-
209
- @client = Aws::Kinesis::Client.new(options)
210
-
211
- end
212
-
213
- def check_connection_to_stream
214
- @client.describe_stream(stream_name: @stream_name)
215
- end
216
-
217
- def get_key(name, record)
218
- if @random_partition_key
219
- SecureRandom.uuid
220
- else
221
- key = instance_variable_get("@#{name}")
222
- key_proc = instance_variable_get("@#{name}_proc")
223
-
224
- value = key ? record[key] : record
225
-
226
- if key_proc
227
- value = key_proc.call(value)
228
- end
229
-
230
- value.to_s
231
- end
232
- end
233
-
234
- def build_data_to_put(data)
235
- if @zlib_compression
236
- Hash[data.map{|k, v| [k.to_sym, k=="data" ? Zlib::Deflate.deflate(v) : v] }]
237
- else
238
- Hash[data.map{|k, v| [k.to_sym, v] }]
239
- end
240
- end
241
-
242
- def put_record_for_order_events(data_list)
243
- sequence_number_for_ordering = nil
244
- data_list.each do |data_to_put|
245
- if sequence_number_for_ordering
246
- data_to_put.update(
247
- sequence_number_for_ordering: sequence_number_for_ordering
248
- )
249
- end
250
- data_to_put.update(
251
- stream_name: @stream_name
252
- )
253
- result = @client.put_record(data_to_put)
254
- sequence_number_for_ordering = result[:sequence_number]
255
- end
256
- end
257
-
258
- def build_records_array_to_put(data_list)
259
- records_array = []
260
- records = []
261
- records_payload_length = 0
262
- data_list.each{|data_to_put|
263
- payload = data_to_put[:data]
264
- partition_key = data_to_put[:partition_key]
265
- if records.length >= PUT_RECORDS_MAX_COUNT or (records_payload_length + payload.length + partition_key.length) >= PUT_RECORDS_MAX_DATA_SIZE
266
- records_array.push(records)
267
- records = []
268
- records_payload_length = 0
269
- end
270
- records.push(data_to_put)
271
- records_payload_length += (payload.length + partition_key.length)
272
- }
273
- records_array.push(records) unless records.empty?
274
- records_array
275
- end
276
-
277
- def put_records_with_retry(records,retry_count=0)
278
- response = @client.put_records(
279
- records: records,
280
- stream_name: @stream_name
281
- )
282
-
283
- if response[:failed_record_count] && response[:failed_record_count] > 0
284
- failed_records = []
285
- response[:records].each_with_index{|record,index|
286
- if record[:error_code]
287
- failed_records.push({body: records[index], error_code: record[:error_code]})
288
- end
289
- }
290
-
291
- if(retry_count < @retries_on_putrecords)
292
- sleep(calculate_sleep_duration(retry_count))
293
- retry_count += 1
294
- log.warn sprintf('Retrying to put records. Retry count: %d', retry_count)
295
- put_records_with_retry(
296
- failed_records.map{|record| record[:body]},
297
- retry_count
298
- )
299
- else
300
- failed_records.each{|record|
301
- log.error sprintf(
302
- 'Could not put record, Error: %s, Record: %s',
303
- record[:error_code],
304
- @dump_class.dump(record[:body])
305
- )
306
- }
307
- end
308
- end
309
- end
310
-
311
- def calculate_sleep_duration(current_retry)
312
- Array.new(@retries_on_putrecords){|n| ((2 ** n) * scaling_factor)}[current_retry]
313
- end
314
-
315
- def scaling_factor
316
- 0.5 + Kernel.rand * 0.1
317
- end
318
-
319
- def record_exceeds_max_size?(record_string)
320
- return record_string.length > PUT_RECORD_MAX_DATA_SIZE
321
- end
322
- end
323
- end
@@ -1,48 +0,0 @@
1
- #
2
- # Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
- #
4
- # Licensed under the Amazon Software License (the "License").
5
- # You may not use this file except in compliance with the License.
6
- # A copy of the License is located at
7
- #
8
- # http://aws.amazon.com/asl/
9
- #
10
- # or in the "license" file accompanying this file. This file is distributed
11
- # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
12
- # express or implied. See the License for the specific language governing
13
- # permissions and limitations under the License.
14
-
15
- require 'fluent/plugin/kinesis_helper'
16
-
17
- module Fluent
18
- class KinesisProducerOutput < BufferedOutput
19
- include KinesisHelper
20
- Fluent::Plugin.register_output('kinesis_producer', self)
21
- config_param_for_producer
22
-
23
- def configure(conf)
24
- super
25
- unless @stream_name or @stream_name_prefix
26
- raise Fluent::ConfigError, "'stream_name' or 'stream_name_prefix' is required"
27
- end
28
- if @stream_name and @stream_name_prefix
29
- raise Fluent::ConfigError, "Only one of 'stream_name' or 'stream_name_prefix' is allowed"
30
- end
31
- end
32
-
33
- def write(chunk)
34
- records = convert_to_records(chunk)
35
- wait_futures(write_chunk_to_kpl(records))
36
- end
37
-
38
- private
39
-
40
- def convert_format(tag, time, record)
41
- {
42
- data: data_format(tag, time, record),
43
- partition_key: key(record),
44
- stream_name: @stream_name ? @stream_name : @stream_name_prefix + tag,
45
- }
46
- end
47
- end
48
- end
@@ -1,103 +0,0 @@
1
- #
2
- # Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
- #
4
- # Licensed under the Amazon Software License (the "License").
5
- # You may not use this file except in compliance with the License.
6
- # A copy of the License is located at
7
- #
8
- # http://aws.amazon.com/asl/
9
- #
10
- # or in the "license" file accompanying this file. This file is distributed
11
- # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
12
- # express or implied. See the License for the specific language governing
13
- # permissions and limitations under the License.
14
-
15
- module Fluent
16
- module PatchedDetachProcessImpl
17
- def on_detach_process(i)
18
- end
19
-
20
- def on_exit_process(i)
21
- end
22
-
23
- private
24
-
25
- def detach_process_impl(num, &block)
26
- children = []
27
-
28
- num.times do |i|
29
- pid, forward_thread = DetachProcessManager.instance.fork(self)
30
-
31
- if pid
32
- # parent process
33
- $log.info "detached process", :class=>self.class, :pid=>pid
34
- children << [pid, forward_thread]
35
- next
36
- end
37
-
38
- # child process
39
- begin
40
- on_detach_process(i)
41
-
42
- block.call
43
-
44
- # disable Engine.stop called by signal handler
45
- Engine.define_singleton_method(:stop) do
46
- # do nothing
47
- end
48
- # override signal handlers called by parent process
49
- fin = ::Fluent::DetachProcessImpl::FinishWait.new
50
- trap :INT do
51
- fin.stop
52
- end
53
- trap :TERM do
54
- fin.stop
55
- end
56
- #forward_thread.join # TODO this thread won't stop because parent doesn't close pipe
57
- fin.wait
58
-
59
- on_exit_process(i)
60
- exit! 0
61
- ensure
62
- $log.error "unknown error while shutting down this child process", :error=>$!.to_s, :pid=>Process.pid
63
- $log.error_backtrace
64
- end
65
-
66
- exit! 1
67
- end
68
-
69
- # parent process
70
- # override shutdown method to kill child processes
71
- define_singleton_method(:shutdown) do
72
- children.each {|pair|
73
- begin
74
- pid = pair[0]
75
- forward_thread = pair[1]
76
- if pid
77
- Process.kill(:TERM, pid)
78
- forward_thread.join # wait until child closes pipe
79
- Process.waitpid(pid)
80
- pair[0] = nil
81
- end
82
- rescue
83
- $log.error "unknown error while shutting down remote child process", :error=>$!.to_s
84
- $log.error_backtrace
85
- end
86
- }
87
- end
88
-
89
- # override target.emit and write event stream to the pipe
90
- forwarders = children.map {|pair| pair[1].forwarder }
91
- if forwarders.length > 1
92
- # use roundrobin
93
- fwd = DetachProcessManager::MultiForwarder.new(forwarders)
94
- else
95
- fwd = forwarders[0]
96
- end
97
- define_singleton_method(:emit) do |tag,es,chain|
98
- chain.next
99
- fwd.emit(tag, es)
100
- end
101
- end
102
- end
103
- end