fluentd-plugin-kinesis-intuit 2.1.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
+ #
+ # Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License"). You
+ # may not use this file except in compliance with the License. A copy of
+ # the License is located at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # or in the "license" file accompanying this file. This file is
+ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+ # ANY KIND, either express or implied. See the License for the specific
+ # language governing permissions and limitations under the License.
+
+ source 'https://rubygems.org'
+
+ # Specify your gem's dependencies in fluent-plugin-kinesis.gemspec
+ gemspec path: ".."
+
+ gem "fluentd", "0.14.10"
@@ -0,0 +1,31 @@
+ #
+ # Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License"). You
+ # may not use this file except in compliance with the License. A copy of
+ # the License is located at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # or in the "license" file accompanying this file. This file is
+ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+ # ANY KIND, either express or implied. See the License for the specific
+ # language governing permissions and limitations under the License.
+
+ source 'https://rubygems.org'
+
+ # Specify your gem's dependencies in fluent-plugin-kinesis.gemspec
+ gemspec path: ".."
+
+ # Specify related gems for td-agent v3.2.0
+ # https://github.com/treasure-data/omnibus-td-agent/blob/v3.2.0/config/projects/td-agent3.rb#L27
+ gem "fluentd", "1.2.2"
+ # https://github.com/treasure-data/omnibus-td-agent/blob/v3.2.0/plugin_gems.rb#L16-L23
+ gem "jmespath", "1.4.0"
+ gem "aws-partitions", "1.87.0"
+ gem "aws-sigv4", "1.0.2"
+ gem "aws-sdk-core", "3.21.2"
+ gem "aws-sdk-kms", "1.5.0"
+ gem "aws-sdk-sqs", "1.3.0"
+ gem "aws-sdk-s3", "1.13.0"
+ gem "fluent-plugin-s3", "1.1.3"
@@ -0,0 +1,146 @@
+ #
+ # Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License"). You
+ # may not use this file except in compliance with the License. A copy of
+ # the License is located at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # or in the "license" file accompanying this file. This file is
+ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+ # ANY KIND, either express or implied. See the License for the specific
+ # language governing permissions and limitations under the License.
+
+ require 'fluent/plugin/output'
+ require 'fluent/plugin/kinesis_helper/client'
+ require 'fluent/plugin/kinesis_helper/api'
+ require 'zlib'
+
+ module Fluent
+   module Plugin
+     class KinesisOutput < Fluent::Plugin::Output
+       include Fluent::MessagePackFactory::Mixin
+       include KinesisHelper::Client
+       include KinesisHelper::API
+
+       class SkipRecordError < ::StandardError
+         def initialize(message, record)
+           super message
+           @record_message = if record.is_a? Array
+                               record.reverse.map(&:to_s).join(', ')
+                             else
+                               record.to_s
+                             end
+         end
+
+         def to_s
+           super + ": " + @record_message
+         end
+       end
+       class KeyNotFoundError < SkipRecordError
+         def initialize(key, record)
+           super "Key '#{key}' doesn't exist", record
+         end
+       end
+       class ExceedMaxRecordSizeError < SkipRecordError
+         def initialize(size, record)
+           super "Record size limit exceeded in #{size/1024} KB", record
+         end
+       end
+       class InvalidRecordError < SkipRecordError
+         def initialize(record)
+           super "Invalid type of record", record
+         end
+       end
+
+       config_param :data_key, :string, default: nil
+       config_param :log_truncate_max_size, :integer, default: 1024
+       config_param :compression, :string, default: nil
+       config_section :format do
+         config_set_default :@type, 'json'
+       end
+       config_section :inject do
+         config_set_default :time_type, 'string'
+         config_set_default :time_format, '%Y-%m-%dT%H:%M:%S.%N%z'
+       end
+
+       config_param :debug, :bool, default: false
+
+       helpers :formatter, :inject
+
+       def configure(conf)
+         super
+         @data_formatter = data_formatter_create(conf)
+       end
+
+       def multi_workers_ready?
+         true
+       end
+
+       private
+
+       def data_formatter_create(conf)
+         formatter = formatter_create
+         compressor = compressor_create
+         if @data_key.nil?
+           ->(tag, time, record) {
+             record = inject_values_to_record(tag, time, record)
+             compressor.call(formatter.format(tag, time, record).chomp.b)
+           }
+         else
+           ->(tag, time, record) {
+             raise InvalidRecordError, record unless record.is_a? Hash
+             raise KeyNotFoundError.new(@data_key, record) if record[@data_key].nil?
+             compressor.call(record[@data_key].to_s.b)
+           }
+         end
+       end
+
+       def compressor_create
+         case @compression
+         when "zlib"
+           ->(data) { Zlib::Deflate.deflate(data) }
+         else
+           ->(data) { data }
+         end
+       end
+
+       def format_for_api(&block)
+         converted = block.call
+         size = size_of_values(converted)
+         if size > @max_record_size
+           raise ExceedMaxRecordSizeError.new(size, converted)
+         end
+         converted.to_msgpack
+       rescue SkipRecordError => e
+         log.error(truncate e)
+         ''
+       end
+
+       def write_records_batch(chunk, &block)
+         unique_id = chunk.dump_unique_id_hex(chunk.unique_id)
+         chunk.open do |io|
+           records = msgpack_unpacker(io).to_enum
+           split_to_batches(records) do |batch, size|
+             log.debug(sprintf "Write chunk %s / %3d records / %4d KB", unique_id, batch.size, size/1024)
+             batch_request_with_retry(batch, &block)
+             log.debug("Finish writing chunk")
+           end
+         end
+       end
+
+       def request_type
+         self.class::RequestType
+       end
+
+       def truncate(msg)
+         if @log_truncate_max_size == 0 or (msg.to_s.size <= @log_truncate_max_size)
+           msg.to_s
+         else
+           msg.to_s[0...@log_truncate_max_size]
+         end
+       end
+     end
+   end
+ end
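
The KinesisOutput class above builds a per-record formatter lambda in data_formatter_create: the record is run through the configured formatter (JSON by default), the trailing newline is stripped, the string is forced to binary encoding, and the result is passed through the compressor selected by the compression parameter. A minimal standalone sketch of that pipeline follows; format_record is a hypothetical stand-in for the plugin's formatter and inject helpers, not part of the package.

    require 'zlib'
    require 'json'

    # Hypothetical stand-in for the formatter/inject helpers: JSON-encode
    # the record, drop the trailing newline, force binary encoding.
    format_record = ->(record) { JSON.generate(record).chomp.b }

    # Compressor lambda mirroring compressor_create: deflate when the
    # compression parameter is "zlib", pass the data through otherwise.
    compression = "zlib"
    compressor = case compression
                 when "zlib" then ->(data) { Zlib::Deflate.deflate(data) }
                 else ->(data) { data }
                 end

    payload = compressor.call(format_record.call({"message" => "hello", "level" => "info"}))
    puts payload.bytesize
    puts Zlib::Inflate.inflate(payload) # => {"message":"hello","level":"info"}

The compressed payload is what gets measured against max_record_size in format_for_api, so zlib compression effectively raises the amount of log data that fits in one Kinesis record.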
@@ -0,0 +1,101 @@
+ #
+ # Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License"). You
+ # may not use this file except in compliance with the License. A copy of
+ # the License is located at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # or in the "license" file accompanying this file. This file is
+ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+ # ANY KIND, either express or implied. See the License for the specific
+ # language governing permissions and limitations under the License.
+
+ require 'fluent/configurable'
+ require 'google/protobuf'
+ require 'digest'
+
+ Google::Protobuf::DescriptorPool.generated_pool.build do
+   add_message "AggregatedRecord" do
+     repeated :partition_key_table, :string, 1
+     repeated :explicit_hash_key_table, :string, 2
+     repeated :records, :message, 3, "Record"
+   end
+   add_message "Tag" do
+     optional :key, :string, 1
+     optional :value, :string, 2
+   end
+   add_message "Record" do
+     optional :partition_key_index, :uint64, 1
+     optional :explicit_hash_key_index, :uint64, 2
+     optional :data, :bytes, 3
+     repeated :tags, :message, 4, "Tag"
+   end
+ end
+
+ module Fluent
+   module Plugin
+     module KinesisHelper
+       class Aggregator
+         AggregatedRecord = Google::Protobuf::DescriptorPool.generated_pool.lookup("AggregatedRecord").msgclass
+         Tag = Google::Protobuf::DescriptorPool.generated_pool.lookup("Tag").msgclass
+         Record = Google::Protobuf::DescriptorPool.generated_pool.lookup("Record").msgclass
+
+         class InvalidEncodingError < ::StandardError; end
+
+         MagicNumber = ['F3899AC2'].pack('H*')
+
+         def aggregate(records, partition_key)
+           message = AggregatedRecord.encode(AggregatedRecord.new(
+             partition_key_table: ['a', partition_key],
+             records: records.map{|data|
+               Record.new(partition_key_index: 1, data: data)
+             },
+           ))
+           [MagicNumber, message, Digest::MD5.digest(message)].pack("A4A*A16")
+         end
+
+         def deaggregate(encoded)
+           unless aggregated?(encoded)
+             raise InvalidEncodingError, "Invalid MagicNumber #{encoded[0..3]}"
+           end
+           message, digest = encoded[4..encoded.length-17], encoded[encoded.length-16..-1]
+           if Digest::MD5.digest(message) != digest
+             raise InvalidEncodingError, "Digest mismatch #{digest}"
+           end
+           decoded = AggregatedRecord.decode(message)
+           records = decoded.records.map(&:data)
+           partition_key = decoded.partition_key_table[1]
+           [records, partition_key]
+         end
+
+         def aggregated?(encoded)
+           encoded[0..3] == MagicNumber
+         end
+
+         def aggregated_size_offset(partition_key)
+           data = 'd'
+           encoded = aggregate([record(data)], partition_key)
+           finalize(encoded).size - data.size
+         end
+
+         module Mixin
+           AggregateOffset = 25
+           RecordOffset = 10
+
+           module Params
+             include Fluent::Configurable
+           end
+
+           def self.included(mod)
+             mod.include Params
+           end
+
+           def aggregator
+             @aggregator ||= Aggregator.new
+           end
+         end
+       end
+     end
+   end
+ end
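
The Aggregator above implements the KPL aggregation container format: a 4-byte magic number (F3 89 9A C2), a protobuf-encoded AggregatedRecord message, and a trailing 16-byte MD5 digest of that message. The framing itself is independent of protobuf, so the sketch below shows just the frame-and-verify steps from aggregate and deaggregate, using a placeholder string where the encoded AggregatedRecord would go.

    require 'digest'

    MAGIC = ['F3899AC2'].pack('H*') # the 4-byte KPL magic number

    # Frame an encoded message the way Aggregator#aggregate does:
    # magic number, payload, then an MD5 digest of the payload.
    def frame(message)
      [MAGIC, message, Digest::MD5.digest(message)].pack('A4A*A16')
    end

    # Split and verify a framed blob the way Aggregator#deaggregate does.
    def unframe(encoded)
      raise 'invalid magic number' unless encoded[0..3] == MAGIC
      message = encoded[4..encoded.length - 17]
      digest = encoded[encoded.length - 16..-1]
      raise 'digest mismatch' unless Digest::MD5.digest(message) == digest
      message
    end

    blob = frame('protobuf-bytes-would-go-here')
    puts unframe(blob) # => protobuf-bytes-would-go-here

Because consumers recompute the MD5 digest before decoding, a truncated or corrupted aggregated record is rejected up front rather than surfacing as a protobuf decode error.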
@@ -0,0 +1,198 @@
+ #
+ # Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License"). You
+ # may not use this file except in compliance with the License. A copy of
+ # the License is located at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # or in the "license" file accompanying this file. This file is
+ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+ # ANY KIND, either express or implied. See the License for the specific
+ # language governing permissions and limitations under the License.
+
+ require 'fluent_plugin_kinesis/version'
+ require 'fluent/configurable'
+
+ module Fluent
+   module Plugin
+     module KinesisHelper
+       module API
+         MaxRecordSize = 1024 * 1024 # 1 MB
+
+         module APIParams
+           include Fluent::Configurable
+           config_param :max_record_size, :integer, default: MaxRecordSize
+         end
+
+         def self.included(mod)
+           mod.include APIParams
+         end
+
+         def configure(conf)
+           super
+           if @max_record_size > MaxRecordSize
+             raise ConfigError, "max_record_size can't be greater than #{MaxRecordSize/1024} KB."
+           end
+         end
+
+         module BatchRequest
+           module BatchRequestParams
+             include Fluent::Configurable
+             config_param :retries_on_batch_request, :integer, default: 8
+             config_param :reset_backoff_if_success, :bool, default: true
+             config_param :batch_request_max_count, :integer, default: nil
+             config_param :batch_request_max_size, :integer, default: nil
+           end
+
+           def self.included(mod)
+             mod.include BatchRequestParams
+           end
+
+           def configure(conf)
+             super
+             if @batch_request_max_count.nil?
+               @batch_request_max_count = self.class::BatchRequestLimitCount
+             elsif @batch_request_max_count > self.class::BatchRequestLimitCount
+               raise ConfigError, "batch_request_max_count can't be greater than #{self.class::BatchRequestLimitCount}."
+             end
+             if @batch_request_max_size.nil?
+               @batch_request_max_size = self.class::BatchRequestLimitSize
+             elsif @batch_request_max_size > self.class::BatchRequestLimitSize
+               raise ConfigError, "batch_request_max_size can't be greater than #{self.class::BatchRequestLimitSize}."
+             end
+           end
+
+           def size_of_values(record)
+             record.compact.map(&:size).inject(:+) || 0
+           end
+
+           private
+
+           def split_to_batches(records, &block)
+             batch = []
+             size = 0
+             records.each do |record|
+               record_size = size_of_values(record)
+               if batch.size+1 > @batch_request_max_count or size+record_size > @batch_request_max_size
+                 yield(batch, size)
+                 batch = []
+                 size = 0
+               end
+               batch << record
+               size += record_size
+             end
+             yield(batch, size) if batch.size > 0
+           end
+
+           def batch_request_with_retry(batch, retry_count=0, backoff: nil, &block)
+             backoff ||= Backoff.new
+             res = yield(batch)
+             if failed_count(res) > 0
+               failed_records = collect_failed_records(batch, res)
+               if retry_count < @retries_on_batch_request
+                 backoff.reset if @reset_backoff_if_success and any_records_shipped?(res)
+                 wait_second = backoff.next
+                 msg = 'Retrying to request batch. Retry count: %3d, Retry records: %3d, Wait seconds %3.2f' % [retry_count+1, failed_records.size, wait_second]
+                 log.warn(truncate msg)
+                 # TODO: sleep() sometimes doesn't wait the given number of seconds.
+                 # The root cause is unknown so far, so only debug logging is added for now. It should be fixed in the future.
+                 log.debug("#{Thread.current.object_id} sleep start")
+                 sleep(wait_second)
+                 log.debug("#{Thread.current.object_id} sleep finish")
+                 batch_request_with_retry(retry_records(failed_records), retry_count+1, backoff: backoff, &block)
+               else
+                 give_up_retries(failed_records)
+               end
+             end
+           end
+
+           def any_records_shipped?(res)
+             results(res).size > failed_count(res)
+           end
+
+           def collect_failed_records(records, res)
+             failed_records = []
+             results(res).each_with_index do |record, index|
+               next unless record[:error_code]
+               original = case request_type
+                          when :streams, :firehose; records[index]
+                          when :streams_aggregated; records
+                          end
+               failed_records.push(
+                 original: original,
+                 error_code: record[:error_code],
+                 error_message: record[:error_message]
+               )
+             end
+             failed_records
+           end
+
+           def retry_records(failed_records)
+             case request_type
+             when :streams, :firehose
+               failed_records.map{|r| r[:original] }
+             when :streams_aggregated
+               failed_records.first[:original]
+             end
+           end
+
+           def failed_count(res)
+             failed_field = case request_type
+                            when :streams; :failed_record_count
+                            when :streams_aggregated; :failed_record_count
+                            when :firehose; :failed_put_count
+                            end
+             res[failed_field]
+           end
+
+           def results(res)
+             result_field = case request_type
+                            when :streams; :records
+                            when :streams_aggregated; :records
+                            when :firehose; :request_responses
+                            end
+             res[result_field]
+           end
+
+           def give_up_retries(failed_records)
+             failed_records.each {|record|
+               log.error(truncate 'Could not put record, Error: %s/%s, Record: %s' % [
+                 record[:error_code],
+                 record[:error_message],
+                 record[:original]
+               ])
+             }
+           end
+
+           class Backoff
+             def initialize
+               @count = 0
+             end
+
+             def next
+               value = calc(@count)
+               @count += 1
+               value
+             end
+
+             def reset
+               @count = 0
+             end
+
+             private
+
+             def calc(count)
+               (2 ** count) * scaling_factor
+             end
+
+             def scaling_factor
+               0.3 + (0.5-rand) * 0.1
+             end
+           end
+         end
+       end
+     end
+   end
+ end
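
batch_request_with_retry above retries only the failed records of a batch, sleeping between attempts according to the Backoff class: the wait is 2**count seconds scaled by a jitter factor of 0.3 plus or minus 0.05, and the exponent resets whenever at least some records shipped (when reset_backoff_if_success is on). The waits therefore grow roughly 0.3 s, 0.6 s, 1.2 s, and so on, reaching about 38 s by the eighth and final retry under the default retries_on_batch_request of 8. A small sketch of that schedule:

    # Jittered exponential backoff mirroring the Backoff class above:
    # wait = 2**attempt * factor, with factor drawn uniformly from
    # (0.25, 0.35], so successive retries wait ~0.3, 0.6, 1.2, ... seconds.
    def wait_seconds(attempt)
      (2 ** attempt) * (0.3 + (0.5 - rand) * 0.1)
    end

    8.times do |attempt| # retries_on_batch_request defaults to 8
      printf("retry %d waits ~%.2f s\n", attempt + 1, wait_seconds(attempt))
    end

Summed over all eight attempts, the worst case adds roughly 75 seconds of waiting before give_up_retries logs the records that never shipped.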