fluentd-plugin-kinesis-intuit 2.1.1

@@ -0,0 +1,20 @@
+ #
+ # Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License"). You
+ # may not use this file except in compliance with the License. A copy of
+ # the License is located at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # or in the "license" file accompanying this file. This file is
+ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+ # ANY KIND, either express or implied. See the License for the specific
+ # language governing permissions and limitations under the License.
+
+ source 'https://rubygems.org'
+
+ # Specify your gem's dependencies in fluent-plugin-kinesis.gemspec
+ gemspec path: ".."
+
+ gem "fluentd", "0.14.10"
@@ -0,0 +1,31 @@
+ #
+ # Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License"). You
+ # may not use this file except in compliance with the License. A copy of
+ # the License is located at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # or in the "license" file accompanying this file. This file is
+ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+ # ANY KIND, either express or implied. See the License for the specific
+ # language governing permissions and limitations under the License.
+
+ source 'https://rubygems.org'
+
+ # Specify your gem's dependencies in fluent-plugin-kinesis.gemspec
+ gemspec path: ".."
+
+ # Specify related gems for td-agent v3.2.0
+ # https://github.com/treasure-data/omnibus-td-agent/blob/v3.2.0/config/projects/td-agent3.rb#L27
+ gem "fluentd", "1.2.2"
+ # https://github.com/treasure-data/omnibus-td-agent/blob/v3.2.0/plugin_gems.rb#L16-L23
+ gem "jmespath", "1.4.0"
+ gem "aws-partitions", "1.87.0"
+ gem "aws-sigv4", "1.0.2"
+ gem "aws-sdk-core", "3.21.2"
+ gem "aws-sdk-kms", "1.5.0"
+ gem "aws-sdk-sqs", "1.3.0"
+ gem "aws-sdk-s3", "1.13.0"
+ gem "fluent-plugin-s3", "1.1.3"
@@ -0,0 +1,146 @@
+ #
+ # Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License"). You
+ # may not use this file except in compliance with the License. A copy of
+ # the License is located at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # or in the "license" file accompanying this file. This file is
+ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+ # ANY KIND, either express or implied. See the License for the specific
+ # language governing permissions and limitations under the License.
+
+ require 'fluent/plugin/output'
+ require 'fluent/plugin/kinesis_helper/client'
+ require 'fluent/plugin/kinesis_helper/api'
+ require 'zlib'
+
+ module Fluent
+   module Plugin
+     class KinesisOutput < Fluent::Plugin::Output
+       include Fluent::MessagePackFactory::Mixin
+       include KinesisHelper::Client
+       include KinesisHelper::API
+
+       class SkipRecordError < ::StandardError
+         def initialize(message, record)
+           super message
+           @record_message = if record.is_a? Array
+                               record.reverse.map(&:to_s).join(', ')
+                             else
+                               record.to_s
+                             end
+         end
+
+         def to_s
+           super + ": " + @record_message
+         end
+       end
+       class KeyNotFoundError < SkipRecordError
+         def initialize(key, record)
+           super "Key '#{key}' doesn't exist", record
+         end
+       end
+       class ExceedMaxRecordSizeError < SkipRecordError
+         def initialize(size, record)
+           super "Record size limit exceeded: #{size/1024} KB", record
+         end
+       end
+       class InvalidRecordError < SkipRecordError
+         def initialize(record)
+           super "Invalid type of record", record
+         end
+       end
+
+       config_param :data_key, :string, default: nil
+       config_param :log_truncate_max_size, :integer, default: 1024
+       config_param :compression, :string, default: nil
+       config_section :format do
+         config_set_default :@type, 'json'
+       end
+       config_section :inject do
+         config_set_default :time_type, 'string'
+         config_set_default :time_format, '%Y-%m-%dT%H:%M:%S.%N%z'
+       end
+
+       config_param :debug, :bool, default: false
+
+       helpers :formatter, :inject
+
+       def configure(conf)
+         super
+         @data_formatter = data_formatter_create(conf)
+       end
+
+       def multi_workers_ready?
+         true
+       end
+
+       private
+
+       def data_formatter_create(conf)
+         formatter = formatter_create
+         compressor = compressor_create
+         if @data_key.nil?
+           ->(tag, time, record) {
+             record = inject_values_to_record(tag, time, record)
+             compressor.call(formatter.format(tag, time, record).chomp.b)
+           }
+         else
+           ->(tag, time, record) {
+             raise InvalidRecordError, record unless record.is_a? Hash
+             raise KeyNotFoundError.new(@data_key, record) if record[@data_key].nil?
+             compressor.call(record[@data_key].to_s.b)
+           }
+         end
+       end
+
+       def compressor_create
+         case @compression
+         when "zlib"
+           ->(data) { Zlib::Deflate.deflate(data) }
+         else
+           ->(data) { data }
+         end
+       end
+
+       def format_for_api(&block)
+         converted = block.call
+         size = size_of_values(converted)
+         if size > @max_record_size
+           raise ExceedMaxRecordSizeError.new(size, converted)
+         end
+         converted.to_msgpack
+       rescue SkipRecordError => e
+         log.error(truncate(e))
+         ''
+       end
+
+       def write_records_batch(chunk, &block)
+         unique_id = chunk.dump_unique_id_hex(chunk.unique_id)
+         chunk.open do |io|
+           records = msgpack_unpacker(io).to_enum
+           split_to_batches(records) do |batch, size|
+             log.debug(sprintf "Write chunk %s / %3d records / %4d KB", unique_id, batch.size, size/1024)
+             batch_request_with_retry(batch, &block)
+             log.debug("Finish writing chunk")
+           end
+         end
+       end
+
+       def request_type
+         self.class::RequestType
+       end
+
+       def truncate(msg)
+         if @log_truncate_max_size == 0 or (msg.to_s.size <= @log_truncate_max_size)
+           msg.to_s
+         else
+           msg.to_s[0...@log_truncate_max_size]
+         end
+       end
+     end
+   end
+ end
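
KinesisOutput is the shared base class for the concrete Kinesis outputs, and data_formatter_create builds one of two record pipelines: without data_key the whole record is injected, formatted (JSON by default), and optionally zlib-deflated; with data_key only that field's value is shipped, raising InvalidRecordError or KeyNotFoundError on malformed input. A standalone sketch of the data_key path (record contents hypothetical):

    require 'zlib'

    record   = { 'payload' => 'hello kinesis', 'level' => 'info' }
    data_key = 'payload'

    # Mirrors compressor_create when compression is "zlib".
    compressor = ->(data) { Zlib::Deflate.deflate(data) }

    # Mirrors the data_key branch of data_formatter_create.
    raise 'invalid type of record' unless record.is_a?(Hash)
    raise "key '#{data_key}' doesn't exist" if record[data_key].nil?
    blob = compressor.call(record[data_key].to_s.b)

    puts Zlib::Inflate.inflate(blob)   # => hello kinesis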
@@ -0,0 +1,101 @@
+ #
+ # Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License"). You
+ # may not use this file except in compliance with the License. A copy of
+ # the License is located at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # or in the "license" file accompanying this file. This file is
+ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+ # ANY KIND, either express or implied. See the License for the specific
+ # language governing permissions and limitations under the License.
+
+ require 'fluent/configurable'
+ require 'google/protobuf'
+
+ Google::Protobuf::DescriptorPool.generated_pool.build do
+   add_message "AggregatedRecord" do
+     repeated :partition_key_table, :string, 1
+     repeated :explicit_hash_key_table, :string, 2
+     repeated :records, :message, 3, "Record"
+   end
+   add_message "Tag" do
+     optional :key, :string, 1
+     optional :value, :string, 2
+   end
+   add_message "Record" do
+     optional :partition_key_index, :uint64, 1
+     optional :explicit_hash_key_index, :uint64, 2
+     optional :data, :bytes, 3
+     repeated :tags, :message, 4, "Tag"
+   end
+ end
+
+ module Fluent
+   module Plugin
+     module KinesisHelper
+       class Aggregator
+         AggregatedRecord = Google::Protobuf::DescriptorPool.generated_pool.lookup("AggregatedRecord").msgclass
+         Tag = Google::Protobuf::DescriptorPool.generated_pool.lookup("Tag").msgclass
+         Record = Google::Protobuf::DescriptorPool.generated_pool.lookup("Record").msgclass
+
+         class InvalidEncodingError < ::StandardError; end
+
+         MagicNumber = ['F3899AC2'].pack('H*')
+
+         def aggregate(records, partition_key)
+           message = AggregatedRecord.encode(AggregatedRecord.new(
+             partition_key_table: ['a', partition_key],
+             records: records.map{|data|
+               Record.new(partition_key_index: 1, data: data)
+             },
+           ))
+           [MagicNumber, message, Digest::MD5.digest(message)].pack("A4A*A16")
+         end
+
+         def deaggregate(encoded)
+           unless aggregated?(encoded)
+             raise InvalidEncodingError, "Invalid MagicNumber #{encoded[0..3]}"
+           end
+           message, digest = encoded[4..encoded.length-17], encoded[encoded.length-16..-1]
+           if Digest::MD5.digest(message) != digest
+             raise InvalidEncodingError, "Digest mismatch #{digest}"
+           end
+           decoded = AggregatedRecord.decode(message)
+           records = decoded.records.map(&:data)
+           partition_key = decoded.partition_key_table[1]
+           [records, partition_key]
+         end
+
+         def aggregated?(encoded)
+           encoded[0..3] == MagicNumber
+         end
+
+         def aggregated_size_offset(partition_key)
+           data = 'd'
+           encoded = aggregate([record(data)], partition_key)
+           finalize(encoded).size - data.size
+         end
+
+         module Mixin
+           AggregateOffset = 25
+           RecordOffset = 10
+
+           module Params
+             include Fluent::Configurable
+           end
+
+           def self.included(mod)
+             mod.include Params
+           end
+
+           def aggregator
+             @aggregator ||= Aggregator.new
+           end
+         end
+       end
+     end
+   end
+ end
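
The Aggregator implements the KPL aggregated-record wire format: a 4-byte magic number (F3899AC2), a protobuf AggregatedRecord body, and a trailing 16-byte MD5 digest that deaggregate verifies before decoding. A round-trip sketch, assuming this file is on the load path (note that Digest::MD5 also needs 'digest' to have been required somewhere):

    require 'digest'
    require 'fluent/plugin/kinesis_helper/aggregator'   # path assumed from the module nesting

    agg     = Fluent::Plugin::KinesisHelper::Aggregator.new
    encoded = agg.aggregate(['one'.b, 'two'.b], 'my-partition-key')

    agg.aggregated?(encoded)   # => true, magic number matches
    records, partition_key = agg.deaggregate(encoded)
    records                    # => ["one", "two"]
    partition_key              # => "my-partition-key"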
@@ -0,0 +1,198 @@
+ #
+ # Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License"). You
+ # may not use this file except in compliance with the License. A copy of
+ # the License is located at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # or in the "license" file accompanying this file. This file is
+ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+ # ANY KIND, either express or implied. See the License for the specific
+ # language governing permissions and limitations under the License.
+
+ require 'fluent_plugin_kinesis/version'
+ require 'fluent/configurable'
+
+ module Fluent
+   module Plugin
+     module KinesisHelper
+       module API
+         MaxRecordSize = 1024 * 1024 # 1 MB
+
+         module APIParams
+           include Fluent::Configurable
+           config_param :max_record_size, :integer, default: MaxRecordSize
+         end
+
+         def self.included(mod)
+           mod.include APIParams
+         end
+
+         def configure(conf)
+           super
+           if @max_record_size > MaxRecordSize
+             raise ConfigError, "max_record_size can't be greater than #{MaxRecordSize/1024} KB."
+           end
+         end
+
+         module BatchRequest
+           module BatchRequestParams
+             include Fluent::Configurable
+             config_param :retries_on_batch_request, :integer, default: 8
+             config_param :reset_backoff_if_success, :bool, default: true
+             config_param :batch_request_max_count, :integer, default: nil
+             config_param :batch_request_max_size, :integer, default: nil
+           end
+
+           def self.included(mod)
+             mod.include BatchRequestParams
+           end
+
+           def configure(conf)
+             super
+             if @batch_request_max_count.nil?
+               @batch_request_max_count = self.class::BatchRequestLimitCount
+             elsif @batch_request_max_count > self.class::BatchRequestLimitCount
+               raise ConfigError, "batch_request_max_count can't be greater than #{self.class::BatchRequestLimitCount}."
+             end
+             if @batch_request_max_size.nil?
+               @batch_request_max_size = self.class::BatchRequestLimitSize
+             elsif @batch_request_max_size > self.class::BatchRequestLimitSize
+               raise ConfigError, "batch_request_max_size can't be greater than #{self.class::BatchRequestLimitSize}."
+             end
+           end
+
+           def size_of_values(record)
+             record.compact.map(&:size).inject(:+) || 0
+           end
+
+           private
+
+           def split_to_batches(records, &block)
+             batch = []
+             size = 0
+             records.each do |record|
+               record_size = size_of_values(record)
+               if batch.size + 1 > @batch_request_max_count or size + record_size > @batch_request_max_size
+                 yield(batch, size)
+                 batch = []
+                 size = 0
+               end
+               batch << record
+               size += record_size
+             end
+             yield(batch, size) if batch.size > 0
+           end
+
+           def batch_request_with_retry(batch, retry_count=0, backoff: nil, &block)
+             backoff ||= Backoff.new
+             res = yield(batch)
+             if failed_count(res) > 0
+               failed_records = collect_failed_records(batch, res)
+               if retry_count < @retries_on_batch_request
+                 backoff.reset if @reset_backoff_if_success and any_records_shipped?(res)
+                 wait_second = backoff.next
+                 msg = 'Retrying batch request. Retry count: %3d, Retry records: %3d, Wait seconds %3.2f' % [retry_count + 1, failed_records.size, wait_second]
+                 log.warn(truncate(msg))
+                 # TODO: sleep() sometimes doesn't wait the given number of seconds.
+                 # The root cause is unknown so far, so debug logging is added around it. It should be fixed in the future.
+                 log.debug("#{Thread.current.object_id} sleep start")
+                 sleep(wait_second)
+                 log.debug("#{Thread.current.object_id} sleep finish")
+                 batch_request_with_retry(retry_records(failed_records), retry_count + 1, backoff: backoff, &block)
+               else
+                 give_up_retries(failed_records)
+               end
+             end
+           end
+
+           def any_records_shipped?(res)
+             results(res).size > failed_count(res)
+           end
+
+           def collect_failed_records(records, res)
+             failed_records = []
+             results(res).each_with_index do |record, index|
+               next unless record[:error_code]
+               original = case request_type
+                          when :streams, :firehose; records[index]
+                          when :streams_aggregated; records
+                          end
+               failed_records.push(
+                 original: original,
+                 error_code: record[:error_code],
+                 error_message: record[:error_message]
+               )
+             end
+             failed_records
+           end
+
+           def retry_records(failed_records)
+             case request_type
+             when :streams, :firehose
+               failed_records.map{|r| r[:original] }
+             when :streams_aggregated
+               failed_records.first[:original]
+             end
+           end
+
+           def failed_count(res)
+             failed_field = case request_type
+                            when :streams; :failed_record_count
+                            when :streams_aggregated; :failed_record_count
+                            when :firehose; :failed_put_count
+                            end
+             res[failed_field]
+           end
+
+           def results(res)
+             result_field = case request_type
+                            when :streams; :records
+                            when :streams_aggregated; :records
+                            when :firehose; :request_responses
+                            end
+             res[result_field]
+           end
+
+           def give_up_retries(failed_records)
+             failed_records.each {|record|
+               log.error(truncate('Could not put record, Error: %s/%s, Record: %s' % [
+                 record[:error_code],
+                 record[:error_message],
+                 record[:original]
+               ]))
+             }
+           end
+
+           class Backoff
+             def initialize
+               @count = 0
+             end
+
+             def next
+               value = calc(@count)
+               @count += 1
+               value
+             end
+
+             def reset
+               @count = 0
+             end
+
+             private
+
+             def calc(count)
+               (2 ** count) * scaling_factor
+             end
+
+             def scaling_factor
+               0.3 + (0.5 - rand) * 0.1
+             end
+           end
+         end
+       end
+     end
+   end
+ end
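
Backoff implements jittered exponential backoff: retry n waits about 0.3 * 2**n seconds, with scaling_factor jittering the base over roughly 0.25..0.35. With the default retries_on_batch_request of 8, the nominal worst-case cumulative wait is about 76.5 seconds; reset_backoff_if_success restarts the schedule whenever a request ships at least one record. A jitter-free sketch of the schedule produced by calc(count) above:

    waits = (0...8).map { |n| 0.3 * 2**n }
    # => [0.3, 0.6, 1.2, 2.4, 4.8, 9.6, 19.2, 38.4]
    waits.sum   # => 76.5 seconds across all 8 retries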