adp-fluent-plugin-kinesis 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +7 -0
  2. data/.github/PULL_REQUEST_TEMPLATE.md +6 -0
  3. data/.gitignore +15 -0
  4. data/.travis.yml +56 -0
  5. data/CHANGELOG.md +172 -0
  6. data/CODE_OF_CONDUCT.md +4 -0
  7. data/CONTRIBUTING.md +61 -0
  8. data/CONTRIBUTORS.txt +8 -0
  9. data/Gemfile +18 -0
  10. data/LICENSE.txt +201 -0
  11. data/Makefile +44 -0
  12. data/NOTICE.txt +2 -0
  13. data/README.md +559 -0
  14. data/Rakefile +26 -0
  15. data/adp-fluent-plugin-kinesis.gemspec +71 -0
  16. data/benchmark/task.rake +106 -0
  17. data/gemfiles/Gemfile.fluentd-0.14.22 +6 -0
  18. data/gemfiles/Gemfile.fluentd-1.13.3 +6 -0
  19. data/gemfiles/Gemfile.td-agent-3.1.0 +17 -0
  20. data/gemfiles/Gemfile.td-agent-3.1.1 +17 -0
  21. data/gemfiles/Gemfile.td-agent-3.2.0 +17 -0
  22. data/gemfiles/Gemfile.td-agent-3.2.1 +17 -0
  23. data/gemfiles/Gemfile.td-agent-3.3.0 +17 -0
  24. data/gemfiles/Gemfile.td-agent-3.4.0 +17 -0
  25. data/gemfiles/Gemfile.td-agent-3.4.1 +17 -0
  26. data/gemfiles/Gemfile.td-agent-3.5.0 +17 -0
  27. data/gemfiles/Gemfile.td-agent-3.5.1 +17 -0
  28. data/gemfiles/Gemfile.td-agent-3.6.0 +17 -0
  29. data/gemfiles/Gemfile.td-agent-3.7.0 +17 -0
  30. data/gemfiles/Gemfile.td-agent-3.7.1 +17 -0
  31. data/gemfiles/Gemfile.td-agent-3.8.0 +17 -0
  32. data/gemfiles/Gemfile.td-agent-3.8.1 +18 -0
  33. data/gemfiles/Gemfile.td-agent-4.0.0 +25 -0
  34. data/gemfiles/Gemfile.td-agent-4.0.1 +21 -0
  35. data/gemfiles/Gemfile.td-agent-4.1.0 +21 -0
  36. data/gemfiles/Gemfile.td-agent-4.1.1 +21 -0
  37. data/gemfiles/Gemfile.td-agent-4.2.0 +21 -0
  38. data/lib/fluent/plugin/kinesis.rb +174 -0
  39. data/lib/fluent/plugin/kinesis_helper/aggregator.rb +101 -0
  40. data/lib/fluent/plugin/kinesis_helper/api.rb +254 -0
  41. data/lib/fluent/plugin/kinesis_helper/client.rb +210 -0
  42. data/lib/fluent/plugin/kinesis_helper/compression.rb +27 -0
  43. data/lib/fluent/plugin/out_kinesis_firehose.rb +60 -0
  44. data/lib/fluent/plugin/out_kinesis_streams.rb +72 -0
  45. data/lib/fluent/plugin/out_kinesis_streams_aggregated.rb +79 -0
  46. data/lib/fluent_plugin_kinesis/version.rb +17 -0
  47. metadata +339 -0
data/lib/fluent/plugin/kinesis.rb
@@ -0,0 +1,174 @@
+ #
+ # Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License"). You
+ # may not use this file except in compliance with the License. A copy of
+ # the License is located at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # or in the "license" file accompanying this file. This file is
+ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+ # ANY KIND, either express or implied. See the License for the specific
+ # language governing permissions and limitations under the License.
+
+ require 'fluent/version'
+ require 'fluent/msgpack_factory'
+ require 'fluent/plugin/output'
+ require 'fluent/plugin/kinesis_helper/client'
+ require 'fluent/plugin/kinesis_helper/api'
+ require 'zlib'
+
+ module Fluent
+   module Plugin
+     class KinesisOutput < Fluent::Plugin::Output
+       include KinesisHelper::Client
+       include KinesisHelper::API
+
+       class SkipRecordError < ::StandardError
+         def initialize(message, record)
+           super message
+           @record_message = if record.is_a? Array
+                               record.reverse.map(&:to_s).join(', ')
+                             else
+                               record.to_s
+                             end
+         end
+
+         def to_s
+           super + ": " + @record_message
+         end
+       end
+       class KeyNotFoundError < SkipRecordError
+         def initialize(key, record)
+           super "Key '#{key}' doesn't exist", record
+         end
+       end
+       class ExceedMaxRecordSizeError < SkipRecordError
+         def initialize(size, record)
+           super "Record size limit exceeded in #{size/1024} KB", record
+         end
+       end
+       class InvalidRecordError < SkipRecordError
+         def initialize(record)
+           super "Invalid type of record", record
+         end
+       end
+
+       config_param :data_key, :string, default: nil
+       config_param :log_truncate_max_size, :integer, default: 1024
+       config_param :compression, :string, default: nil
+
+       desc "Formatter calls chomp and removes separator from the end of each record. This option is for compatible format with plugin v2. (default: false)"
+       # https://github.com/awslabs/aws-fluent-plugin-kinesis/issues/142
+       config_param :chomp_record, :bool, default: false
+
+       config_section :format do
+         config_set_default :@type, 'json'
+       end
+       config_section :inject do
+         config_set_default :time_type, 'string'
+         config_set_default :time_format, '%Y-%m-%dT%H:%M:%S.%N%z'
+       end
+
+       config_param :debug, :bool, default: false
+
+       helpers :formatter, :inject
+
+       def configure(conf)
+         super
+         @data_formatter = data_formatter_create(conf)
+       end
+
+       def multi_workers_ready?
+         true
+       end
+
+       def formatted_to_msgpack_binary?
+         true
+       end
+
+       private
+
+       def data_formatter_create(conf)
+         formatter = formatter_create
+         compressor = compressor_create
+         if @data_key.nil?
+           if @chomp_record
+             ->(tag, time, record) {
+               record = inject_values_to_record(tag, time, record)
+               # Formatter calls chomp and removes separator from the end of each record.
+               # This option is for compatible format with plugin v2.
+               # https://github.com/awslabs/aws-fluent-plugin-kinesis/issues/142
+               compressor.call(formatter.format(tag, time, record).chomp.b)
+             }
+           else
+             ->(tag, time, record) {
+               record = inject_values_to_record(tag, time, record)
+               compressor.call(formatter.format(tag, time, record).b)
+             }
+           end
+         else
+           ->(tag, time, record) {
+             raise InvalidRecordError, record unless record.is_a? Hash
+             raise KeyNotFoundError.new(@data_key, record) if record[@data_key].nil?
+             compressor.call(record[@data_key].to_s.b)
+           }
+         end
+       end
+
+       def compressor_create
+         case @compression
+         when "zlib"
+           ->(data) { Zlib::Deflate.deflate(data) }
+         when "gzip"
+           ->(data) { Gzip.compress(data) }
+         else
+           ->(data) { data }
+         end
+       end
+
+       def format_for_api(&block)
+         converted = block.call
+         size = size_of_values(converted)
+         if size > @max_record_size
+           raise ExceedMaxRecordSizeError.new(size, converted)
+         end
+         converted.to_msgpack
+       rescue SkipRecordError => e
+         log.error(truncate e)
+         ''
+       end
+
+       if Gem::Version.new(Fluent::VERSION) >= Gem::Version.new('1.8.0')
+         def msgpack_unpacker(*args)
+           Fluent::MessagePackFactory.msgpack_unpacker(*args)
+         end
+       else
+         include Fluent::MessagePackFactory::Mixin
+       end
+
+       def write_records_batch(chunk, stream_name, &block)
+         unique_id = chunk.dump_unique_id_hex(chunk.unique_id)
+         records = chunk.to_enum(:msgpack_each)
+         split_to_batches(records) do |batch, size|
+           log.debug(sprintf "%s: Write chunk %s / %3d records / %4d KB", stream_name, unique_id, batch.size, size/1024)
+           batch_request_with_retry(batch, &block)
+           log.debug(sprintf "%s: Finish writing chunk", stream_name)
+         end
+       end
+
+       def request_type
+         self.class::RequestType
+       end
+
+       def truncate(msg)
+         if @log_truncate_max_size == 0 or (msg.to_s.size <= @log_truncate_max_size)
+           msg.to_s
+         else
+           msg.to_s[0...@log_truncate_max_size]
+         end
+       end
+     end
+   end
+ end
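
Note: compressor_create above returns a per-record lambda: Zlib deflate for `compression zlib`, a Gzip helper (defined in kinesis_helper/compression.rb, not shown in this diff) for `compression gzip`, and identity otherwise. A minimal standalone sketch of the zlib branch follows; compressor_for is a hypothetical stand-in for the plugin method, not part of the gem:

    require 'json'
    require 'zlib'

    # Hypothetical stand-in for KinesisOutput#compressor_create ("gzip" omitted
    # because its helper module is defined elsewhere in the gem).
    def compressor_for(compression)
      case compression
      when "zlib" then ->(data) { Zlib::Deflate.deflate(data) }
      else ->(data) { data }
      end
    end

    record   = { "message" => "hello " * 100 }.to_json
    deflated = compressor_for("zlib").call(record.b)
    puts "#{record.bytesize} bytes -> #{deflated.bytesize} bytes"
    Zlib::Inflate.inflate(deflated) == record.b  # => true

Whichever lambda is selected runs on every formatted record inside data_formatter_create, so the compression cost is paid per record, not per batch.
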
data/lib/fluent/plugin/kinesis_helper/aggregator.rb
@@ -0,0 +1,101 @@
+ #
+ # Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License"). You
+ # may not use this file except in compliance with the License. A copy of
+ # the License is located at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # or in the "license" file accompanying this file. This file is
+ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+ # ANY KIND, either express or implied. See the License for the specific
+ # language governing permissions and limitations under the License.
+
+ require 'fluent/configurable'
+ require 'google/protobuf'
+
+ Google::Protobuf::DescriptorPool.generated_pool.build do
+   add_message "AggregatedRecord" do
+     repeated :partition_key_table, :string, 1
+     repeated :explicit_hash_key_table, :string, 2
+     repeated :records, :message, 3, "Record"
+   end
+   add_message "Tag" do
+     optional :key, :string, 1
+     optional :value, :string, 2
+   end
+   add_message "Record" do
+     optional :partition_key_index, :uint64, 1
+     optional :explicit_hash_key_index, :uint64, 2
+     optional :data, :bytes, 3
+     repeated :tags, :message, 4, "Tag"
+   end
+ end
+
+ module Fluent
+   module Plugin
+     module KinesisHelper
+       class Aggregator
+         AggregatedRecord = Google::Protobuf::DescriptorPool.generated_pool.lookup("AggregatedRecord").msgclass
+         Tag = Google::Protobuf::DescriptorPool.generated_pool.lookup("Tag").msgclass
+         Record = Google::Protobuf::DescriptorPool.generated_pool.lookup("Record").msgclass
+
+         class InvalidEncodingError < ::StandardError; end
+
+         MagicNumber = ['F3899AC2'].pack('H*')
+
+         def aggregate(records, partition_key)
+           message = AggregatedRecord.encode(AggregatedRecord.new(
+             partition_key_table: ['a', partition_key],
+             records: records.map{|data|
+               Record.new(partition_key_index: 1, data: data)
+             },
+           ))
+           [MagicNumber, message, Digest::MD5.digest(message)].pack("A4A*A16")
+         end
+
+         def deaggregate(encoded)
+           unless aggregated?(encoded)
+             raise InvalidEncodingError, "Invalid MagicNumber #{encoded[0..3]}"
+           end
+           message, digest = encoded[4..encoded.length-17], encoded[encoded.length-16..-1]
+           if Digest::MD5.digest(message) != digest
+             raise InvalidEncodingError, "Digest mismatch #{digest}"
+           end
+           decoded = AggregatedRecord.decode(message)
+           records = decoded.records.map(&:data)
+           partition_key = decoded.partition_key_table[1]
+           [records, partition_key]
+         end
+
+         def aggregated?(encoded)
+           encoded[0..3] == MagicNumber
+         end
+
+         def aggregated_size_offset(partition_key)
+           data = 'd'
+           encoded = aggregate([record(data)], partition_key)
+           finalize(encoded).size - data.size
+         end
+
+         module Mixin
+           AggregateOffset = 25
+           RecordOffset = 10
+
+           module Params
+             include Fluent::Configurable
+           end
+
+           def self.included(mod)
+             mod.include Params
+           end
+
+           def aggregator
+             @aggregator ||= Aggregator.new
+           end
+         end
+       end
+     end
+   end
+ end
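
Note: the aggregate/deaggregate pair above implements the KPL aggregated-record envelope: a 4-byte magic number 0xF3899AC2, the protobuf-encoded AggregatedRecord, then a trailing 16-byte MD5 digest of that message. A standalone sketch of just the framing follows (not part of the gem; the body string stands in for the protobuf payload):

    require 'digest'

    MAGIC = ['F3899AC2'].pack('H*')                # the 4-byte KPL magic number
    body  = 'stand-in for AggregatedRecord bytes'  # hypothetical payload
    blob  = [MAGIC, body, Digest::MD5.digest(body)].pack('A4A*A16')

    # Receiver side, mirroring Aggregator#deaggregate:
    raise 'not KPL-aggregated' unless blob[0..3] == MAGIC
    message = blob[4..blob.length - 17]   # bytes between magic and digest
    digest  = blob[blob.length - 16..-1]  # trailing 16-byte MD5 checksum
    raise 'digest mismatch' unless Digest::MD5.digest(message) == digest
    puts message == body                  # => true

The AggregateOffset and RecordOffset constants in Mixin appear to budget for this fixed envelope plus per-record protobuf field overhead when batches are sized.
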
data/lib/fluent/plugin/kinesis_helper/api.rb
@@ -0,0 +1,254 @@
+ #
+ # Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License"). You
+ # may not use this file except in compliance with the License. A copy of
+ # the License is located at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # or in the "license" file accompanying this file. This file is
+ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+ # ANY KIND, either express or implied. See the License for the specific
+ # language governing permissions and limitations under the License.
+
+ require 'fluent_plugin_kinesis/version'
+ require 'fluent/configurable'
+ require 'benchmark'
+
+ module Fluent
+   module Plugin
+     module KinesisHelper
+       module API
+         MaxRecordSize = 1024 * 1024 # 1 MB
+
+         module APIParams
+           include Fluent::Configurable
+           config_param :max_record_size, :integer, default: MaxRecordSize
+         end
+
+         def self.included(mod)
+           mod.include APIParams
+         end
+
+         def configure(conf)
+           super
+           if @max_record_size > MaxRecordSize
+             raise ConfigError, "max_record_size can't be greater than #{MaxRecordSize/1024} KB."
+           end
+         end
+
+         module BatchRequest
+           module BatchRequestParams
+             include Fluent::Configurable
+             config_param :retries_on_batch_request, :integer, default: 8
+             config_param :reset_backoff_if_success, :bool, default: true
+             config_param :batch_request_max_count, :integer, default: nil
+             config_param :batch_request_max_size, :integer, default: nil
+             config_param :drop_failed_records_after_batch_request_retries, :bool, default: true
+             config_param :monitor_num_of_batch_request_retries, :bool, default: false
+           end
+
+           def self.included(mod)
+             mod.include BatchRequestParams
+           end
+
+           def configure(conf)
+             super
+             if @batch_request_max_count.nil?
+               @batch_request_max_count = self.class::BatchRequestLimitCount
+             elsif @batch_request_max_count > self.class::BatchRequestLimitCount
+               raise ConfigError, "batch_request_max_count can't be greater than #{self.class::BatchRequestLimitCount}."
+             end
+             if @batch_request_max_size.nil?
+               @batch_request_max_size = self.class::BatchRequestLimitSize
+             elsif @batch_request_max_size > self.class::BatchRequestLimitSize
+               raise ConfigError, "batch_request_max_size can't be greater than #{self.class::BatchRequestLimitSize}."
+             end
+           end
+
+           def size_of_values(record)
+             record.compact.map(&:size).inject(:+) || 0
+           end
+
+           private
+
+           def split_to_batches(records, &block)
+             batch = []
+             size = 0
+             records.each do |record|
+               record_size = size_of_values(record)
+               if batch.size+1 > @batch_request_max_count or size+record_size > @batch_request_max_size
+                 yield(batch, size)
+                 batch = []
+                 size = 0
+               end
+               batch << record
+               size += record_size
+             end
+             yield(batch, size) if batch.size > 0
+           end
+
+           def batch_request_with_retry(batch, retry_count=0, backoff: nil, &block)
+             backoff ||= Backoff.new
+             res = yield(batch)
+             if failed_count(res) > 0
+               failed_records = collect_failed_records(batch, res)
+               if retry_count < @retries_on_batch_request
+                 backoff.reset if @reset_backoff_if_success and any_records_shipped?(res)
+                 wait_second = backoff.next
+                 msg = 'Retrying to request batch. Retry count: %3d, Retry records: %3d, Wait seconds %3.2f' % [retry_count+1, failed_records.size, wait_second]
+                 log.warn(truncate msg)
+                 # Increment num_errors to monitor batch request retries from "monitor_agent" or "fluent-plugin-prometheus"
+                 increment_num_errors if @monitor_num_of_batch_request_retries
+                 reliable_sleep(wait_second)
+                 batch_request_with_retry(retry_records(failed_records), retry_count+1, backoff: backoff, &block)
+               else
+                 give_up_retries(failed_records)
+               end
+             end
+           end
+
+           # Sleep sometimes does not sleep as long as we ask; our guess is that
+           # something wakes up the thread, so we go back to sleep when that happens.
+           # TODO: find out what is cutting the sleep short and stop it at the source instead
+           def reliable_sleep(wait_second)
+             loop do
+               actual = Benchmark.realtime { sleep(wait_second) }
+               break if actual >= wait_second
+               log.error("#{Thread.current.object_id} sleep failed expected #{wait_second} but slept #{actual}")
+               wait_second -= actual
+             end
+           end
+
+           def any_records_shipped?(res)
+             results(res).size > failed_count(res)
+           end
+
+           def collect_failed_records(records, res)
+             failed_records = []
+             results(res).each_with_index do |record, index|
+               next unless record[:error_code]
+               original = case request_type
+                          when :streams, :firehose; records[index]
+                          when :streams_aggregated; records
+                          end
+               failed_records.push(
+                 original: original,
+                 error_code: record[:error_code],
+                 error_message: record[:error_message]
+               )
+             end
+             failed_records
+           end
+
+           def retry_records(failed_records)
+             case request_type
+             when :streams, :firehose
+               failed_records.map{|r| r[:original] }
+             when :streams_aggregated
+               failed_records.first[:original]
+             end
+           end
+
+           def failed_count(res)
+             failed_field = case request_type
+                            when :streams; :failed_record_count
+                            when :streams_aggregated; :failed_record_count
+                            when :firehose; :failed_put_count
+                            end
+             res[failed_field]
+           end
+
+           def results(res)
+             result_field = case request_type
+                            when :streams; :records
+                            when :streams_aggregated; :records
+                            when :firehose; :request_responses
+                            end
+             res[result_field]
+           end
+
+           def give_up_retries(failed_records)
+             failed_records.each {|record|
+               log.error(truncate 'Could not put record, Error: %s/%s, Record: %s' % [
+                 record[:error_code],
+                 record[:error_message],
+                 record[:original]
+               ])
+             }
+
+             if @drop_failed_records_after_batch_request_retries
+               # Increment num_errors to monitor batch request failure from "monitor_agent" or "fluent-plugin-prometheus"
+               increment_num_errors
+             else
+               # Raise error and return chunk to Fluentd for retrying
+               case request_type
+               # @see https://docs.aws.amazon.com/kinesis/latest/APIReference/API_PutRecords.html
+               # @see https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/Kinesis/Client.html#put_records-instance_method
+               # @see https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/Kinesis/Errors.html
+               when :streams, :streams_aggregated
+                 provisioned_throughput_exceeded_records = failed_records.select { |record| record[:error_code] == 'ProvisionedThroughputExceededException' }
+                 target_failed_record = provisioned_throughput_exceeded_records.first || failed_records.first
+                 target_error = provisioned_throughput_exceeded_records.empty? ?
+                   Aws::Kinesis::Errors::ServiceError :
+                   Aws::Kinesis::Errors::ProvisionedThroughputExceededException
+               # @see https://docs.aws.amazon.com/firehose/latest/APIReference/API_PutRecordBatch.html
+               # @see https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/Firehose/Client.html#put_record_batch-instance_method
+               # @see https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/Firehose/Errors.html
+               when :firehose
+                 service_unavailable_exception_records = failed_records.select { |record| record[:error_code] == 'ServiceUnavailableException' }
+                 target_failed_record = service_unavailable_exception_records.first || failed_records.first
+                 target_error = service_unavailable_exception_records.empty? ?
+                   Aws::Firehose::Errors::ServiceError :
+                   Aws::Firehose::Errors::ServiceUnavailableException
+               end
+               log.error("Raise #{target_failed_record[:error_code]} and return chunk to Fluentd buffer for retrying")
+               raise target_error.new(Seahorse::Client::RequestContext.new, target_failed_record[:error_message])
+             end
+           end
+
+           def increment_num_errors
+             # Prepare Fluent::Plugin::Output instance variables to count errors in this method.
+             # These instance variables are initialized here for possible future breaking changes of Fluentd.
+             @num_errors ||= 0
+             # @see https://github.com/fluent/fluentd/commit/d245454658d16170431d276fcd5849fb0d88ab2b
+             if Gem::Version.new(Fluent::VERSION) >= Gem::Version.new('1.7.0')
+               @counter_mutex ||= Mutex.new
+               @counter_mutex.synchronize{ @num_errors += 1 }
+             else
+               @counters_monitor ||= Monitor.new
+               @counters_monitor.synchronize{ @num_errors += 1 }
+             end
+           end
+
+           class Backoff
+             def initialize
+               @count = 0
+             end
+
+             def next
+               value = calc(@count)
+               @count += 1
+               value
+             end
+
+             def reset
+               @count = 0
+             end
+
+             private
+
+             def calc(count)
+               (2 ** count) * scaling_factor
+             end
+
+             def scaling_factor
+               0.3 + (0.5-rand) * 0.1
+             end
+           end
+         end
+       end
+     end
+   end
+ end
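
Note: the Backoff class above implements exponential backoff with jitter: each retry waits (2 ** count) * scaling_factor seconds, where scaling_factor is 0.3 plus or minus up to 0.05, and the count can be reset after a partially successful request when reset_backoff_if_success is set. A quick sketch of the schedule (jitter omitted; expected_wait is a hypothetical helper, not part of the gem):

    # Mean wait before retry n+1, ignoring the +/-0.05 jitter.
    def expected_wait(count)
      (2 ** count) * 0.3
    end

    (0..7).each { |n| printf("retry %d waits ~%.1f s\n", n + 1, expected_wait(n)) }
    total = (0..7).sum { |n| expected_wait(n) }
    puts total  # => 76.5, the mean cumulative wait at the default
                #    retries_on_batch_request = 8

So with the defaults, a record that fails every attempt is retried for roughly 76 seconds before give_up_retries either drops it or raises the failure back to the Fluentd buffer.
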