kinesis_firehose_batcher 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 1f7aa77caaf4838350f55a6d9e53cb238a3b7990
4
+ data.tar.gz: 1e40c75fbf390d61b789dd1ceb8459dd1f926234
5
+ SHA512:
6
+ metadata.gz: 23c200f8d4861b6d8c02fb3844f0000cfb14c56fbb384a9aa39823a998179410a3a9d2e9141dc739cfcdec5ad4ed11d1c2bedb0efef73290fd4b03fde9c7401d
7
+ data.tar.gz: 68e699c094df07422e71e7a6fc6a51d3485650758311f5d0ea8418c96527ce70e88344180a0e73ff63982c6fbc1a456305e8c028d5830771197ecdd8ca1ddae9
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
# Fetch gems over HTTPS so that downloads are protected in transit.
source "https://rubygems.org"

# Tooling needed to build, test and release the gem; there are no runtime dependencies.
group :development do
  gem "rake", '~> 10.5'
  gem "bundler", "~> 1.0"
  gem "rspec", "~> 3.2.0"
  gem "jeweler", "~> 2.0.1"
end
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2016 WeTransfer
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,24 @@
1
+ # kinesis_firehose_batcher
2
+
3
+ Send record strings to AWS Kinesis Firehose in sensible batches (staying within the limits for the batch
4
+ size, maximum number of records and maximum record length).
5
+
6
+ client = Aws::Firehose::Client.new(region: "us-east-1")
7
+ batcher = KinesisFirehoseBatcher.new(client: client, delivery_stream_name: 'my-stream')
8
+ 9000.times { batcher << JSON.dump(some_record) }
9
+ batcher.send!
10
+
11
+ ## Contributing to kinesis_firehose_batcher
12
+
13
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
14
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it.
15
+ * Fork the project.
16
+ * Start a feature/bugfix branch.
17
+ * Commit and push until you are happy with your contribution.
18
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
19
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or changing them is otherwise necessary, that is fine, but please isolate the change in its own commit so I can cherry-pick around it.
20
+
21
+ ## Copyright
22
+
23
+ Copyright (c) 2016 WeTransfer. See LICENSE.txt for further details.
24
+
@@ -0,0 +1,37 @@
1
# encoding: utf-8

# Build script: sets up Bundler, declares the gem via Jeweler and wires up
# the RSpec task as the default Rake task.

require 'rubygems'
require 'bundler'

# Activate the gems from the Gemfile; bail out with Bundler's own status code
# when they are not installed.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end

require 'rake'
require 'jeweler'
require_relative 'lib/kinesis_firehose_batcher'

# The block argument is a Gem::Specification,
# see http://guides.rubygems.org/specification-reference/ for more options.
# Dependencies are defined in the Gemfile.
Jeweler::Tasks.new do |gemspec|
  gemspec.name = "kinesis_firehose_batcher"
  gemspec.version = KinesisFirehoseBatcher::VERSION
  gemspec.summary = "Batch-send records to AWS Kinesis Firehose"
  gemspec.description = "Sends records to Firehose, automatically honors the limits"
  gemspec.homepage = "https://github.com/wetransfer/kinesis_firehose_batcher"
  gemspec.license = "MIT"
  gemspec.authors = ["Julik Tarkhanov"]
  gemspec.email = "me@julik.nl"
end
Jeweler::RubygemsDotOrgTasks.new

require 'rspec/core'
require 'rspec/core/rake_task'

# `rake spec` runs every *_spec.rb file under spec/
RSpec::Core::RakeTask.new(:spec) do |spec_task|
  spec_task.pattern = FileList['spec/**/*_spec.rb']
end

task default: :spec
@@ -0,0 +1,59 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+ # stub: kinesis_firehose_batcher 0.0.1 ruby lib
6
+
7
+ Gem::Specification.new do |s|
8
+ s.name = "kinesis_firehose_batcher"
9
+ s.version = "0.0.1"
10
+
11
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
+ s.require_paths = ["lib"]
13
+ s.authors = ["Julik Tarkhanov"]
14
+ s.date = "2016-01-24"
15
+ s.description = "Sends records to Firehose, automatically honors the limits"
16
+ s.email = "me@julik.nl"
17
+ s.extra_rdoc_files = [
18
+ "LICENSE.txt",
19
+ "README.md"
20
+ ]
21
+ s.files = [
22
+ ".document",
23
+ ".rspec",
24
+ "Gemfile",
25
+ "LICENSE.txt",
26
+ "README.md",
27
+ "Rakefile",
28
+ "kinesis_firehose_batcher.gemspec",
29
+ "lib/kinesis_firehose_batcher.rb",
30
+ "spec/kinesis_firehose_batcher_spec.rb",
31
+ "spec/spec_helper.rb"
32
+ ]
33
+ s.homepage = "https://github.com/wetransfer/kinesis_firehose_batcher"
34
+ s.licenses = ["MIT"]
35
+ s.rubygems_version = "2.2.2"
36
+ s.summary = "Batch-send records to AWS Kinesis Firehose"
37
+
38
+ if s.respond_to? :specification_version then
39
+ s.specification_version = 4
40
+
41
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
42
+ s.add_development_dependency(%q<rake>, ["~> 10.5"])
43
+ s.add_development_dependency(%q<bundler>, ["~> 1.0"])
44
+ s.add_development_dependency(%q<rspec>, ["~> 3.2.0"])
45
+ s.add_development_dependency(%q<jeweler>, ["~> 2.0.1"])
46
+ else
47
+ s.add_dependency(%q<rake>, ["~> 10.5"])
48
+ s.add_dependency(%q<bundler>, ["~> 1.0"])
49
+ s.add_dependency(%q<rspec>, ["~> 3.2.0"])
50
+ s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
51
+ end
52
+ else
53
+ s.add_dependency(%q<rake>, ["~> 10.5"])
54
+ s.add_dependency(%q<bundler>, ["~> 1.0"])
55
+ s.add_dependency(%q<rspec>, ["~> 3.2.0"])
56
+ s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
57
+ end
58
+ end
59
+
@@ -0,0 +1,127 @@
1
# Accumulates record strings in memory and sends them to a Firehose delivery
# stream through the supplied client, splitting the buffer into batches that
# stay below the record-count and byte-size limits of a single request.
class KinesisFirehoseBatcher
  VERSION = '0.0.1'

  # Limits applied to one batch request and to one record (in bytes / records)
  MAX_BYTES_PER_BATCH = 4 * 1024 * 1024
  MAX_RECORDS_PER_BATCH = 500
  MAX_BYTES_PER_RECORD = 1000 * 1024

  # Gets raised when Firehose still refuses to accept records
  # even once the tries have been exhausted
  class RetriesExahusted < StandardError
  end

  # Correctly spelled alias. The misspelled constant stays the canonical class
  # so that existing `rescue KinesisFirehoseBatcher::RetriesExahusted` keeps working.
  RetriesExhausted = RetriesExahusted

  # Gets raised when a record string is too large to be sent via Firehose.
  # If you encounter this, you need to use a side-channel to send data to
  # the Firehose destination outside of the Firehose flow. For example,
  # if you have a stream dumping data to S3 you might have to just grab the
  # record and upload it separately.
  class RecordTooLarge < StandardError
    # @return [String] the record that was too large
    attr_reader :record

    # @param failed_record_string [String] the offending record
    # @param args_for_super [Array] forwarded to StandardError (usually the message)
    def initialize(failed_record_string, *args_for_super)
      @record = failed_record_string
      super(*args_for_super)
    end
  end

  # @param client [#put_record_batch] the Firehose client used for sending
  # @param delivery_stream_name [String] name of the delivery stream to send to
  # @param max_retries [Integer] how many sends of one batch are attempted before giving up
  def initialize(client:, delivery_stream_name:, max_retries: 100)
    @buffer = []
    @client = client
    @delivery_stream_name = delivery_stream_name
    @max_retries = max_retries
  end

  # Add a record (string) to the batch
  #
  # @param str [String] The record to add
  # @return [KinesisFirehoseBatcher] self, so that chained `<<` calls are size-checked too
  # @raise [RecordTooLarge] when the record reaches MAX_BYTES_PER_RECORD
  def <<(str)
    if str.bytesize >= MAX_BYTES_PER_RECORD
      msg = "Record of %d bytes reaches the per-record limit of %d bytes" % [str.bytesize, MAX_BYTES_PER_RECORD]
      raise RecordTooLarge.new(str, msg)
    end
    @buffer << str
    # Return self rather than the internal Array: otherwise `batcher << a << b`
    # would append `b` straight to the Array and skip the size check.
    self
  end

  # Send the accumulated records in the buffer, and empty the buffer afterwards.
  # If sending raises, only the records that have not been delivered yet are kept
  # in the buffer, so a later call does not resend batches that already went through.
  #
  # @return [void]
  # @raise [RetriesExahusted] when a batch still has failed records after max_retries sends
  def send!
    return if @buffer.empty?

    pending_batches = split_into_batches(@buffer.dup)
    begin
      until pending_batches.empty?
        # send_with_retries prunes delivered records from the batch in place
        send_with_retries(pending_batches.first)
        pending_batches.shift
      end
    ensure
      # Empty on success; on failure whatever is still undelivered
      @buffer.replace(pending_batches.flatten(1))
    end
    nil
  end

  # Tells how many records are in the buffer, ready to send
  #
  # @return [Integer] the number of records outstanding
  def buffered
    @buffer.length
  end

  private

  # Accepts an array of Strings (records) and sends them via the client
  # supplied to the constructor
  #
  # Kept in a separate method because normal Kinesis (not Firehose) has
  # a different way of packing records and a different method signature
  #
  # @param record_strings [Array<String>] the array of records to send
  # @return [Aws::Firehose::Types::PutRecordBatchOutput]
  def send_via_client(record_strings)
    record_hashes = record_strings.map { |str| {data: str} }
    @client.put_record_batch(records: record_hashes, delivery_stream_name: @delivery_stream_name)
  end

  # Recursively halves the given records until every part fits into one request.
  #
  # @param record_strings [Array<String>] the records to partition
  # @return [Array<Array<String>>] the batches, in the original record order
  def split_into_batches(record_strings)
    return [] if record_strings.empty?
    return [record_strings] unless overflow?(record_strings)

    if record_strings.length < 2
      # A lone record that overflows cannot be split any further (this can only
      # happen if the string was grown after it had been buffered).
      raise RecordTooLarge.new(record_strings.first, "A single record exceeds the batch size limit")
    end

    record_strings.each_slice(record_strings.length / 2).flat_map do |slice|
      split_into_batches(slice)
    end
  end

  # Tells whether the given records are too many or too large for one request.
  #
  # @param array_of_record_strings [Array<String>]
  # @return [Boolean]
  def overflow?(array_of_record_strings)
    # Each PutRecordBatch request supports up to 500 records.
    # Each record in the request can be as large as 1,000 KB (before base64 encoding),
    # up to a limit of 4 MB for the entire request.
    array_of_record_strings.length >= MAX_RECORDS_PER_BATCH ||
      packet_size(array_of_record_strings) >= MAX_BYTES_PER_BATCH
  end

  # Sends one batch and keeps resending the records reported as failed.
  # The passed array is pruned in place, so it always holds the records
  # that have not been accepted yet.
  #
  # @param record_strings [Array<String>] the batch to send (mutated)
  # @return [void]
  # @raise [RetriesExahusted] when records still fail after @max_retries sends
  def send_with_retries(record_strings)
    result = send_via_client(record_strings)
    tries = 0

    while result.failed_put_count.nonzero?
      # Keep only the records that did not get a record ID (i.e. were not accepted)
      responses = result.request_responses
      undelivered = record_strings.each_with_index.reject do |_record, record_i|
        responses[record_i] && responses[record_i].record_id
      end
      record_strings.replace(undelivered.map(&:first))

      tries += 1
      if tries >= @max_retries
        msg = "%d records still failed to send after %d tries" % [result.failed_put_count, tries]
        raise RetriesExahusted.new(msg)
      end

      # ... and try to send again
      result = send_via_client(record_strings)
    end
  end

  # @param array_of_strings [Array<String>]
  # @return [Integer] the combined byte size of all the strings
  def packet_size(array_of_strings)
    array_of_strings.inject(0) { |bytesize, record_str| bytesize + record_str.bytesize }
  end
end
@@ -0,0 +1,113 @@
1
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
require 'securerandom'

# Exercises batching, retrying and size validation of KinesisFirehoseBatcher
# against test doubles standing in for the Firehose client.
describe KinesisFirehoseBatcher do
  it "batches the records for sending" do
    client = double('Aws::Firehose::Client')
    batcher = described_class.new(client: client, delivery_stream_name: 'some-stream')

    2000.times { batcher << "Some message\n" }
    1000.times { batcher << "Another message\n" }

    expect(batcher.buffered).to eq(3000)

    batcher << "Closing message"

    # Record the arguments of every put_record_batch call for inspection below
    message_sends = []
    expect(client).to receive(:put_record_batch).exactly(9).times { |*args|
      message_sends << args
      double(:failed_put_count => 0)
    }

    batcher.send!

    expect(message_sends).not_to be_empty
    message_sends.each do |one_put_batch_call|
      call_args = one_put_batch_call[0]
      expect(call_args[:delivery_stream_name]).to eq('some-stream')

      records = call_args[:records]
      expect(records).not_to be_empty
      expect(records.length).to be < KinesisFirehoseBatcher::MAX_RECORDS_PER_BATCH

      entire_message = records.map { |record| record.fetch(:data) }.join
      expect(entire_message.bytesize).to be < KinesisFirehoseBatcher::MAX_BYTES_PER_BATCH
    end

    # Every buffered record must have been sent
    total_records = message_sends.map { |call| call[0][:records] }.flatten
    expect(total_records.length).to eq(3001)
  end

  it 'attempts to resend the records that failed' do
    client = double('Aws::Firehose::Client')
    batcher = described_class.new(client: client, delivery_stream_name: 'some-stream')

    # First send: the middle record is rejected (it gets no record ID)
    expect(batcher).to receive(:send_via_client).once.with(['Hello 1', 'Hello 2', 'Hello 3']) {
      fake_record_responses = [
        double(record_id: SecureRandom.hex(2)),
        double(record_id: nil),
        double(record_id: SecureRandom.hex(2)),
      ]
      double('Firehose response', :request_responses => fake_record_responses, :failed_put_count => 1)
    }

    # Second send: only the rejected record is submitted, and it succeeds
    expect(batcher).to receive(:send_via_client).once.with(['Hello 2']) {
      fake_record_responses = [
        double(record_id: SecureRandom.hex(2)),
      ]
      double('Firehose response', :request_responses => fake_record_responses, :failed_put_count => 0)
    }

    (1..3).each { |i| batcher << "Hello #{i}" }
    batcher.send!
  end

  it 'sends via the given client' do
    client = double('Aws::Firehose::Client')
    batcher = described_class.new(client: client, delivery_stream_name: 'some-stream')
    expect(client).to receive(:put_record_batch).with({
      delivery_stream_name: 'some-stream',
      records: [{data: 'Hello 1'}, {data: 'Hello 2'}, {data: 'Hello 3'}]
    }).and_return(double(failed_put_count: 0))

    (1..3).each { |i| batcher << "Hello #{i}" }

    batcher.send!
  end

  it 'gives up after the set number of retries' do
    client = double('Aws::Firehose::Client')
    # Pass the named double that is set up above (it is never called, since
    # send_via_client is stubbed below).
    batcher = described_class.new(client: client, delivery_stream_name: 'some-stream', max_retries: 123)

    tries_used = 0
    allow(batcher).to receive(:send_via_client) { |record_strings|
      tries_used += 1
      # Always fail one record of the batch
      sent_record_response = double(record_id: SecureRandom.hex(2))
      failed_record_response = double(record_id: nil)

      record_responses = [sent_record_response] * (record_strings.length - 1)
      record_responses << failed_record_response
      record_responses.shuffle!

      double('Firehose response', :request_responses => record_responses, :failed_put_count => 1)
    }
    7000.times { batcher << "Hello and goodbye - just a record here\n" }
    expect {
      batcher.send!
    }.to raise_error(KinesisFirehoseBatcher::RetriesExahusted)
    expect(tries_used).to eq(123)
  end

  it 'explicitly fails when a record is too large' do
    batcher = described_class.new(client: double(), delivery_stream_name: 'some-stream')
    expect {
      batcher << Random.new.bytes(1024 * 1024 * 3)
    }.to raise_error { |e|
      expect(e).to be_kind_of(KinesisFirehoseBatcher::RecordTooLarge)
      expect(e.record).to be_kind_of(String)
    }
  end
end
@@ -0,0 +1,13 @@
1
# Make both the library and the spec directory itself requirable by name.
spec_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(spec_dir, '..', 'lib'))
$LOAD_PATH.unshift(spec_dir)

require 'rspec'
require 'kinesis_firehose_batcher'

# Load any supporting files (custom matchers, macros, etc.)
# found in ./support/ and its subdirectories.
Dir["#{spec_dir}/support/**/*.rb"].each do |support_file|
  require support_file
end

# No custom RSpec configuration is needed at the moment.
RSpec.configure do |config|
end
metadata ADDED
@@ -0,0 +1,111 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: kinesis_firehose_batcher
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Julik Tarkhanov
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-01-24 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '10.5'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '10.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 3.2.0
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 3.2.0
55
+ - !ruby/object:Gem::Dependency
56
+ name: jeweler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 2.0.1
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 2.0.1
69
+ description: Sends records to Firehose, automatically honors the limits
70
+ email: me@julik.nl
71
+ executables: []
72
+ extensions: []
73
+ extra_rdoc_files:
74
+ - LICENSE.txt
75
+ - README.md
76
+ files:
77
+ - ".document"
78
+ - ".rspec"
79
+ - Gemfile
80
+ - LICENSE.txt
81
+ - README.md
82
+ - Rakefile
83
+ - kinesis_firehose_batcher.gemspec
84
+ - lib/kinesis_firehose_batcher.rb
85
+ - spec/kinesis_firehose_batcher_spec.rb
86
+ - spec/spec_helper.rb
87
+ homepage: https://github.com/wetransfer/kinesis_firehose_batcher
88
+ licenses:
89
+ - MIT
90
+ metadata: {}
91
+ post_install_message:
92
+ rdoc_options: []
93
+ require_paths:
94
+ - lib
95
+ required_ruby_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ version: '0'
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ requirements: []
106
+ rubyforge_project:
107
+ rubygems_version: 2.2.2
108
+ signing_key:
109
+ specification_version: 4
110
+ summary: Batch-send records to AWS Kinesis Firehose
111
+ test_files: []