kinesis-aggregation 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 821942e8c524497425129ef5635c5ed100eb242633f3c7fb23cd50c532d0420c
4
+ data.tar.gz: e87ba1b471301ecf3af12d0560541ac492fc812355993c9dde6d6fd8e9bd017f
5
+ SHA512:
6
+ metadata.gz: f400c84e734a0fae10df8b3605ba33b65ddd34d1c793a350119662588dc24dc1d1ad2beebc7b38e670fa6ac090bd05b65eee929c5f9306f25857f27c7e2e617f
7
+ data.tar.gz: 467f392079b81abd6e5269c25ab075bdd29e7a6a3e473fa6a4ecba1f7158b88ab9eebb1039dff6d52dbcaa0c9ef72cfb0b3d2b612cdc23394980aea8ef514a6e
@@ -0,0 +1,12 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /spec/examples.txt
9
+ /tmp/
10
+
11
+ # rspec failure tracking
12
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
@@ -0,0 +1,6 @@
1
+ ---
2
+ language: ruby
3
+ cache: bundler
4
+ rvm:
5
+ - 2.7.1
6
+ before_install: gem install bundler -v 2.1.4
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in kinesis-aggregation.gemspec
4
+ gemspec
5
+
6
+ gem "rake", "~> 12.0"
7
+ gem "rspec", "~> 3.0"
@@ -0,0 +1,49 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ kinesis-aggregation (0.1.0)
5
+ activesupport (>= 4.0)
6
+ google-protobuf (~> 3.13)
7
+
8
+ GEM
9
+ remote: https://rubygems.org/
10
+ specs:
11
+ activesupport (4.2.11.3)
12
+ i18n (~> 0.7)
13
+ minitest (~> 5.1)
14
+ thread_safe (~> 0.3, >= 0.3.4)
15
+ tzinfo (~> 1.1)
16
+ concurrent-ruby (1.1.7)
17
+ diff-lcs (1.4.4)
18
+ google-protobuf (3.13.0)
19
+ i18n (0.9.5)
20
+ concurrent-ruby (~> 1.0)
21
+ minitest (5.14.2)
22
+ rake (12.3.3)
23
+ rspec (3.9.0)
24
+ rspec-core (~> 3.9.0)
25
+ rspec-expectations (~> 3.9.0)
26
+ rspec-mocks (~> 3.9.0)
27
+ rspec-core (3.9.3)
28
+ rspec-support (~> 3.9.3)
29
+ rspec-expectations (3.9.2)
30
+ diff-lcs (>= 1.2.0, < 2.0)
31
+ rspec-support (~> 3.9.0)
32
+ rspec-mocks (3.9.1)
33
+ diff-lcs (>= 1.2.0, < 2.0)
34
+ rspec-support (~> 3.9.0)
35
+ rspec-support (3.9.3)
36
+ thread_safe (0.3.6)
37
+ tzinfo (1.2.8)
38
+ thread_safe (~> 0.1)
39
+
40
+ PLATFORMS
41
+ ruby
42
+
43
+ DEPENDENCIES
44
+ kinesis-aggregation!
45
+ rake (~> 12.0)
46
+ rspec (~> 3.0)
47
+
48
+ BUNDLED WITH
49
+ 2.1.4
@@ -0,0 +1,76 @@
1
+ # Kinesis::Aggregation
2
+
3
+ This gem reads and writes Kinesis aggregated messages (the record format produced by the AWS Kinesis Producer Library, KPL). It is most useful when writing a Ruby Lambda function that consumes Kinesis aggregated messages.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'kinesis-aggregation'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle install
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install kinesis-aggregation
20
+
21
+ ## Usage
22
+
23
+ ### Aggregation
24
+
25
+ ```ruby
26
+ aggregator = Kinesis::Aggregation::Aggregator.new
27
+
28
+ # explicit_hash_key is optional
29
+ aggregator.add_user_record(partition_key: 'fc03dd88-3e79-448a-b01a-7cf1bd47b784',
30
+ explicit_hash_key: '38486495867508399078159723846051807020',
31
+ data: "RECORD 22 peeobhczbzdmskboupgyq\n")
32
+
33
+ aggregator.add_user_record(partition_key: 'cae41b1c-ea61-43f2-90be-b8755ebf88e2',
34
+ explicit_hash_key: '193787600037681706952143357071916352604',
35
+ data: "RECORD 23 uswkxftxroeusscxsjhno\n")
36
+
37
+ aggregator.add_user_record(partition_key: 'd490690c-e74d-4db2-a3c8-d8f2f184fd23',
38
+ explicit_hash_key: '266880436964932424265466916734068684439',
39
+ data: "RECORD 24 casehdgivfaxeustlyszy\n")
40
+
41
+ aggregator.add_user_record(partition_key: 'c924bc09-b85e-47f1-b32e-336522ee53c8',
42
+ explicit_hash_key: '339606600942967391854603552402021847292',
43
+ data: "RECORD 25 nvffvpmuogdopjhamevrk\n")
44
+ aggregated_record = aggregator.aggregate!
45
+ ```
46
+
47
+ ### Deaggregation
48
+
49
+ ```ruby
50
+ deaggregated_records = Kinesis::Aggregation::Deaggregator.new(aggregated_record).deaggregate
51
+ ```
52
+
53
+ ### Use from within a lambda
54
+
55
+ ```ruby
56
+ require 'kinesis/aggregation'
57
+
58
+ def handler(event:, context:)
59
+ event['Records'].each do |aggregated_record|
60
+ records = Kinesis::Aggregation::Deaggregator.new(aggregated_record).deaggregate
61
+
62
+ # interesting code goes here
63
+ end
64
+ end
65
+ ```
66
+
67
+ ## Development
68
+
69
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
70
+
71
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
72
+
73
+ ## Contributing
74
+
75
+ Bug reports and pull requests are welcome on GitHub at https://github.com/hawknewton/ruby-kinesis-aggregation.
76
+
@@ -0,0 +1,6 @@
1
# Rake entry point for the gem.
require 'bundler/gem_tasks'
require 'rspec/core/rake_task'

# Registers the `spec` task.
RSpec::Core::RakeTask.new(:spec)

# A bare `rake` invocation runs the `spec` task.
task default: :spec
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "kinesis/aggregation"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,29 @@
1
require_relative 'lib/kinesis/aggregation/version'

# Gem specification for kinesis-aggregation.
Gem::Specification.new do |spec|
  project_url = 'https://github.com/hawknewton/ruby-kinesis-aggregation'
  blurb = 'Read and write AWS KPL aggregate messages'

  spec.name    = 'kinesis-aggregation'
  spec.version = Kinesis::Aggregation::VERSION
  spec.authors = ['hawknewton']
  spec.email   = ['hawk.newton@gmail.com']

  spec.summary     = blurb
  spec.description = blurb
  spec.homepage    = project_url
  spec.licenses    = ['Apache-2.0']
  spec.required_ruby_version = Gem::Requirement.new('>= 2.3.0')

  spec.metadata['homepage_uri']    = spec.homepage
  spec.metadata['source_code_uri'] = project_url

  # Package every file tracked by git (listed NUL-separated by `git ls-files -z`)
  # except those under test/, spec/ or features/.
  gem_root = File.dirname(File.expand_path(__FILE__))
  tracked_files = Dir.chdir(gem_root) { `git ls-files -z`.split("\0") }
  spec.files = tracked_files.grep_v(%r{^(test|spec|features)/})

  spec.bindir        = 'exe'
  spec.executables   = spec.files.select { |path| path =~ %r{^exe/} }.map { |path| File.basename(path) }
  spec.require_paths = ['lib']

  spec.add_dependency('google-protobuf', '~> 3.13')
  spec.add_dependency('activesupport', '>= 4.0')
end
@@ -0,0 +1,17 @@
1
// Message definitions for the payload of an aggregated Kinesis record.
// `required`/`optional` labels are proto2-only, so the syntax is declared
// explicitly instead of relying on the compiler default.
syntax = "proto2";

// Container for a batch of user records plus the key tables they index into.
message AggregatedRecord {
  // Partition keys; referenced by Record.partition_key_index.
  repeated string partition_key_table = 1;
  // Explicit hash keys; referenced by Record.explicit_hash_key_index.
  repeated string explicit_hash_key_table = 2;
  // The user records carried by this aggregate.
  repeated Record records = 3;
}

// Key/value pair attached to a record.
message Tag {
  required string key = 1;
  optional string value = 2;
}

// One user record inside an AggregatedRecord.
message Record {
  // Index into AggregatedRecord.partition_key_table.
  required uint64 partition_key_index = 1;
  // Index into AggregatedRecord.explicit_hash_key_table; unset when the
  // record has no explicit hash key.
  optional uint64 explicit_hash_key_index = 2;
  // Raw payload bytes of the user record.
  required bytes data = 3;
  repeated Tag tags = 4;
}
@@ -0,0 +1,12 @@
1
+ require "kinesis/aggregation/version"
2
+ require 'kinesis/aggregation/deaggregator'
3
+ require 'kinesis/aggregation/aggregator'
4
+ require 'kinesis/aggregation/kpl_pb'
5
+ require 'kinesis/aggregation/kinesis_analytics_converter'
6
+
7
# Top-level namespace of the kinesis-aggregation gem.
module Kinesis
  # Namespace holding the gem's aggregation and deaggregation classes.
  module Aggregation
    # Gem-specific error class (a StandardError subclass).
    Error = Class.new(StandardError)
  end
end
@@ -0,0 +1,60 @@
1
# Base64 and Digest are used below; require them here so this file does not
# depend on another file having loaded them first.
require 'base64'
require 'digest'

module Kinesis
  module Aggregation
    # Buffers user records and packs them into a single aggregated record:
    # MAGIC header + protobuf-encoded AggregatedRecord + MD5 digest of the
    # encoded message, Base64-encoded.
    class Aggregator
      # Four-byte marker that prefixes every aggregated payload.
      MAGIC = "\xf3\x89\x9a\xc2".b.freeze

      def initialize
        @user_records = []
      end

      # Queues a user record for the next call to #aggregate!.
      #
      # @param user_record [Hash] read via :partition_key, :data and the
      #   optional :explicit_hash_key
      # @return [Array<Hash>] the records buffered so far
      def add_user_record(user_record)
        @user_records << user_record
      end

      # Packs all buffered records into one aggregated record and empties the
      # buffer.
      #
      # @return [Hash] :partition_key and :explicit_hash_key of the first
      #   buffered record ('' when it has no explicit hash key) and the
      #   Base64-encoded aggregated payload under :data
      # @raise [RuntimeError] if no user records have been added
      def aggregate!
        raise 'cannot aggregate: no user records have been added' if @user_records.empty?

        first = @user_records.first
        result = {
          partition_key: first[:partition_key],
          explicit_hash_key: first[:explicit_hash_key] || '',
          # NOTE: Base64.encode64 emits line feeds inside its output; kept
          # as-is so existing consumers see unchanged data.
          data: Base64.encode64(data)
        }
        @user_records = []
        result
      end

      # @return [Integer] number of records currently buffered
      def num_user_records
        @user_records.length
      end

      private

      # Builds the protobuf message for the buffered records.
      def aggregated_record
        AggregatedRecord.new(
          partition_key_table: @user_records.map { |r| r[:partition_key] },
          explicit_hash_key_table: explicit_hash_key_table,
          records: records
        )
      end

      # Binary payload: MAGIC, the encoded message, then its MD5 digest.
      def data
        bytes = AggregatedRecord.encode(aggregated_record)
        MAGIC + bytes + Digest::MD5.digest(bytes)
      end

      # Explicit hash keys of the records that have one, in record order.
      def explicit_hash_key_table
        @user_records.map { |r| r[:explicit_hash_key] }.compact
      end

      # One protobuf Record per buffered user record.
      def records
        # The explicit hash key table only contains entries for records that
        # carry a key, so a record's slot in that table is the number of
        # keyed records before it, not its position among all records.
        next_hash_key_index = 0
        @user_records.each_with_index.map do |user_record, index|
          record = Record.new(partition_key_index: index, data: user_record[:data])
          unless user_record[:explicit_hash_key].nil?
            record.explicit_hash_key_index = next_hash_key_index
            next_hash_key_index += 1
          end
          record
        end
      end
    end
  end
end
@@ -0,0 +1,81 @@
1
+ require 'base64'
2
+ require 'digest'
3
+ require 'active_support/core_ext/hash/indifferent_access'
4
+
5
module Kinesis
  module Aggregation
    # Splits one Kinesis event record into the user records it contains.
    #
    # A payload is treated as aggregated when it starts with MAGIC and its
    # trailing MD5 digest matches the enclosed protobuf message; any other
    # record is returned unchanged.
    class Deaggregator
      # Four-byte marker that prefixes an aggregated payload.
      MAGIC = "\xf3\x89\x9a\xc2".b.freeze
      # Size in bytes of the MD5 digest that ends an aggregated payload.
      DIGEST_SIZE = 16

      # @param raw_record [Hash] a Kinesis event record, or a record carrying
      #   :kinesisStreamRecordMetadata (converted via KinesisAnalyticsConverter)
      def initialize(raw_record)
        @raw_record = raw_record.with_indifferent_access
      end

      # @return [Array<Hash>] one record per contained user record, or a
      #   single-element array holding the input record when it is not a
      #   valid aggregated record
      def deaggregate
        return [kinesis_record] unless aggregated_record? && computed_md5 == kinesis_record_md5

        outer = kinesis_record[:kinesis]
        envelope = base_record
        aggregated_record.records.map do |record|
          envelope.merge(
            kinesis: {
              kinesisSchemaVersion: outer[:kinesisSchemaVersion],
              sequenceNumber: outer[:sequenceNumber],
              approximateArrivalTimestamp: outer[:approximateArrivalTimestamp],
              explicitHashKey: explicit_hash_for_for(record),
              partitionKey: partition_key_for(record),
              data: Base64.encode64(record.data),
              recordId: outer[:recordId]
            }
          )
        end
      end

      private

      # True when the payload is long enough to hold MAGIC plus a digest and
      # actually starts with MAGIC.
      def aggregated_record?
        data.bytesize >= MAGIC.bytesize + DIGEST_SIZE &&
          data.byteslice(0, MAGIC.bytesize) == MAGIC
      end

      # Decoded protobuf message (memoized).
      def aggregated_record
        @aggregated_record ||= AggregatedRecord.decode(kinesis_record_message_data)
      end

      # The outer record with any :kinesis entry filtered out; #deaggregate
      # merges a fresh :kinesis hash into it.
      def base_record
        kinesis_record.reject { |k, _v| k == :kinesis }
      end

      # Base64-decoded payload of the outer record (memoized).
      def data
        @data ||= Base64.decode64(kinesis_record[:kinesis][:data])
      end

      # Explicit hash key of +record+, or nil when the record has none.
      def explicit_hash_for_for(record)
        # explicit_hash_key_index is optional and reads as 0 when unset, which
        # would wrongly select the first table entry; check presence first
        # when the message exposes a presence predicate.
        if record.respond_to?(:has_explicit_hash_key_index?) &&
           !record.has_explicit_hash_key_index?
          return nil
        end

        aggregated_record.explicit_hash_key_table[record.explicit_hash_key_index]
      end

      # The record in Kinesis event shape (memoized).
      def kinesis_record
        @kinesis_record ||=
          if @raw_record.key?(:kinesisStreamRecordMetadata)
            KinesisAnalyticsConverter.new(@raw_record).convert
          else
            @raw_record
          end
      end

      def partition_key_for(record)
        aggregated_record.partition_key_table[record.partition_key_index]
      end

      # Digest stored in the last DIGEST_SIZE bytes of the payload.
      def kinesis_record_md5
        data.byteslice(-DIGEST_SIZE, DIGEST_SIZE)
      end

      # Bytes between MAGIC and the trailing digest.
      def kinesis_record_message_data
        data.byteslice(MAGIC.bytesize...-DIGEST_SIZE)
      end

      def computed_md5
        Digest::MD5.digest(kinesis_record_message_data)
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
# Reshapes a record carrying :kinesisStreamRecordMetadata into the nested
# { kinesis: { ... } } layout that the Deaggregator reads.
class KinesisAnalyticsConverter
  # @param kinesis_analytics_record [Hash] record with :recordId, :data and a
  #   :kinesisStreamRecordMetadata hash; keys may be symbols or strings
  def initialize(kinesis_analytics_record)
    @kinesis_analytics_record = kinesis_analytics_record
  end

  # @return [Hash] symbol-keyed hash with a single :kinesis entry
  # @raise [ArgumentError] if the record has no kinesisStreamRecordMetadata
  def convert
    metadata = lookup(@kinesis_analytics_record, :kinesisStreamRecordMetadata)
    raise ArgumentError, 'record has no kinesisStreamRecordMetadata' if metadata.nil?

    {
      kinesis: {
        kinesisSchemaVersion: '1.0',
        sequenceNumber: lookup(metadata, :sequenceNumber),
        partitionKey: lookup(metadata, :partitionKey),
        approximateArrivalTimestamp: lookup(metadata, :approximateArrivalTimestamp),
        shardId: lookup(metadata, :shardId),
        data: lookup(@kinesis_analytics_record, :data),
        recordId: lookup(@kinesis_analytics_record, :recordId)
      }
    }
  end

  private

  # Reads +key+ from +hash+, falling back to the string form of the key so
  # that string-keyed hashes (e.g. parsed JSON) work as well as symbol-keyed
  # or indifferent-access ones.
  def lookup(hash, key)
    value = hash[key]
    value.nil? ? hash[key.to_s] : value
  end
end
@@ -0,0 +1,28 @@
1
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
2
+ # source: kpl.proto
3
+
4
+ require 'google/protobuf'
5
+
6
+ Google::Protobuf::DescriptorPool.generated_pool.build do
7
+ add_file("kpl.proto", :syntax => :proto2) do
8
+ add_message "AggregatedRecord" do
9
+ repeated :partition_key_table, :string, 1
10
+ repeated :explicit_hash_key_table, :string, 2
11
+ repeated :records, :message, 3, "Record"
12
+ end
13
+ add_message "Tag" do
14
+ required :key, :string, 1
15
+ optional :value, :string, 2
16
+ end
17
+ add_message "Record" do
18
+ required :partition_key_index, :uint64, 1
19
+ optional :explicit_hash_key_index, :uint64, 2
20
+ required :data, :bytes, 3
21
+ repeated :tags, :message, 4, "Tag"
22
+ end
23
+ end
24
+ end
25
+
26
+ AggregatedRecord = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("AggregatedRecord").msgclass
27
+ Tag = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("Tag").msgclass
28
+ Record = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("Record").msgclass
@@ -0,0 +1,5 @@
1
module Kinesis
  module Aggregation
    # Release version of the gem; the gemspec reads this constant.
    VERSION = '0.1.0'
  end
end
metadata ADDED
@@ -0,0 +1,90 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: kinesis-aggregation
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - hawknewton
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2020-11-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: google-protobuf
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '3.13'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '3.13'
27
+ - !ruby/object:Gem::Dependency
28
+ name: activesupport
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '4.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '4.0'
41
+ description: Read and write AWS KPL aggregate messages
42
+ email:
43
+ - hawk.newton@gmail.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".gitignore"
49
+ - ".rspec"
50
+ - ".travis.yml"
51
+ - Gemfile
52
+ - Gemfile.lock
53
+ - README.md
54
+ - Rakefile
55
+ - bin/console
56
+ - bin/setup
57
+ - kinesis-aggregation.gemspec
58
+ - kpl.proto
59
+ - lib/kinesis/aggregation.rb
60
+ - lib/kinesis/aggregation/aggregator.rb
61
+ - lib/kinesis/aggregation/deaggregator.rb
62
+ - lib/kinesis/aggregation/kinesis_analytics_converter.rb
63
+ - lib/kinesis/aggregation/kpl_pb.rb
64
+ - lib/kinesis/aggregation/version.rb
65
+ homepage: https://github.com/hawknewton/ruby-kinesis-aggregation
66
+ licenses:
67
+ - Apache-2.0
68
+ metadata:
69
+ homepage_uri: https://github.com/hawknewton/ruby-kinesis-aggregation
70
+ source_code_uri: https://github.com/hawknewton/ruby-kinesis-aggregation
71
+ post_install_message:
72
+ rdoc_options: []
73
+ require_paths:
74
+ - lib
75
+ required_ruby_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: 2.3.0
80
+ required_rubygems_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ requirements: []
86
+ rubygems_version: 3.1.2
87
+ signing_key:
88
+ specification_version: 4
89
+ summary: Read and write AWS KPL aggregate messages
90
+ test_files: []