kinesis-aggregation 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 821942e8c524497425129ef5635c5ed100eb242633f3c7fb23cd50c532d0420c
4
+ data.tar.gz: e87ba1b471301ecf3af12d0560541ac492fc812355993c9dde6d6fd8e9bd017f
5
+ SHA512:
6
+ metadata.gz: f400c84e734a0fae10df8b3605ba33b65ddd34d1c793a350119662588dc24dc1d1ad2beebc7b38e670fa6ac090bd05b65eee929c5f9306f25857f27c7e2e617f
7
+ data.tar.gz: 467f392079b81abd6e5269c25ab075bdd29e7a6a3e473fa6a4ecba1f7158b88ab9eebb1039dff6d52dbcaa0c9ef72cfb0b3d2b612cdc23394980aea8ef514a6e
@@ -0,0 +1,12 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /spec/examples.txt
9
+ /tmp/
10
+
11
+ # rspec failure tracking
12
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
@@ -0,0 +1,6 @@
1
+ ---
2
+ language: ruby
3
+ cache: bundler
4
+ rvm:
5
+ - 2.7.1
6
+ before_install: gem install bundler -v 2.1.4
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in kinesis-aggregation.gemspec
4
+ gemspec
5
+
6
+ gem "rake", "~> 12.0"
7
+ gem "rspec", "~> 3.0"
@@ -0,0 +1,49 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ kinesis-aggregation (0.1.0)
5
+ activesupport (>= 4.0)
6
+ google-protobuf (~> 3.13)
7
+
8
+ GEM
9
+ remote: https://rubygems.org/
10
+ specs:
11
+ activesupport (4.2.11.3)
12
+ i18n (~> 0.7)
13
+ minitest (~> 5.1)
14
+ thread_safe (~> 0.3, >= 0.3.4)
15
+ tzinfo (~> 1.1)
16
+ concurrent-ruby (1.1.7)
17
+ diff-lcs (1.4.4)
18
+ google-protobuf (3.13.0)
19
+ i18n (0.9.5)
20
+ concurrent-ruby (~> 1.0)
21
+ minitest (5.14.2)
22
+ rake (12.3.3)
23
+ rspec (3.9.0)
24
+ rspec-core (~> 3.9.0)
25
+ rspec-expectations (~> 3.9.0)
26
+ rspec-mocks (~> 3.9.0)
27
+ rspec-core (3.9.3)
28
+ rspec-support (~> 3.9.3)
29
+ rspec-expectations (3.9.2)
30
+ diff-lcs (>= 1.2.0, < 2.0)
31
+ rspec-support (~> 3.9.0)
32
+ rspec-mocks (3.9.1)
33
+ diff-lcs (>= 1.2.0, < 2.0)
34
+ rspec-support (~> 3.9.0)
35
+ rspec-support (3.9.3)
36
+ thread_safe (0.3.6)
37
+ tzinfo (1.2.8)
38
+ thread_safe (~> 0.1)
39
+
40
+ PLATFORMS
41
+ ruby
42
+
43
+ DEPENDENCIES
44
+ kinesis-aggregation!
45
+ rake (~> 12.0)
46
+ rspec (~> 3.0)
47
+
48
+ BUNDLED WITH
49
+ 2.1.4
@@ -0,0 +1,76 @@
1
+ # Kinesis::Aggregation
2
+
3
+ This gem reads and writes Kinesis aggregated messages (the record format produced by the AWS Kinesis Producer Library). It is most useful when writing a Ruby Lambda function that consumes aggregated Kinesis records.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'kinesis-aggregation'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle install
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install kinesis-aggregation
20
+
21
+ ## Usage
22
+
23
+ ### Aggregation
24
+
25
+ ```ruby
26
+ aggregator = Kinesis::Aggregation::Aggregator.new
27
+
28
+ # explicit_hash_key is optional
29
+ aggregator.add_user_record(partition_key: 'fc03dd88-3e79-448a-b01a-7cf1bd47b784',
30
+ explicit_hash_key: '38486495867508399078159723846051807020',
31
+ data: "RECORD 22 peeobhczbzdmskboupgyq\n")
32
+
33
+ aggregator.add_user_record(partition_key: 'cae41b1c-ea61-43f2-90be-b8755ebf88e2',
34
+ explicit_hash_key: '193787600037681706952143357071916352604',
35
+ data: "RECORD 23 uswkxftxroeusscxsjhno\n")
36
+
37
+ aggregator.add_user_record(partition_key: 'd490690c-e74d-4db2-a3c8-d8f2f184fd23',
38
+ explicit_hash_key: '266880436964932424265466916734068684439',
39
+ data: "RECORD 24 casehdgivfaxeustlyszy\n")
40
+
41
+ aggregator.add_user_record(partition_key: 'c924bc09-b85e-47f1-b32e-336522ee53c8',
42
+ explicit_hash_key: '339606600942967391854603552402021847292',
43
+ data: "RECORD 25 nvffvpmuogdopjhamevrk\n")
44
+ aggregated_record = aggregator.aggregate!
45
+ ```
46
+
47
+ ### Deaggregation
48
+
49
+ ```ruby
50
+ deaggregated_records = Kinesis::Aggregation::Deaggregator.new(aggregated_record).deaggregate
51
+ ```
52
+
53
+ ### Use from within a lambda
54
+
55
+ ```ruby
56
+ require 'kinesis/aggregation'
57
+
58
+ def handler(event:, context:)
59
+ event['Records'].each do |aggregated_record|
60
+ records = Kinesis::Aggregation::Deaggregator.new(aggregated_record).deaggregate
61
+
62
+ # interesting code goes here
63
+ end
64
+ end
65
+ ```
66
+
67
+ ## Development
68
+
69
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
70
+
71
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
72
+
73
+ ## Contributing
74
+
75
+ Bug reports and pull requests are welcome on GitHub at https://github.com/hawknewton/ruby-kinesis-aggregation.
76
+
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "kinesis/aggregation"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,29 @@
1
+ require_relative 'lib/kinesis/aggregation/version'
2
+
3
+ Gem::Specification.new do |spec|
4
+ spec.name = 'kinesis-aggregation'
5
+ spec.version = Kinesis::Aggregation::VERSION
6
+ spec.authors = ['hawknewton']
7
+ spec.email = ['hawk.newton@gmail.com']
8
+
9
+ spec.summary = %q{Read and write AWS KPL aggregate messages}
10
+ spec.description = %q{Read and write AWS KPL aggregate messages}
11
+ spec.homepage = 'https://github.com/hawknewton/ruby-kinesis-aggregation'
12
+ spec.licenses = ['Apache-2.0']
13
+ spec.required_ruby_version = Gem::Requirement.new('>= 2.3.0')
14
+
15
+ spec.metadata['homepage_uri'] = spec.homepage
16
+ spec.metadata['source_code_uri'] = 'https://github.com/hawknewton/ruby-kinesis-aggregation'
17
+
18
+ # Specify which files should be added to the gem when it is released.
19
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
20
+ spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
21
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
22
+ end
23
+ spec.bindir = 'exe'
24
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
25
+ spec.require_paths = ['lib']
26
+
27
+ spec.add_dependency('google-protobuf', '~> 3.13')
28
+ spec.add_dependency('activesupport', '>= 4.0')
29
+ end
@@ -0,0 +1,17 @@
1
+ message AggregatedRecord {
2
+ repeated string partition_key_table = 1;
3
+ repeated string explicit_hash_key_table = 2;
4
+ repeated Record records = 3;
5
+ }
6
+
7
+ message Tag {
8
+ required string key = 1;
9
+ optional string value = 2;
10
+ }
11
+
12
+ message Record {
13
+ required uint64 partition_key_index = 1;
14
+ optional uint64 explicit_hash_key_index = 2;
15
+ required bytes data = 3;
16
+ repeated Tag tags = 4;
17
+ }
@@ -0,0 +1,12 @@
1
+ require "kinesis/aggregation/version"
2
+ require 'kinesis/aggregation/deaggregator'
3
+ require 'kinesis/aggregation/aggregator'
4
+ require 'kinesis/aggregation/kpl_pb'
5
+ require 'kinesis/aggregation/kinesis_analytics_converter'
6
+
7
module Kinesis
  # Namespace for the aggregation / deaggregation helpers loaded by the
  # requires in this file.
  module Aggregation
    # Library-specific error type.
    # NOTE(review): nothing in the visible code raises this yet; presumably it
    # is reserved as the base class for future library errors.
    class Error < StandardError
    end
  end
end
@@ -0,0 +1,60 @@
1
module Kinesis
  module Aggregation
    # Collects user records and packs them into one aggregated record.
    #
    # The payload produced by #aggregate! is laid out as
    #   MAGIC + protobuf-encoded AggregatedRecord + MD5 digest of those protobuf bytes
    # and is then Base64-encoded.
    #
    # This class expects Base64, Digest::MD5 and the protobuf message classes
    # AggregatedRecord / Record to be loaded by the code that requires it.
    class Aggregator
      # Four-byte binary prefix that marks a payload as aggregated.
      # String#b yields a binary copy, so this also works under
      # `frozen_string_literal: true`.
      MAGIC = "\xf3\x89\x9a\xc2".b.freeze

      def initialize
        @user_records = []
      end

      # Queues one user record for the next call to #aggregate!.
      #
      # @param user_record [Hash] read with the keys :partition_key, :data and
      #   (optionally) :explicit_hash_key
      # @return [Array<Hash>] the internal list of queued records
      def add_user_record(user_record)
        @user_records << user_record
      end

      # Builds the aggregated record from everything queued so far and then
      # clears the queue.
      #
      # The outer :partition_key / :explicit_hash_key are taken from the first
      # queued record; a missing explicit hash key becomes ''.
      #
      # @return [Hash, nil] hash with :partition_key, :explicit_hash_key and
      #   Base64-encoded :data, or nil when no records are queued
      def aggregate!
        # Nothing to aggregate: return nil instead of failing with a
        # NoMethodError on the (absent) first record.
        return nil if @user_records.empty?

        first = @user_records.first
        result = {
          partition_key: first[:partition_key],
          explicit_hash_key: first[:explicit_hash_key] || '',
          data: Base64.encode64(data)
        }
        @user_records = []
        result
      end

      # @return [Integer] number of records currently queued
      def num_user_records
        @user_records.length
      end

      private

      # Protobuf message holding the key tables and the per-record entries.
      def aggregated_record
        AggregatedRecord.new(
          partition_key_table: @user_records.map { |r| r[:partition_key] },
          explicit_hash_key_table: explicit_hash_key_table,
          records: records
        )
      end

      # Binary payload: magic prefix, encoded message, MD5 of the encoded message.
      def data
        bytes = AggregatedRecord.encode(aggregated_record)
        MAGIC + bytes + Digest::MD5.digest(bytes)
      end

      # Explicit hash keys of the queued records, in order, skipping records
      # that have none.
      def explicit_hash_key_table
        @user_records.map { |r| r[:explicit_hash_key] }.compact
      end

      # One Record message per queued user record.
      #
      # partition_key_index is the record's position, matching the
      # uncompacted partition key table. explicit_hash_key_index must instead
      # point into the *compacted* explicit hash key table, so it is counted
      # separately and only advanced for records that carry a key.
      def records
        next_hash_key_index = 0
        @user_records.each_with_index.map do |user_record, index|
          record = Record.new(
            partition_key_index: index,
            data: user_record[:data]
          )
          if user_record[:explicit_hash_key]
            record.explicit_hash_key_index = next_hash_key_index
            next_hash_key_index += 1
          end
          record
        end
      end
    end
  end
end
@@ -0,0 +1,81 @@
1
+ require 'base64'
2
+ require 'digest'
3
+ require 'active_support/core_ext/hash/indifferent_access'
4
+
5
module Kinesis
  module Aggregation
    # Splits one incoming record into the user records it contains.
    #
    # A record is treated as aggregated when its Base64-decoded data starts
    # with MAGIC and ends with an MD5 digest matching the bytes in between;
    # those bytes are decoded as an AggregatedRecord protobuf message. Any
    # other record is passed through unchanged.
    class Deaggregator
      # Four-byte binary prefix that marks a payload as aggregated.
      # String#b yields a binary copy, so this also works under
      # `frozen_string_literal: true`.
      MAGIC = "\xf3\x89\x9a\xc2".b.freeze
      # Length in bytes of the trailing MD5 digest.
      DIGEST_SIZE = 16

      # @param raw_record [Hash] either a record with a :kinesis entry or one
      #   with :kinesisStreamRecordMetadata (converted via
      #   KinesisAnalyticsConverter); must respond to #with_indifferent_access
      def initialize(raw_record)
        @raw_record = raw_record.with_indifferent_access
      end

      # @return [Array<Hash>] one hash per contained user record, or a
      #   single-element array holding the original record when it is not a
      #   valid aggregated record (no magic prefix, too short, or digest mismatch)
      def deaggregate
        return [kinesis_record] unless aggregated_record? && computed_md5 == kinesis_record_md5

        envelope = kinesis_record[:kinesis]
        aggregated_record.records.map do |record|
          base_record.merge(
            kinesis: {
              kinesisSchemaVersion: envelope[:kinesisSchemaVersion],
              sequenceNumber: envelope[:sequenceNumber],
              approximateArrivalTimestamp: envelope[:approximateArrivalTimestamp],
              explicitHashKey: explicit_hash_key_for(record),
              partitionKey: partition_key_for(record),
              data: Base64.encode64(record.data),
              recordId: envelope[:recordId]
            }
          )
        end
      end

      private

      # True when the payload is long enough to hold the prefix plus digest
      # and actually starts with the magic prefix.
      def aggregated_record?
        data.bytesize >= MAGIC.bytesize + DIGEST_SIZE &&
          data.byteslice(0, MAGIC.bytesize) == MAGIC
      end

      def aggregated_record
        @aggregated_record ||= AggregatedRecord.decode(kinesis_record_message_data)
      end

      # The incoming record without its kinesis entry. Keys are compared as
      # strings because an indifferent-access hash stores String keys, while
      # the converter output uses Symbol keys.
      def base_record
        kinesis_record.reject { |key, _value| key.to_s == 'kinesis' }
      end

      # Base64-decoded payload bytes of the incoming record.
      def data
        @data ||= Base64.decode64(kinesis_record[:kinesis][:data])
      end

      # Explicit hash key for a contained record, or nil when it has none.
      #
      # An unset optional integer field reads as 0, which would wrongly select
      # the first table entry, so field presence is checked first when the
      # message exposes a presence predicate.
      # NOTE(review): relies on the protobuf runtime providing
      # has_explicit_hash_key_index? for this proto2 optional field; when it is
      # not available the plain index lookup is used.
      def explicit_hash_key_for(record)
        if record.respond_to?(:has_explicit_hash_key_index?) &&
           !record.has_explicit_hash_key_index?
          return nil
        end

        aggregated_record.explicit_hash_key_table[record.explicit_hash_key_index]
      end

      # Normalised record: analytics-style input is converted, anything else
      # is used as given.
      def kinesis_record
        @kinesis_record ||=
          if @raw_record.key?(:kinesisStreamRecordMetadata)
            KinesisAnalyticsConverter.new(@raw_record).convert
          else
            @raw_record
          end
      end

      def partition_key_for(record)
        aggregated_record.partition_key_table[record.partition_key_index]
      end

      # Trailing digest stored in the payload.
      def kinesis_record_md5
        data.byteslice(-DIGEST_SIZE, DIGEST_SIZE)
      end

      # Bytes between the magic prefix and the trailing digest.
      def kinesis_record_message_data
        data.byteslice(MAGIC.bytesize, data.bytesize - MAGIC.bytesize - DIGEST_SIZE)
      end

      def computed_md5
        Digest::MD5.digest(kinesis_record_message_data)
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
# Reshapes a record that carries :kinesisStreamRecordMetadata into a hash with
# a single :kinesis entry.
class KinesisAnalyticsConverter
  # Fields copied verbatim from the record's :kinesisStreamRecordMetadata.
  METADATA_FIELDS = %i[sequenceNumber partitionKey approximateArrivalTimestamp shardId].freeze

  # @param kinesis_analytics_record [Hash] read with the keys
  #   :kinesisStreamRecordMetadata, :data and :recordId
  def initialize(kinesis_analytics_record)
    @kinesis_analytics_record = kinesis_analytics_record
  end

  # @return [Hash] `{ kinesis: { ... } }` with a fixed schema version of '1.0',
  #   the four metadata fields, and the record's :data and :recordId
  def convert
    source = @kinesis_analytics_record
    stream_metadata = source[:kinesisStreamRecordMetadata]

    payload = { kinesisSchemaVersion: '1.0' }
    METADATA_FIELDS.each { |field| payload[field] = stream_metadata[field] }
    payload[:data] = source[:data]
    payload[:recordId] = source[:recordId]

    { kinesis: payload }
  end
end
@@ -0,0 +1,28 @@
1
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
2
+ # source: kpl.proto
3
+
4
+ require 'google/protobuf'
5
+
6
+ Google::Protobuf::DescriptorPool.generated_pool.build do
7
+ add_file("kpl.proto", :syntax => :proto2) do
8
+ add_message "AggregatedRecord" do
9
+ repeated :partition_key_table, :string, 1
10
+ repeated :explicit_hash_key_table, :string, 2
11
+ repeated :records, :message, 3, "Record"
12
+ end
13
+ add_message "Tag" do
14
+ required :key, :string, 1
15
+ optional :value, :string, 2
16
+ end
17
+ add_message "Record" do
18
+ required :partition_key_index, :uint64, 1
19
+ optional :explicit_hash_key_index, :uint64, 2
20
+ required :data, :bytes, 3
21
+ repeated :tags, :message, 4, "Tag"
22
+ end
23
+ end
24
+ end
25
+
26
+ AggregatedRecord = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("AggregatedRecord").msgclass
27
+ Tag = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("Tag").msgclass
28
+ Record = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("Record").msgclass
@@ -0,0 +1,5 @@
1
module Kinesis
  module Aggregation
    # Gem version string; the gemspec reads it as Kinesis::Aggregation::VERSION.
    VERSION = '0.1.0'
  end
end
metadata ADDED
@@ -0,0 +1,90 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: kinesis-aggregation
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - hawknewton
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2020-11-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: google-protobuf
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '3.13'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '3.13'
27
+ - !ruby/object:Gem::Dependency
28
+ name: activesupport
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '4.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '4.0'
41
+ description: Read and write AWS KPL aggregate messages
42
+ email:
43
+ - hawk.newton@gmail.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".gitignore"
49
+ - ".rspec"
50
+ - ".travis.yml"
51
+ - Gemfile
52
+ - Gemfile.lock
53
+ - README.md
54
+ - Rakefile
55
+ - bin/console
56
+ - bin/setup
57
+ - kinesis-aggregation.gemspec
58
+ - kpl.proto
59
+ - lib/kinesis/aggregation.rb
60
+ - lib/kinesis/aggregation/aggregator.rb
61
+ - lib/kinesis/aggregation/deaggregator.rb
62
+ - lib/kinesis/aggregation/kinesis_analytics_converter.rb
63
+ - lib/kinesis/aggregation/kpl_pb.rb
64
+ - lib/kinesis/aggregation/version.rb
65
+ homepage: https://github.com/hawknewton/ruby-kinesis-aggregation
66
+ licenses:
67
+ - Apache-2.0
68
+ metadata:
69
+ homepage_uri: https://github.com/hawknewton/ruby-kinesis-aggregation
70
+ source_code_uri: https://github.com/hawknewton/ruby-kinesis-aggregation
71
+ post_install_message:
72
+ rdoc_options: []
73
+ require_paths:
74
+ - lib
75
+ required_ruby_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: 2.3.0
80
+ required_rubygems_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ requirements: []
86
+ rubygems_version: 3.1.2
87
+ signing_key:
88
+ specification_version: 4
89
+ summary: Read and write AWS KPL aggregate messages
90
+ test_files: []