kcl-rb 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/main.yml +58 -0
  3. data/.gitignore +11 -0
  4. data/.rubocop.yml +93 -0
  5. data/.ruby-version +1 -0
  6. data/Gemfile +4 -0
  7. data/Gemfile.lock +90 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +130 -0
  10. data/Rakefile +2 -0
  11. data/aws/config +3 -0
  12. data/aws/credentials +3 -0
  13. data/bin/console +14 -0
  14. data/bin/setup +8 -0
  15. data/demo/Gemfile +5 -0
  16. data/demo/Gemfile.lock +60 -0
  17. data/demo/README.md +38 -0
  18. data/demo/Rakefile +31 -0
  19. data/demo/aws/config +3 -0
  20. data/demo/aws/credentials +3 -0
  21. data/demo/docker-compose.yml +23 -0
  22. data/demo/lib/kcl_demo.rb +49 -0
  23. data/demo/lib/kcl_demo/demo_record_processor.rb +43 -0
  24. data/demo/lib/kcl_demo/demo_record_processor_factory.rb +7 -0
  25. data/demo/terraform/main.tf +35 -0
  26. data/docker-compose.yml +22 -0
  27. data/kcl-rb.gemspec +36 -0
  28. data/lib/kcl.rb +32 -0
  29. data/lib/kcl/checkpointer.rb +179 -0
  30. data/lib/kcl/checkpoints/sentinel.rb +17 -0
  31. data/lib/kcl/config.rb +35 -0
  32. data/lib/kcl/errors.rb +6 -0
  33. data/lib/kcl/logger.rb +3 -0
  34. data/lib/kcl/proxies/dynamo_db_proxy.rb +132 -0
  35. data/lib/kcl/proxies/kinesis_proxy.rb +56 -0
  36. data/lib/kcl/record_processor.rb +13 -0
  37. data/lib/kcl/record_processor_factory.rb +5 -0
  38. data/lib/kcl/types/extended_sequence_number.rb +89 -0
  39. data/lib/kcl/types/initialization_input.rb +13 -0
  40. data/lib/kcl/types/records_input.rb +15 -0
  41. data/lib/kcl/types/shutdown_input.rb +13 -0
  42. data/lib/kcl/version.rb +3 -0
  43. data/lib/kcl/worker.rb +159 -0
  44. data/lib/kcl/workers/consumer.rb +80 -0
  45. data/lib/kcl/workers/record_checkpointer.rb +14 -0
  46. data/lib/kcl/workers/shard_info.rb +47 -0
  47. data/lib/kcl/workers/shutdown_reason.rb +6 -0
  48. data/terraform/main.tf +35 -0
  49. metadata +191 -0
@@ -0,0 +1,17 @@
1
# Sentinel checkpoint values.
# They are used while initializing ShardConsumers to pick the position in the
# shard to start from, and to mark a shard as completely processed.
module Kcl::Checkpoints
  module Sentinel
    # Begin with the first record that is available in the shard.
    TRIM_HORIZON = 'TRIM_HORIZON'.freeze

    # Begin with the latest record in the shard.
    LATEST = 'LATEST'.freeze

    # Every record in the shard has been processed.
    SHARD_END = 'SHARD_END'.freeze

    # Begin with the record at or after a given server-side timestamp.
    AT_TIMESTAMP = 'AT_TIMESTAMP'.freeze

    # Resume from a sequence number in the shard.
    AFTER_SEQUENCE_NUMBER = 'AFTER_SEQUENCE_NUMBER'.freeze
  end
end
@@ -0,0 +1,35 @@
1
# Plain settings object. Every setting is a read/write attribute and
# receives its default value in #initialize.
class Kcl::Config
  attr_accessor :aws_region,
                :aws_access_key_id,
                :aws_secret_access_key,
                :dynamodb_endpoint,
                :dynamodb_table_name,
                :dynamodb_read_capacity,
                :dynamodb_write_capacity,
                :dynamodb_failover_seconds,
                :kinesis_endpoint,
                :kinesis_stream_name,
                :logger,
                :log_level,
                :max_lease_count,
                :use_ssl,
                :worker_count

  # Set default values
  def initialize
    @aws_region = nil
    @aws_access_key_id = nil
    @aws_secret_access_key = nil
    @dynamodb_endpoint = 'https://localhost:4566'
    @dynamodb_table_name = nil
    @dynamodb_read_capacity = 10
    @dynamodb_write_capacity = 10
    @dynamodb_failover_seconds = 10
    @kinesis_endpoint = 'https://localhost:4566'
    @kinesis_stream_name = nil
    @logger = nil
    # Declared as an accessor above; initialized explicitly so that every
    # attribute has a visible default (the reader returned nil before, too).
    @log_level = nil
    @max_lease_count = 1
    @use_ssl = false
    @worker_count = 1
  end
end
@@ -0,0 +1,6 @@
1
# Error classes of the library. All of them derive from StandardError, so a
# bare `rescue` catches them.
module Kcl::Errors
  class IllegalArgumentError < StandardError; end
  class CheckpointNotFoundError < StandardError; end
  class SequenceNumberNotFoundError < StandardError; end

  # NOTE: the name is misspelled ("Aquired"); it is kept unchanged because
  # existing callers may rescue it under this name.
  class LeaseNotAquiredError < StandardError; end

  # Correctly spelled alias of LeaseNotAquiredError. Both constants refer to
  # the same class, so rescuing either one catches the same errors.
  LeaseNotAcquiredError = LeaseNotAquiredError
end
@@ -0,0 +1,3 @@
1
+ require 'logger'
2
+
3
# Logger class of the library. It subclasses Ruby's standard ::Logger and
# neither adds nor overrides any behavior.
class Kcl::Logger < ::Logger
end
@@ -0,0 +1,132 @@
1
+ require 'aws-sdk-dynamodb'
2
+
3
module Kcl::Proxies
  # Wrapper around Aws::DynamoDB::Client. Most operations translate a
  # ResourceNotFoundException into a false / nil return value instead of
  # letting it propagate; every other error is raised to the caller.
  class DynamoDbProxy
    attr_reader :client

    # @param [Object] config settings object exposing aws_access_key_id,
    #   aws_secret_access_key, aws_region, dynamodb_endpoint and use_ssl
    def initialize(config)
      client_options = {
        access_key_id: config.aws_access_key_id,
        secret_access_key: config.aws_secret_access_key,
        region: config.aws_region,
        endpoint: config.dynamodb_endpoint,
        ssl_verify_peer: config.use_ssl
      }
      @client = Aws::DynamoDB::Client.new(client_options)
    end

    # Checks for a table by describing it.
    # @param [String] table_name
    # @return [Boolean] false when the describe call reports "not found"
    def exists?(table_name)
      request = { table_name: table_name }
      @client.describe_table(request)
      true
    rescue Aws::DynamoDB::Errors::NotFound,
           Aws::DynamoDB::Errors::ResourceNotFoundException
      false
    end

    # Creates a table; the client's response is returned unchanged.
    # @param [String] table_name
    # @param [Array] attributes passed as attribute_definitions
    # @param [Array] schema passed as key_schema
    # @param [Hash] throughputs passed as provisioned_throughput
    def create_table(table_name, attributes = [], schema = [], throughputs = {})
      request = {
        table_name: table_name,
        attribute_definitions: attributes,
        key_schema: schema,
        provisioned_throughput: throughputs
      }
      @client.create_table(request)
    end

    # @param [String] table_name
    # @return [Boolean] false when the table does not exist
    def delete_table(table_name)
      request = { table_name: table_name }
      @client.delete_table(request)
      true
    rescue Aws::DynamoDB::Errors::ResourceNotFoundException
      false
    end

    # @param [String] table_name
    # @param [Hash] conditions passed as the item key
    # @return [Hash, nil] the item of the response, or nil when the resource
    #   is reported as not found
    def get_item(table_name, conditions)
      request = { table_name: table_name, key: conditions }
      result = @client.get_item(request)
      result.item
    rescue Aws::DynamoDB::Errors::ResourceNotFoundException
      nil
    end

    # @param [String] table_name
    # @param [Hash] item
    # @return [Boolean]
    def put_item(table_name, item)
      request = { table_name: table_name, item: item }
      @client.put_item(request)
      true
    rescue Aws::DynamoDB::Errors::ResourceNotFoundException
      false
    end

    # @param [String] table_name
    # @param [Hash] conditions passed as the item key
    # @param [String] update_expression
    # @return [Boolean]
    def update_item(table_name, conditions, update_expression)
      request = {
        table_name: table_name,
        key: conditions,
        update_expression: update_expression
      }
      @client.update_item(request)
      true
    rescue Aws::DynamoDB::Errors::ResourceNotFoundException
      false
    end

    # Writes the item with put_item, guarded by a condition expression.
    # Only ResourceNotFoundException is rescued here; any other error raised
    # by the client propagates to the caller.
    # @param [String] table_name
    # @param [Hash] item
    # @param [String] condition_expression
    # @param [Hash] expression_attributes passed as expression_attribute_values
    # @return [Boolean]
    def conditional_update_item(table_name, item, condition_expression, expression_attributes)
      request = {
        table_name: table_name,
        item: item,
        condition_expression: condition_expression,
        expression_attribute_values: expression_attributes
      }
      @client.put_item(request)
      true
    rescue Aws::DynamoDB::Errors::ResourceNotFoundException
      false
    end

    # @param [String] table_name
    # @param [Hash] conditions passed as the item key
    # @return [Boolean]
    def remove_item(table_name, conditions)
      request = { table_name: table_name, key: conditions }
      @client.delete_item(request)
      true
    rescue Aws::DynamoDB::Errors::ResourceNotFoundException
      false
    end
  end
end
@@ -0,0 +1,56 @@
1
+ require 'aws-sdk-kinesis'
2
+
3
module Kcl::Proxies
  # Wrapper around Aws::Kinesis::Client bound to the single stream named by
  # the configuration.
  class KinesisProxy
    attr_reader :client

    # @param [Object] config settings object exposing aws_access_key_id,
    #   aws_secret_access_key, aws_region, kinesis_endpoint, use_ssl and
    #   kinesis_stream_name
    def initialize(config)
      @client = Aws::Kinesis::Client.new(
        {
          access_key_id: config.aws_access_key_id,
          secret_access_key: config.aws_secret_access_key,
          region: config.aws_region,
          endpoint: config.kinesis_endpoint,
          ssl_verify_peer: config.use_ssl
        }
      )
      @stream_name = config.kinesis_stream_name
    end

    # Lists every shard of the stream. DescribeStream returns shards in
    # pages, so the call is repeated while the response reports
    # has_more_shards, continuing after the last shard id already received.
    # @return [Array]
    def shards
      all_shards = []
      request = { stream_name: @stream_name }
      loop do
        description = @client.describe_stream(request).stream_description
        page = description.shards
        all_shards.concat(page)
        # The empty-page check guards against looping forever on a response
        # that claims more shards but delivers none.
        break if !description.has_more_shards || page.empty?

        request[:exclusive_start_shard_id] = page.last.shard_id
      end
      all_shards
    end

    # @param [String] shard_id
    # @param [String] shard_iterator_type defaults to the LATEST sentinel
    #   when nil
    # @param [String] sequence_number only sent (as starting_sequence_number)
    #   when shard_iterator_type is AFTER_SEQUENCE_NUMBER
    # @return [String]
    def get_shard_iterator(shard_id, shard_iterator_type = nil, sequence_number = nil)
      params = {
        stream_name: @stream_name,
        shard_id: shard_id,
        shard_iterator_type: shard_iterator_type || Kcl::Checkpoints::Sentinel::LATEST
      }
      if shard_iterator_type == Kcl::Checkpoints::Sentinel::AFTER_SEQUENCE_NUMBER
        params[:starting_sequence_number] = sequence_number
      end
      res = @client.get_shard_iterator(params)
      res.shard_iterator
    end

    # @param [String] shard_iterator
    # @return [Hash] with the keys :records and :next_shard_iterator
    def get_records(shard_iterator)
      res = @client.get_records({ shard_iterator: shard_iterator })
      { records: res.records, next_shard_iterator: res.next_shard_iterator }
    end

    # @param [Hash] data request parameters handed to the client unchanged
    # @return [Hash] with the keys :shard_id and :sequence_number
    def put_record(data)
      res = @client.put_record(data)
      { shard_id: res.shard_id, sequence_number: res.sequence_number }
    end
  end
end
@@ -0,0 +1,13 @@
1
# Base class for record processors. Each hook raises NotImplementedError
# until a subclass overrides it.
class Kcl::RecordProcessor
  # @raise [NotImplementedError] unless overridden
  def after_initialize(_initialization_input)
    raise NotImplementedError, "You must implement #{self.class}##{__method__}"
  end

  # @raise [NotImplementedError] unless overridden
  def process_records(_records_input)
    raise NotImplementedError, "You must implement #{self.class}##{__method__}"
  end

  # @raise [NotImplementedError] unless overridden
  def shutdown(_shutdown_input)
    raise NotImplementedError, "You must implement #{self.class}##{__method__}"
  end
end
@@ -0,0 +1,5 @@
1
# Base class for record processor factories. #create_processor raises
# NotImplementedError until a subclass overrides it.
class Kcl::RecordProcessorFactory
  # @raise [NotImplementedError] unless overridden
  def create_processor
    raise NotImplementedError, "You must implement #{self.class}##{__method__}"
  end
end
@@ -0,0 +1,89 @@
1
+ require 'bigdecimal'
2
+
3
module Kcl::Types
  # A sequence number (a string of digits or a checkpoint sentinel) paired
  # with a sub sequence number.
  class ExtendedSequenceNumber
    attr_reader :sequence_number, :sub_sequence_number

    # Numeric stand-ins returned by #value for the sentinels.
    TRIM_HORIZON_VALUE = BigDecimal(-2)
    LATEST_VALUE = BigDecimal(-1)
    AT_TIMESTAMP_VALUE = BigDecimal(-3)

    # Memoized instance holding the LATEST sentinel.
    # @return [Kcl::Types::ExtendedSequenceNumber]
    def self.latest
      @_latest ||= self.new(Kcl::Checkpoints::Sentinel::LATEST)
    end

    # Memoized instance holding the SHARD_END sentinel.
    # @return [Kcl::Types::ExtendedSequenceNumber]
    def self.shard_end
      @_shard_end ||= self.new(Kcl::Checkpoints::Sentinel::SHARD_END)
    end

    # Memoized instance holding the TRIM_HORIZON sentinel.
    # @return [Kcl::Types::ExtendedSequenceNumber]
    def self.trim_horizon
      @_trim_horizon ||= self.new(Kcl::Checkpoints::Sentinel::TRIM_HORIZON)
    end

    # @param [String] str
    # @return [Boolean]
    def self.digits_or_sentinel?(str)
      digits?(str) || sentinel?(str)
    end

    # True for TRIM_HORIZON, LATEST, SHARD_END and AT_TIMESTAMP.
    # AFTER_SEQUENCE_NUMBER is not treated as a sentinel here.
    # @param [String] str
    # @return [Boolean]
    def self.sentinel?(str)
      case str
      when Kcl::Checkpoints::Sentinel::TRIM_HORIZON,
           Kcl::Checkpoints::Sentinel::LATEST,
           Kcl::Checkpoints::Sentinel::SHARD_END,
           Kcl::Checkpoints::Sentinel::AT_TIMESTAMP
        true
      else
        false
      end
    end

    # True when str is non-empty and consists only of the digits 0-9.
    # @param [String] str
    # @return [Boolean]
    def self.digits?(str)
      return false if str.nil? || str.empty?

      str.match?(/\A[0-9]+\z/)
    end

    # @param [String] sequence_number
    # @param [Number] sub_sequence_number
    def initialize(sequence_number, sub_sequence_number = 0)
      @sequence_number = sequence_number
      @sub_sequence_number = sub_sequence_number
    end

    # Numeric form of the sequence number: the digits themselves, or the
    # negative stand-in of the LATEST / TRIM_HORIZON / AT_TIMESTAMP sentinel.
    # @return [BigDecimal]
    # @raise [Kcl::Errors::IllegalArgumentError] for any other value,
    #   including SHARD_END and nil
    def value
      return BigDecimal(@sequence_number) if self.class.digits?(@sequence_number)

      case @sequence_number
      when Kcl::Checkpoints::Sentinel::LATEST
        LATEST_VALUE
      when Kcl::Checkpoints::Sentinel::TRIM_HORIZON
        TRIM_HORIZON_VALUE
      when Kcl::Checkpoints::Sentinel::AT_TIMESTAMP
        AT_TIMESTAMP_VALUE
      else
        # Interpolation instead of String#+: concatenating a nil or other
        # non-String value raised TypeError and hid the intended error.
        raise Kcl::Errors::IllegalArgumentError,
              "Expected a string of digits, TRIM_HORIZON, LATEST or AT_TIMESTAMP but received #{@sequence_number}"
      end
    end

    # True when both the sequence number and the sub sequence number match.
    # @param [Kcl::Types::ExtendedSequenceNumber] extended_sequence_number
    # @return [Boolean]
    def equals(extended_sequence_number)
      return false if @sequence_number != extended_sequence_number.sequence_number

      @sub_sequence_number == extended_sequence_number.sub_sequence_number
    end
  end
end
@@ -0,0 +1,13 @@
1
module Kcl::Types
  # Read-only value holder for a shard id and an extended sequence number,
  # used as the parameter container of a RecordProcessor method.
  class InitializationInput
    attr_reader :shard_id
    attr_reader :extended_sequence_number

    # @param [String] shard_id
    # @param [Kcl::Types::ExtendedSequenceNumber] extended_sequence_number
    def initialize(shard_id, extended_sequence_number)
      @shard_id, @extended_sequence_number = shard_id, extended_sequence_number
    end
  end
end
@@ -0,0 +1,15 @@
1
module Kcl::Types
  # Read-only value holder for a batch of records, the millis-behind-latest
  # figure and a record checkpointer, used as the parameter container of a
  # record processor method.
  class RecordsInput
    attr_reader :records
    attr_reader :millis_behind_latest
    attr_reader :record_checkpointer

    # @param [Array] records
    # @param [Number] millis_behind_latest
    # @param [Kcl::Workers::RecordCheckpointer] record_checkpointer
    def initialize(records, millis_behind_latest, record_checkpointer)
      @records, @millis_behind_latest, @record_checkpointer =
        records, millis_behind_latest, record_checkpointer
    end
  end
end
@@ -0,0 +1,13 @@
1
module Kcl::Types
  # Read-only value holder for a shutdown reason and a record checkpointer,
  # used as the parameter container of a record processor method.
  class ShutdownInput
    attr_reader :shutdown_reason
    attr_reader :record_checkpointer

    # @param [Kcl::Workers::ShutdownReason] shutdown_reason
    # @param [Kcl::Workers::RecordCheckpointer] record_checkpointer
    def initialize(shutdown_reason, record_checkpointer)
      @shutdown_reason, @record_checkpointer = shutdown_reason, record_checkpointer
    end
  end
end
@@ -0,0 +1,3 @@
1
module Kcl
  # Version string of the library (frozen).
  VERSION = '1.0.0'.freeze
end
@@ -0,0 +1,159 @@
1
+ require 'eventmachine'
2
+
3
# Runs an EventMachine loop that, every PROCESS_INTERVAL seconds, refreshes
# the list of shards of the stream and starts consumers for shards this
# worker does not own yet.
class Kcl::Worker
  PROCESS_INTERVAL = 1 # by sec

  # Builds a worker and starts it.
  # @param id identifier compared against the lease owner of each shard
  # @param record_processor_factory object responding to #create_processor
  def self.run(id, record_processor_factory)
    worker = self.new(id, record_processor_factory)
    worker.start
  end

  def initialize(id, record_processor_factory)
    @id = id
    @record_processor_factory = record_processor_factory
    @live_shards  = {} # Map<String, Boolean>
    @shards = {} # Map<String, Kcl::Workers::ShardInfo>
    @kinesis = nil # Kcl::Proxies::KinesisProxy
    @checkpointer = nil # Kcl::Checkpointer
    @timer = nil
  end

  # Start consuming data from the stream,
  # and pass it to the application record processors.
  # The code after EM.run (cleanup and the final log line) runs once the
  # EM.run call has returned. Errors are logged and re-raised.
  def start
    Kcl.logger.info("Start worker at #{object_id}")

    EM.run do
      trap_signals

      @timer = EM::PeriodicTimer.new(PROCESS_INTERVAL) do
        sync_shards!
        consume_shards! if available_lease_shard?
      end
    end

    cleanup
    Kcl.logger.info("Finish worker at #{object_id}")
  rescue => e
    Kcl.logger.error("#{e.class}: #{e.message}")
    raise
  end

  # Shutdown gracefully: cancel the periodic timer (if any) and stop the
  # EventMachine loop. Errors are logged and re-raised.
  # @param [Symbol] signal only used for the log message
  def shutdown(signal = :NONE)
    unless @timer.nil?
      @timer.cancel
      @timer = nil
    end
    EM.stop

    Kcl.logger.info("Shutdown worker with signal #{signal} at #{object_id}")
  rescue => e
    Kcl.logger.error("#{e.class}: #{e.message}")
    raise
  end

  # Cleanup resources
  def cleanup
    @live_shards = {}
    @shards = {}
    @kinesis = nil
    @checkpointer = nil
  end

  # Add new shards and delete unused shards
  # @return [Hash] the shard map, shard id => Kcl::Workers::ShardInfo
  def sync_shards!
    # Flag every known shard as dead; the listing below revives the ones
    # that are still reported for the stream.
    @live_shards.transform_values! { |_| false }

    kinesis.shards.each do |shard|
      @live_shards[shard.shard_id] = true
      next if @shards[shard.shard_id]

      @shards[shard.shard_id] = Kcl::Workers::ShardInfo.new(
        shard.shard_id,
        shard.parent_shard_id,
        shard.sequence_number_range
      )
      Kcl.logger.info("Found new shard at shard_id: #{shard.shard_id}")
    end

    # Collect the ids first so the hash is not modified while iterating it.
    stale_ids = @live_shards.reject { |_, alive| alive }.keys
    stale_ids.each do |shard_id|
      checkpointer.remove_lease(@shards[shard_id])
      @shards.delete(shard_id)
      # Also forget the liveness flag. Otherwise the id is still flagged
      # dead on the next sync and remove_lease is called again with nil,
      # because the shard has already been deleted from @shards.
      @live_shards.delete(shard_id)
      Kcl.logger.info("Remove shard at shard_id: #{shard_id}")
    end

    @shards
  end

  # Count the number of leases hold by worker excluding the processed shard
  # @return [Boolean] true while that count is below the configured
  #   max_lease_count
  def available_lease_shard?
    leased_count = @shards.values.count do |shard|
      shard.lease_owner == @id && !shard.completed?
    end
    Kcl.config.max_lease_count > leased_count
  end

  # Process records by shard
  # For every shard not owned by this worker: fetch its checkpoint, skip it
  # when no checkpoint exists or it is completed, take the lease and run a
  # consumer in a new thread. The method returns only after all started
  # threads have been joined.
  def consume_shards!
    threads = []
    @shards.each do |shard_id, shard|
      # already owner of the shard
      next if shard.lease_owner == @id

      begin
        shard = checkpointer.fetch_checkpoint(shard)
      rescue Kcl::Errors::CheckpointNotFoundError
        Kcl.logger.info("Not found checkpoint of shard at #{shard.to_h}")
        next
      end
      # shard is closed and processed all records
      next if shard.completed?

      shard = checkpointer.lease(shard, @id)

      threads << Thread.new do
        begin
          consumer = Kcl::Workers::Consumer.new(
            shard,
            @record_processor_factory.create_processor,
            kinesis,
            checkpointer
          )
          consumer.consume!
        ensure
          # Runs whether or not the consumer raised.
          shard = checkpointer.remove_lease_owner(shard)
          Kcl.logger.info("Finish to consume shard at shard_id: #{shard_id}")
        end
      end
    end
    threads.each(&:join)
  end

  private

  # Kinesis proxy, created on first use.
  def kinesis
    if @kinesis.nil?
      @kinesis = Kcl::Proxies::KinesisProxy.new(Kcl.config)
      Kcl.logger.info('Created Kinesis session in worker')
    end
    @kinesis
  end

  # Checkpointer, created on first use.
  def checkpointer
    if @checkpointer.nil?
      @checkpointer = Kcl::Checkpointer.new(Kcl.config)
      Kcl.logger.info('Created Checkpoint in worker')
    end
    @checkpointer
  end

  # Installs handlers for HUP, INT and TERM. The handler does not call
  # #shutdown directly; it schedules the call through an EventMachine timer.
  def trap_signals
    [:HUP, :INT, :TERM].each do |signal|
      trap signal do
        EM.add_timer(0) { shutdown(signal) }
      end
    end
  end
end