kcl-rb 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/main.yml +58 -0
  3. data/.gitignore +11 -0
  4. data/.rubocop.yml +93 -0
  5. data/.ruby-version +1 -0
  6. data/Gemfile +4 -0
  7. data/Gemfile.lock +90 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +130 -0
  10. data/Rakefile +2 -0
  11. data/aws/config +3 -0
  12. data/aws/credentials +3 -0
  13. data/bin/console +14 -0
  14. data/bin/setup +8 -0
  15. data/demo/Gemfile +5 -0
  16. data/demo/Gemfile.lock +60 -0
  17. data/demo/README.md +38 -0
  18. data/demo/Rakefile +31 -0
  19. data/demo/aws/config +3 -0
  20. data/demo/aws/credentials +3 -0
  21. data/demo/docker-compose.yml +23 -0
  22. data/demo/lib/kcl_demo.rb +49 -0
  23. data/demo/lib/kcl_demo/demo_record_processor.rb +43 -0
  24. data/demo/lib/kcl_demo/demo_record_processor_factory.rb +7 -0
  25. data/demo/terraform/main.tf +35 -0
  26. data/docker-compose.yml +22 -0
  27. data/kcl-rb.gemspec +36 -0
  28. data/lib/kcl.rb +32 -0
  29. data/lib/kcl/checkpointer.rb +179 -0
  30. data/lib/kcl/checkpoints/sentinel.rb +17 -0
  31. data/lib/kcl/config.rb +35 -0
  32. data/lib/kcl/errors.rb +6 -0
  33. data/lib/kcl/logger.rb +3 -0
  34. data/lib/kcl/proxies/dynamo_db_proxy.rb +132 -0
  35. data/lib/kcl/proxies/kinesis_proxy.rb +56 -0
  36. data/lib/kcl/record_processor.rb +13 -0
  37. data/lib/kcl/record_processor_factory.rb +5 -0
  38. data/lib/kcl/types/extended_sequence_number.rb +89 -0
  39. data/lib/kcl/types/initialization_input.rb +13 -0
  40. data/lib/kcl/types/records_input.rb +15 -0
  41. data/lib/kcl/types/shutdown_input.rb +13 -0
  42. data/lib/kcl/version.rb +3 -0
  43. data/lib/kcl/worker.rb +159 -0
  44. data/lib/kcl/workers/consumer.rb +80 -0
  45. data/lib/kcl/workers/record_checkpointer.rb +14 -0
  46. data/lib/kcl/workers/shard_info.rb +47 -0
  47. data/lib/kcl/workers/shutdown_reason.rb +6 -0
  48. data/terraform/main.tf +35 -0
  49. metadata +191 -0
@@ -0,0 +1,17 @@
1
# Sentinel checkpoint values.
# ShardConsumers read these during initialization to decide where in the shard
# to start, and to mark a shard whose records have all been processed.
module Kcl::Checkpoints
  module Sentinel
    # Begin at the first record still available in the shard.
    TRIM_HORIZON = 'TRIM_HORIZON'.freeze

    # Begin at the latest record in the shard.
    LATEST = 'LATEST'.freeze

    # Every record in the shard has been processed.
    SHARD_END = 'SHARD_END'.freeze

    # Begin at the record at or after a given server-side timestamp.
    AT_TIMESTAMP = 'AT_TIMESTAMP'.freeze

    # Resume from a sequence number within the shard.
    AFTER_SEQUENCE_NUMBER = 'AFTER_SEQUENCE_NUMBER'.freeze
  end
end
@@ -0,0 +1,35 @@
1
# Settings holder for the library: AWS credentials and region, DynamoDB and
# Kinesis endpoints/names, logging, and lease/worker limits.
# Every attribute is readable and writable.
class Kcl::Config
  attr_accessor :aws_region,
                :aws_access_key_id,
                :aws_secret_access_key,
                :dynamodb_endpoint,
                :dynamodb_table_name,
                :dynamodb_read_capacity,
                :dynamodb_write_capacity,
                :dynamodb_failover_seconds,
                :kinesis_endpoint,
                :kinesis_stream_name,
                :logger,
                :log_level,
                :max_lease_count,
                :use_ssl,
                :worker_count

  # Set default values.
  # Both endpoints default to a local address ('https://localhost:4566');
  # credentials, region, table name, stream name, logger and log level
  # default to nil and are expected to be assigned by the application.
  def initialize
    @aws_region = nil
    @aws_access_key_id = nil
    @aws_secret_access_key = nil
    @dynamodb_endpoint = 'https://localhost:4566'
    @dynamodb_table_name = nil
    @dynamodb_read_capacity = 10
    @dynamodb_write_capacity = 10
    @dynamodb_failover_seconds = 10
    @kinesis_endpoint = 'https://localhost:4566'
    @kinesis_stream_name = nil
    @logger = nil
    # Explicitly defaulted like every other attribute.
    @log_level = nil
    @max_lease_count = 1
    # NOTE(review): the DynamoDB/Kinesis proxies pass this value as
    # `ssl_verify_peer`, so false disables certificate verification.
    @use_ssl = false
    @worker_count = 1
  end
end
@@ -0,0 +1,6 @@
1
# Exception classes used by the library. All of them descend from
# StandardError, so a bare `rescue` catches them.
module Kcl::Errors
  class IllegalArgumentError < StandardError; end
  class CheckpointNotFoundError < StandardError; end
  class SequenceNumberNotFoundError < StandardError; end
  class LeaseNotAquiredError < StandardError; end

  # Correctly spelled name for LeaseNotAquiredError. The misspelled constant
  # stays the defining class so existing references and class names in log
  # output are unchanged.
  LeaseNotAcquiredError = LeaseNotAquiredError
end
@@ -0,0 +1,3 @@
1
+ require 'logger'
2
+
3
# Library logger: a direct subclass of the standard ::Logger that adds no
# behavior of its own.
class Kcl::Logger < ::Logger
end
@@ -0,0 +1,132 @@
1
+ require 'aws-sdk-dynamodb'
2
+
3
module Kcl::Proxies
  # Small wrapper around Aws::DynamoDB::Client. Most write operations report
  # success as true and translate a missing resource into false (nil for
  # get_item); any other AWS error propagates to the caller.
  class DynamoDbProxy
    attr_reader :client

    # Build the underlying client from the given configuration.
    # @param [Object] config object exposing aws_access_key_id,
    #   aws_secret_access_key, aws_region, dynamodb_endpoint and use_ssl
    #   (presumably a Kcl::Config)
    def initialize(config)
      @client = Aws::DynamoDB::Client.new(
        access_key_id: config.aws_access_key_id,
        secret_access_key: config.aws_secret_access_key,
        region: config.aws_region,
        endpoint: config.dynamodb_endpoint,
        ssl_verify_peer: config.use_ssl
      )
    end

    # Check whether a table can be described.
    # @param [String] table_name
    # @return [Boolean] false when the service reports the table as not found
    def exists?(table_name)
      @client.describe_table(table_name: table_name)
      true
    rescue Aws::DynamoDB::Errors::NotFound,
           Aws::DynamoDB::Errors::ResourceNotFoundException
      false
    end

    # Create a table; returns the client's response.
    # @param [String] table_name
    # @param [Array] attributes passed as attribute_definitions
    # @param [Array] schema passed as key_schema
    # @param [Hash] throughputs passed as provisioned_throughput
    def create_table(table_name, attributes = [], schema = [], throughputs = {})
      @client.create_table(
        table_name: table_name,
        attribute_definitions: attributes,
        key_schema: schema,
        provisioned_throughput: throughputs
      )
    end

    # Delete a table.
    # @param [String] table_name
    # @return [Boolean] false when the table does not exist
    def delete_table(table_name)
      @client.delete_table(table_name: table_name)
      true
    rescue Aws::DynamoDB::Errors::ResourceNotFoundException
      false
    end

    # Fetch a single item by key.
    # @param [String] table_name
    # @param [Hash] conditions passed as the item key
    # @return [Hash, nil] the item, or nil when the resource is not found
    def get_item(table_name, conditions)
      @client.get_item(table_name: table_name, key: conditions).item
    rescue Aws::DynamoDB::Errors::ResourceNotFoundException
      nil
    end

    # Write an item.
    # @param [String] table_name
    # @param [Hash] item
    # @return [Boolean] false when the resource is not found
    def put_item(table_name, item)
      @client.put_item(table_name: table_name, item: item)
      true
    rescue Aws::DynamoDB::Errors::ResourceNotFoundException
      false
    end

    # Apply an update expression to the item identified by the key.
    # @param [String] table_name
    # @param [Hash] conditions passed as the item key
    # @param [String] update_expression
    # @return [Boolean] false when the resource is not found
    def update_item(table_name, conditions, update_expression)
      @client.update_item(
        table_name: table_name,
        key: conditions,
        update_expression: update_expression
      )
      true
    rescue Aws::DynamoDB::Errors::ResourceNotFoundException
      false
    end

    # Write an item guarded by a condition expression. This is implemented
    # with put_item, so the whole item is written when the condition holds.
    # Only a missing resource is rescued here; other errors propagate.
    # @param [String] table_name
    # @param [Hash] item
    # @param [String] condition_expression
    # @param [Hash] expression_attributes passed as expression_attribute_values
    # @return [Boolean] false when the resource is not found
    def conditional_update_item(table_name, item, condition_expression, expression_attributes)
      @client.put_item(
        table_name: table_name,
        item: item,
        condition_expression: condition_expression,
        expression_attribute_values: expression_attributes
      )
      true
    rescue Aws::DynamoDB::Errors::ResourceNotFoundException
      false
    end

    # Delete the item identified by the key.
    # @param [String] table_name
    # @param [Hash] conditions passed as the item key
    # @return [Boolean] false when the resource is not found
    def remove_item(table_name, conditions)
      @client.delete_item(table_name: table_name, key: conditions)
      true
    rescue Aws::DynamoDB::Errors::ResourceNotFoundException
      false
    end
  end
end
@@ -0,0 +1,56 @@
1
+ require 'aws-sdk-kinesis'
2
+
3
module Kcl::Proxies
  # Small wrapper around Aws::Kinesis::Client bound to a single stream name.
  class KinesisProxy
    attr_reader :client

    # Build the underlying client and remember the configured stream name.
    # @param [Object] config object exposing aws_access_key_id,
    #   aws_secret_access_key, aws_region, kinesis_endpoint, use_ssl and
    #   kinesis_stream_name (presumably a Kcl::Config)
    def initialize(config)
      @client = Aws::Kinesis::Client.new(
        {
          access_key_id: config.aws_access_key_id,
          secret_access_key: config.aws_secret_access_key,
          region: config.aws_region,
          endpoint: config.kinesis_endpoint,
          ssl_verify_peer: config.use_ssl
        }
      )
      @stream_name = config.kinesis_stream_name
    end

    # List every shard of the stream.
    # DescribeStream is paginated, so keep requesting pages (starting after
    # the last shard already seen) while the service reports more shards.
    # @return [Array]
    def shards
      params = { stream_name: @stream_name }
      collected = []
      loop do
        description = @client.describe_stream(params).stream_description
        page = description.shards
        collected.concat(page)
        # An empty page cannot advance the cursor, so stop rather than spin.
        break if page.empty? || !description.has_more_shards
        params = params.merge(exclusive_start_shard_id: page.last.shard_id)
      end
      collected
    end

    # Obtain a shard iterator.
    # @param [String] shard_id
    # @param [String] shard_iterator_type defaults to the LATEST sentinel when nil
    # @param [String] sequence_number only sent when the iterator type is
    #   the AFTER_SEQUENCE_NUMBER sentinel
    # @return [String]
    def get_shard_iterator(shard_id, shard_iterator_type = nil, sequence_number = nil)
      params = {
        stream_name: @stream_name,
        shard_id: shard_id,
        shard_iterator_type: shard_iterator_type || Kcl::Checkpoints::Sentinel::LATEST
      }
      if shard_iterator_type == Kcl::Checkpoints::Sentinel::AFTER_SEQUENCE_NUMBER
        params[:starting_sequence_number] = sequence_number
      end
      @client.get_shard_iterator(params).shard_iterator
    end

    # Read records at the given iterator.
    # @param [String] shard_iterator
    # @return [Hash] with :records and :next_shard_iterator
    def get_records(shard_iterator)
      res = @client.get_records({ shard_iterator: shard_iterator })
      { records: res.records, next_shard_iterator: res.next_shard_iterator }
    end

    # Put one record; `data` is forwarded to the client unchanged.
    # @param [Hash] data
    # @return [Hash] with :shard_id and :sequence_number
    def put_record(data)
      res = @client.put_record(data)
      { shard_id: res.shard_id, sequence_number: res.sequence_number }
    end
  end
end
@@ -0,0 +1,13 @@
1
# Abstract base for application record processors. Each hook below raises
# NotImplementedError until a subclass overrides it.
class Kcl::RecordProcessor
  # @param [Object] _initialization_input presumably a Kcl::Types::InitializationInput
  # @raise [NotImplementedError] unless overridden
  def after_initialize(_initialization_input)
    raise NotImplementedError, "You must implement #{self.class}##{__method__}"
  end

  # @param [Object] _records_input presumably a Kcl::Types::RecordsInput
  # @raise [NotImplementedError] unless overridden
  def process_records(_records_input)
    raise NotImplementedError, "You must implement #{self.class}##{__method__}"
  end

  # @param [Object] _shutdown_input presumably a Kcl::Types::ShutdownInput
  # @raise [NotImplementedError] unless overridden
  def shutdown(_shutdown_input)
    raise NotImplementedError, "You must implement #{self.class}##{__method__}"
  end
end
@@ -0,0 +1,5 @@
1
# Abstract factory for record processors; subclasses must override
# create_processor.
class Kcl::RecordProcessorFactory
  # @raise [NotImplementedError] unless overridden
  def create_processor
    raise NotImplementedError, "You must implement #{self.class}##{__method__}"
  end
end
@@ -0,0 +1,89 @@
1
+ require 'bigdecimal'
2
+
3
module Kcl::Types
  # A sequence number (a string of digits or a sentinel name) paired with a
  # sub-sequence number.
  class ExtendedSequenceNumber
    attr_reader :sequence_number, :sub_sequence_number

    # Numeric stand-ins returned by #value for the sentinel names.
    TRIM_HORIZON_VALUE = BigDecimal(-2)
    LATEST_VALUE = BigDecimal(-1)
    AT_TIMESTAMP_VALUE = BigDecimal(-3)

    # Memoized instance for the LATEST sentinel.
    # @return [Kcl::Types::ExtendedSequenceNumber]
    def self.latest
      @_latest ||= new(Kcl::Checkpoints::Sentinel::LATEST)
    end

    # Memoized instance for the SHARD_END sentinel.
    # @return [Kcl::Types::ExtendedSequenceNumber]
    def self.shard_end
      @_shard_end ||= new(Kcl::Checkpoints::Sentinel::SHARD_END)
    end

    # Memoized instance for the TRIM_HORIZON sentinel.
    # @return [Kcl::Types::ExtendedSequenceNumber]
    def self.trim_horizon
      @_trim_horizon ||= new(Kcl::Checkpoints::Sentinel::TRIM_HORIZON)
    end

    # @param [String] str
    # @return [Boolean] true when str is all digits or a recognized sentinel
    def self.digits_or_sentinel?(str)
      digits?(str) || sentinel?(str)
    end

    # @param [String] str
    # @return [Boolean] true for TRIM_HORIZON, LATEST, SHARD_END or AT_TIMESTAMP
    def self.sentinel?(str)
      case str
      when Kcl::Checkpoints::Sentinel::TRIM_HORIZON,
           Kcl::Checkpoints::Sentinel::LATEST,
           Kcl::Checkpoints::Sentinel::SHARD_END,
           Kcl::Checkpoints::Sentinel::AT_TIMESTAMP
        true
      else
        false
      end
    end

    # @param [String] str
    # @return [Boolean] true only for a non-empty String made of ASCII digits;
    #   nil and non-String values yield false instead of raising
    def self.digits?(str)
      str.is_a?(String) && str.match?(/\A[0-9]+\z/)
    end

    # @param [String] sequence_number
    # @param [Number] sub_sequence_number
    def initialize(sequence_number, sub_sequence_number = 0)
      @sequence_number = sequence_number
      @sub_sequence_number = sub_sequence_number
    end

    # Numeric form of the sequence number: the digits themselves, or the
    # negative stand-in constant for LATEST, TRIM_HORIZON and AT_TIMESTAMP.
    # @return [BigDecimal]
    # @raise [Kcl::Errors::IllegalArgumentError] for any other value
    #   (including SHARD_END, nil and non-String values)
    def value
      return BigDecimal(@sequence_number) if self.class.digits?(@sequence_number)

      case @sequence_number
      when Kcl::Checkpoints::Sentinel::LATEST
        LATEST_VALUE
      when Kcl::Checkpoints::Sentinel::TRIM_HORIZON
        TRIM_HORIZON_VALUE
      when Kcl::Checkpoints::Sentinel::AT_TIMESTAMP
        AT_TIMESTAMP_VALUE
      else
        # Interpolate rather than concatenate: String#+ raises TypeError for
        # nil/non-String values, which would mask the intended error.
        received = @sequence_number.is_a?(String) ? @sequence_number : @sequence_number.inspect
        raise Kcl::Errors::IllegalArgumentError,
              "Expected a string of digits, TRIM_HORIZON, LATEST or AT_TIMESTAMP but received #{received}"
      end
    end

    # Compare both the sequence number and the sub-sequence number.
    # @param [Kcl::Types::ExtendedSequenceNumber] extended_sequence_number
    # @return [Boolean] false when given nil
    def equals(extended_sequence_number)
      return false if extended_sequence_number.nil?
      return false if @sequence_number != extended_sequence_number.sequence_number

      @sub_sequence_number == extended_sequence_number.sub_sequence_number
    end
  end
end
@@ -0,0 +1,13 @@
1
module Kcl::Types
  # Read-only pair of a shard id and the extended sequence number associated
  # with it; presumably the argument given to RecordProcessor#after_initialize.
  class InitializationInput
    attr_reader :shard_id, :extended_sequence_number

    # @param [String] shard_id
    # @param [Kcl::Types::ExtendedSequenceNumber] extended_sequence_number
    def initialize(shard_id, extended_sequence_number)
      @shard_id = shard_id
      @extended_sequence_number = extended_sequence_number
    end
  end
end
@@ -0,0 +1,15 @@
1
module Kcl::Types
  # Read-only bundle of a batch of records, the lag behind the latest record
  # in milliseconds, and a checkpointer; presumably the argument given to
  # RecordProcessor#process_records.
  class RecordsInput
    attr_reader :records, :millis_behind_latest, :record_checkpointer

    # @param [Array] records
    # @param [Number] millis_behind_latest
    # @param [Kcl::Workers::RecordCheckpointer] record_checkpointer
    def initialize(records, millis_behind_latest, record_checkpointer)
      @records = records
      @millis_behind_latest = millis_behind_latest
      @record_checkpointer = record_checkpointer
    end
  end
end
@@ -0,0 +1,13 @@
1
module Kcl::Types
  # Read-only pair of a shutdown reason and a checkpointer; presumably the
  # argument given to RecordProcessor#shutdown.
  class ShutdownInput
    attr_reader :shutdown_reason, :record_checkpointer

    # @param [Kcl::Workers::ShutdownReason] shutdown_reason
    # @param [Kcl::Workers::RecordCheckpointer] record_checkpointer
    def initialize(shutdown_reason, record_checkpointer)
      @shutdown_reason = shutdown_reason
      @record_checkpointer = record_checkpointer
    end
  end
end
@@ -0,0 +1,3 @@
1
module Kcl
  # Version string of the gem (frozen).
  VERSION = '1.0.0'.freeze
end
@@ -0,0 +1,159 @@
1
+ require 'eventmachine'
2
+
3
# Drives one worker: on a periodic timer it refreshes the shard list from
# Kinesis and, when a lease slot is free, consumes shards through record
# processors built by the supplied factory.
class Kcl::Worker
  PROCESS_INTERVAL = 1 # by sec

  # Build a worker and start it.
  # @param [Object] id worker identifier, compared against each shard's lease owner
  # @param [Object] record_processor_factory must respond to create_processor
  def self.run(id, record_processor_factory)
    new(id, record_processor_factory).start
  end

  # @param [Object] id worker identifier
  # @param [Object] record_processor_factory must respond to create_processor
  def initialize(id, record_processor_factory)
    @id = id
    @record_processor_factory = record_processor_factory
    @live_shards = {} # Map<String, Boolean>
    @shards = {} # Map<String, Kcl::Workers::ShardInfo>
    @kinesis = nil # Kcl::Proxies::KinesisProxy
    @checkpointer = nil # Kcl::Checkpointer
    @timer = nil
  end

  # Start consuming data from the stream,
  # and pass it to the application record processors.
  # Runs the EventMachine reactor; once it stops, resources are cleaned up.
  # Any StandardError is logged and re-raised.
  def start
    Kcl.logger.info("Start worker at #{object_id}")

    EM.run do
      trap_signals

      @timer = EM::PeriodicTimer.new(PROCESS_INTERVAL) do
        sync_shards!
        consume_shards! if available_lease_shard?
      end
    end

    cleanup
    Kcl.logger.info("Finish worker at #{object_id}")
  rescue => e
    Kcl.logger.error("#{e.class}: #{e.message}")
    raise
  end

  # Shutdown gracefully: cancel the periodic timer (if any) and stop the
  # reactor. Any StandardError is logged and re-raised.
  # @param [Symbol] signal only used in the log message
  def shutdown(signal = :NONE)
    unless @timer.nil?
      @timer.cancel
      @timer = nil
    end
    EM.stop

    Kcl.logger.info("Shutdown worker with signal #{signal} at #{object_id}")
  rescue => e
    Kcl.logger.error("#{e.class}: #{e.message}")
    raise
  end

  # Cleanup resources: forget all shards and drop the lazily created proxies.
  def cleanup
    @live_shards = {}
    @shards = {}
    @kinesis = nil
    @checkpointer = nil
  end

  # Add new shards and delete unused shards.
  # @return [Hash] the tracked shards keyed by shard id
  def sync_shards!
    # Assume every known shard is gone until Kinesis lists it again.
    @live_shards.transform_values! { |_| false }

    kinesis.shards.each do |shard|
      @live_shards[shard.shard_id] = true
      next if @shards[shard.shard_id]
      @shards[shard.shard_id] = Kcl::Workers::ShardInfo.new(
        shard.shard_id,
        shard.parent_shard_id,
        shard.sequence_number_range
      )
      Kcl.logger.info("Found new shard at shard_id: #{shard.shard_id}")
    end

    # Collect the ids first so the hash is not modified while being iterated.
    dead_shard_ids = @live_shards.reject { |_, alive| alive }.keys
    dead_shard_ids.each do |shard_id|
      shard = @shards[shard_id]
      checkpointer.remove_lease(shard) if shard
      @shards.delete(shard_id)
      # Also forget the liveness entry; otherwise the same shard would be
      # "removed" again (with a nil shard) on every later sync.
      @live_shards.delete(shard_id)
      Kcl.logger.info("Remove shard at shard_id: #{shard_id}")
    end

    @shards
  end

  # Count the number of leases held by this worker, excluding completed
  # shards, and compare it with the configured maximum.
  # @return [Boolean] true when fewer than max_lease_count leases are held
  def available_lease_shard?
    leased_count = @shards.values.count do |shard|
      shard.lease_owner == @id && !shard.completed?
    end
    Kcl.config.max_lease_count > leased_count
  end

  # Process records by shard: for each shard not already owned by this
  # worker, fetch its checkpoint, take the lease and consume it in its own
  # thread. Blocks until every started thread has finished.
  def consume_shards!
    threads = []
    @shards.each do |shard_id, shard|
      # already owner of the shard
      next if shard.lease_owner == @id

      begin
        shard = checkpointer.fetch_checkpoint(shard)
      rescue Kcl::Errors::CheckpointNotFoundError
        Kcl.logger.info("Not found checkpoint of shard at #{shard.to_h}")
        next
      end
      # shard is closed and processed all records
      next if shard.completed?

      shard = checkpointer.lease(shard, @id)

      threads << Thread.new do
        begin
          consumer = Kcl::Workers::Consumer.new(
            shard,
            @record_processor_factory.create_processor,
            kinesis,
            checkpointer
          )
          consumer.consume!
        ensure
          # Release ownership whether consumption succeeded or raised.
          shard = checkpointer.remove_lease_owner(shard)
          Kcl.logger.info("Finish to consume shard at shard_id: #{shard_id}")
        end
      end
    end
    threads.each(&:join)
  end

  private

  # Lazily created Kinesis proxy built from Kcl.config.
  def kinesis
    if @kinesis.nil?
      @kinesis = Kcl::Proxies::KinesisProxy.new(Kcl.config)
      Kcl.logger.info('Created Kinesis session in worker')
    end
    @kinesis
  end

  # Lazily created checkpointer built from Kcl.config.
  def checkpointer
    if @checkpointer.nil?
      @checkpointer = Kcl::Checkpointer.new(Kcl.config)
      Kcl.logger.info('Created Checkpoint in worker')
    end
    @checkpointer
  end

  # On HUP, INT or TERM, schedule a shutdown on the reactor via a
  # zero-delay timer instead of running it inside the signal handler.
  def trap_signals
    [:HUP, :INT, :TERM].each do |signal|
      trap signal do
        EM.add_timer(0) { shutdown(signal) }
      end
    end
  end
end