kcl-rb 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/main.yml +58 -0
- data/.gitignore +11 -0
- data/.rubocop.yml +93 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +90 -0
- data/LICENSE.txt +21 -0
- data/README.md +130 -0
- data/Rakefile +2 -0
- data/aws/config +3 -0
- data/aws/credentials +3 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/demo/Gemfile +5 -0
- data/demo/Gemfile.lock +60 -0
- data/demo/README.md +38 -0
- data/demo/Rakefile +31 -0
- data/demo/aws/config +3 -0
- data/demo/aws/credentials +3 -0
- data/demo/docker-compose.yml +23 -0
- data/demo/lib/kcl_demo.rb +49 -0
- data/demo/lib/kcl_demo/demo_record_processor.rb +43 -0
- data/demo/lib/kcl_demo/demo_record_processor_factory.rb +7 -0
- data/demo/terraform/main.tf +35 -0
- data/docker-compose.yml +22 -0
- data/kcl-rb.gemspec +36 -0
- data/lib/kcl.rb +32 -0
- data/lib/kcl/checkpointer.rb +179 -0
- data/lib/kcl/checkpoints/sentinel.rb +17 -0
- data/lib/kcl/config.rb +35 -0
- data/lib/kcl/errors.rb +6 -0
- data/lib/kcl/logger.rb +3 -0
- data/lib/kcl/proxies/dynamo_db_proxy.rb +132 -0
- data/lib/kcl/proxies/kinesis_proxy.rb +56 -0
- data/lib/kcl/record_processor.rb +13 -0
- data/lib/kcl/record_processor_factory.rb +5 -0
- data/lib/kcl/types/extended_sequence_number.rb +89 -0
- data/lib/kcl/types/initialization_input.rb +13 -0
- data/lib/kcl/types/records_input.rb +15 -0
- data/lib/kcl/types/shutdown_input.rb +13 -0
- data/lib/kcl/version.rb +3 -0
- data/lib/kcl/worker.rb +159 -0
- data/lib/kcl/workers/consumer.rb +80 -0
- data/lib/kcl/workers/record_checkpointer.rb +14 -0
- data/lib/kcl/workers/shard_info.rb +47 -0
- data/lib/kcl/workers/shutdown_reason.rb +6 -0
- data/terraform/main.tf +35 -0
- metadata +191 -0
@@ -0,0 +1,17 @@
|
|
1
|
+
# Sentinel checkpoint values.
# They are used while ShardConsumers are initialized to choose the starting
# position in a shard, and to mark a shard as completely processed.
module Kcl::Checkpoints
  module Sentinel
    # Begin with the oldest record still available in the shard.
    TRIM_HORIZON = 'TRIM_HORIZON'.freeze

    # Begin with the most recent record in the shard.
    LATEST = 'LATEST'.freeze

    # Every record in the shard has been processed.
    SHARD_END = 'SHARD_END'.freeze

    # Begin with the record at or after a given server-side timestamp.
    AT_TIMESTAMP = 'AT_TIMESTAMP'.freeze

    # Resume from a sequence number in the shard.
    AFTER_SEQUENCE_NUMBER = 'AFTER_SEQUENCE_NUMBER'.freeze
  end
end
|
data/lib/kcl/config.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# Plain settings object for the library. Every setting is a read/write
# attribute; #initialize assigns the defaults.
class Kcl::Config
  attr_accessor :aws_region,
                :aws_access_key_id,
                :aws_secret_access_key,
                :dynamodb_endpoint,
                :dynamodb_table_name,
                :dynamodb_read_capacity,
                :dynamodb_write_capacity,
                :dynamodb_failover_seconds,
                :kinesis_endpoint,
                :kinesis_stream_name,
                :logger,
                :log_level,
                :max_lease_count,
                :use_ssl,
                :worker_count

  # Assign the default value of every setting.
  def initialize
    # AWS credentials and region: no default, must be supplied by the caller.
    @aws_region = nil
    @aws_access_key_id = nil
    @aws_secret_access_key = nil

    # NOTE(review): both endpoints default to a localhost address, presumably
    # a local AWS emulator -- confirm before relying on it in production.
    @dynamodb_endpoint = 'https://localhost:4566'
    @dynamodb_table_name = nil
    @dynamodb_read_capacity = 10
    @dynamodb_write_capacity = 10
    @dynamodb_failover_seconds = 10
    @kinesis_endpoint = 'https://localhost:4566'
    @kinesis_stream_name = nil

    @logger = nil
    # log_level is a declared accessor, so give it an explicit default like
    # every other setting instead of leaving the variable unassigned.
    @log_level = nil

    @max_lease_count = 1
    @use_ssl = false
    @worker_count = 1
  end
end
|
data/lib/kcl/errors.rb
ADDED
data/lib/kcl/logger.rb
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
require 'aws-sdk-dynamodb'
|
2
|
+
|
3
|
+
module Kcl::Proxies
  # Wrapper around Aws::DynamoDB::Client exposing the table and item
  # operations used by the library.
  class DynamoDbProxy
    attr_reader :client

    # Builds the SDK client from the given configuration.
    # config.use_ssl is forwarded as the SDK's ssl_verify_peer option.
    #
    # @param [Kcl::Config] config
    def initialize(config)
      options = {
        access_key_id: config.aws_access_key_id,
        secret_access_key: config.aws_secret_access_key,
        region: config.aws_region,
        endpoint: config.dynamodb_endpoint,
        ssl_verify_peer: config.use_ssl
      }
      @client = Aws::DynamoDB::Client.new(options)
    end

    # Checks whether the table can be described.
    #
    # @param [String] table_name
    # @return [Boolean] false when the SDK reports the table as not found
    def exists?(table_name)
      request = { table_name: table_name }
      @client.describe_table(request)
      true
    rescue Aws::DynamoDB::Errors::NotFound,
           Aws::DynamoDB::Errors::ResourceNotFoundException
      false
    end

    # Creates a table. SDK errors are not rescued here.
    #
    # @param [String] table_name
    # @param [Array] attributes attribute definitions
    # @param [Array] schema key schema
    # @param [Hash] throughputs provisioned throughput
    # @return the response of the SDK's create_table call
    def create_table(table_name, attributes = [], schema = [], throughputs = {})
      request = {
        table_name: table_name,
        attribute_definitions: attributes,
        key_schema: schema,
        provisioned_throughput: throughputs
      }
      @client.create_table(request)
    end

    # @param [String] table_name
    # @return [Boolean] false when the table does not exist
    def delete_table(table_name)
      request = { table_name: table_name }
      true_unless_resource_missing { @client.delete_table(request) }
    end

    # @param [String] table_name
    # @param [Hash] conditions key of the item to read
    # @return [Hash, nil] the item from the response; nil when the SDK raises
    #   ResourceNotFoundException
    def get_item(table_name, conditions)
      request = { table_name: table_name, key: conditions }
      @client.get_item(request).item
    rescue Aws::DynamoDB::Errors::ResourceNotFoundException
      nil
    end

    # @param [String] table_name
    # @param [Hash] item
    # @return [Boolean] false when the SDK raises ResourceNotFoundException
    def put_item(table_name, item)
      request = { table_name: table_name, item: item }
      true_unless_resource_missing { @client.put_item(request) }
    end

    # @param [String] table_name
    # @param [Hash] conditions key of the item to update
    # @param [String] update_expression
    # @return [Boolean] false when the SDK raises ResourceNotFoundException
    def update_item(table_name, conditions, update_expression)
      request = {
        table_name: table_name,
        key: conditions,
        update_expression: update_expression
      }
      true_unless_resource_missing { @client.update_item(request) }
    end

    # Writes the item with put_item guarded by a condition expression.
    # Only ResourceNotFoundException is rescued; any other SDK error
    # propagates to the caller.
    #
    # @param [String] table_name
    # @param [Hash] item
    # @param [String] condition_expression
    # @param [Hash] expression_attributes values referenced by the expression
    # @return [Boolean] false when the SDK raises ResourceNotFoundException
    def conditional_update_item(table_name, item, condition_expression, expression_attributes)
      request = {
        table_name: table_name,
        item: item,
        condition_expression: condition_expression,
        expression_attribute_values: expression_attributes
      }
      true_unless_resource_missing { @client.put_item(request) }
    end

    # @param [String] table_name
    # @param [Hash] conditions key of the item to delete
    # @return [Boolean] false when the SDK raises ResourceNotFoundException
    def remove_item(table_name, conditions)
      request = { table_name: table_name, key: conditions }
      true_unless_resource_missing { @client.delete_item(request) }
    end

    private

    # Runs the block and returns true, or false when the block raises
    # Aws::DynamoDB::Errors::ResourceNotFoundException.
    def true_unless_resource_missing
      yield
      true
    rescue Aws::DynamoDB::Errors::ResourceNotFoundException
      false
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'aws-sdk-kinesis'
|
2
|
+
|
3
|
+
module Kcl::Proxies
  # Wrapper around Aws::Kinesis::Client bound to a single stream name.
  class KinesisProxy
    attr_reader :client

    # Builds the SDK client from the given configuration and remembers the
    # stream name. config.use_ssl is forwarded as ssl_verify_peer.
    #
    # @param [Kcl::Config] config
    def initialize(config)
      @client = Aws::Kinesis::Client.new(
        {
          access_key_id: config.aws_access_key_id,
          secret_access_key: config.aws_secret_access_key,
          region: config.aws_region,
          endpoint: config.kinesis_endpoint,
          ssl_verify_peer: config.use_ssl
        }
      )
      @stream_name = config.kinesis_stream_name
    end

    # Lists every shard of the stream.
    # describe_stream is paginated: while the response reports
    # has_more_shards, the next page is requested starting after the last
    # shard id already received.
    #
    # @return [Array]
    def shards
      collected = []
      params = { stream_name: @stream_name }

      loop do
        description = @client.describe_stream(params).stream_description
        page = description.shards
        collected.concat(page)

        last_shard = page.last
        # Stop on the final page; the last_shard guard avoids looping forever
        # if a page reports more shards but is empty.
        break unless description.has_more_shards && last_shard

        params = params.merge(exclusive_start_shard_id: last_shard.shard_id)
      end

      collected
    end

    # Requests a shard iterator. The iterator type defaults to LATEST.
    # The sequence number is sent only for the iterator types that take one
    # (AT_SEQUENCE_NUMBER and AFTER_SEQUENCE_NUMBER).
    #
    # @param [String] shard_id
    # @param [String] shard_iterator_type
    # @param [String] sequence_number
    # @return [String]
    def get_shard_iterator(shard_id, shard_iterator_type = nil, sequence_number = nil)
      params = {
        stream_name: @stream_name,
        shard_id: shard_id,
        shard_iterator_type: shard_iterator_type || Kcl::Checkpoints::Sentinel::LATEST
      }

      case shard_iterator_type
      when 'AT_SEQUENCE_NUMBER', Kcl::Checkpoints::Sentinel::AFTER_SEQUENCE_NUMBER
        params[:starting_sequence_number] = sequence_number
      end

      @client.get_shard_iterator(params).shard_iterator
    end

    # Reads records through the given shard iterator.
    #
    # @param [String] shard_iterator
    # @return [Hash] with keys :records and :next_shard_iterator
    def get_records(shard_iterator)
      res = @client.get_records({ shard_iterator: shard_iterator })
      { records: res.records, next_shard_iterator: res.next_shard_iterator }
    end

    # Forwards data unchanged to the SDK's put_record.
    #
    # @param [Hash] data
    # @return [Hash] with keys :shard_id and :sequence_number
    def put_record(data)
      res = @client.put_record(data)
      { shard_id: res.shard_id, sequence_number: res.sequence_number }
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# Abstract base for record processors. Each hook raises NotImplementedError
# until a subclass overrides it.
class Kcl::RecordProcessor
  # @param _initialization_input presumably a Kcl::Types::InitializationInput
  # @raise [NotImplementedError] unless overridden
  def after_initialize(_initialization_input)
    raise NotImplementedError, "You must implement #{self.class}##{__method__}"
  end

  # @param _records_input presumably a Kcl::Types::RecordsInput
  # @raise [NotImplementedError] unless overridden
  def process_records(_records_input)
    raise NotImplementedError, "You must implement #{self.class}##{__method__}"
  end

  # @param _shutdown_input presumably a Kcl::Types::ShutdownInput
  # @raise [NotImplementedError] unless overridden
  def shutdown(_shutdown_input)
    raise NotImplementedError, "You must implement #{self.class}##{__method__}"
  end
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'bigdecimal'
|
2
|
+
|
3
|
+
module Kcl::Types
  # A sequence number (a string of digits or a sentinel name) paired with a
  # sub-sequence number.
  class ExtendedSequenceNumber
    attr_reader :sequence_number, :sub_sequence_number

    # Numeric stand-ins returned by #value for the sentinel checkpoints.
    TRIM_HORIZON_VALUE = BigDecimal(-2)
    LATEST_VALUE = BigDecimal(-1)
    AT_TIMESTAMP_VALUE = BigDecimal(-3)

    # Memoized instance for the LATEST sentinel.
    # @return [Kcl::Types::ExtendedSequenceNumber]
    def self.latest
      @_latest ||= new(Kcl::Checkpoints::Sentinel::LATEST)
    end

    # Memoized instance for the SHARD_END sentinel.
    # @return [Kcl::Types::ExtendedSequenceNumber]
    def self.shard_end
      @_shard_end ||= new(Kcl::Checkpoints::Sentinel::SHARD_END)
    end

    # Memoized instance for the TRIM_HORIZON sentinel.
    # @return [Kcl::Types::ExtendedSequenceNumber]
    def self.trim_horizon
      @_trim_horizon ||= new(Kcl::Checkpoints::Sentinel::TRIM_HORIZON)
    end

    # @param [String] str
    # @return [Boolean] true for a digit string or a sentinel name
    def self.digits_or_sentinel?(str)
      digits?(str) || sentinel?(str)
    end

    # @param [String] str
    # @return [Boolean] true for TRIM_HORIZON, LATEST, SHARD_END or
    #   AT_TIMESTAMP
    def self.sentinel?(str)
      case str
      when Kcl::Checkpoints::Sentinel::TRIM_HORIZON,
           Kcl::Checkpoints::Sentinel::LATEST,
           Kcl::Checkpoints::Sentinel::SHARD_END,
           Kcl::Checkpoints::Sentinel::AT_TIMESTAMP
        true
      else
        false
      end
    end

    # @param [String] str
    # @return [Boolean] true only for a non-empty String made of ASCII
    #   digits; nil and non-String values give false instead of raising
    def self.digits?(str)
      return false unless str.is_a?(String)

      # The pattern requires at least one digit, so '' is rejected too.
      str.match?(/\A[0-9]+\z/)
    end

    # @param [String] sequence_number
    # @param [Number] sub_sequence_number
    def initialize(sequence_number, sub_sequence_number = 0)
      @sequence_number = sequence_number
      @sub_sequence_number = sub_sequence_number
    end

    # Numeric value of the sequence number: the digits themselves, or the
    # stand-in constant for LATEST, TRIM_HORIZON and AT_TIMESTAMP.
    #
    # @return [BigDecimal]
    # @raise [Kcl::Errors::IllegalArgumentError] for any other value
    #   (including SHARD_END and nil)
    def value
      return BigDecimal(@sequence_number) if self.class.digits?(@sequence_number)

      case @sequence_number
      when Kcl::Checkpoints::Sentinel::LATEST
        LATEST_VALUE
      when Kcl::Checkpoints::Sentinel::TRIM_HORIZON
        TRIM_HORIZON_VALUE
      when Kcl::Checkpoints::Sentinel::AT_TIMESTAMP
        AT_TIMESTAMP_VALUE
      else
        # Interpolate rather than concatenate: String + nil (or any
        # non-String) would raise TypeError and mask the intended error.
        raise Kcl::Errors::IllegalArgumentError,
              "Expected a string of digits, TRIM_HORIZON, LATEST or AT_TIMESTAMP but received #{@sequence_number}"
      end
    end

    # Compares both the sequence number and the sub-sequence number.
    #
    # @param [Kcl::Types::ExtendedSequenceNumber] extended_sequence_number
    # @return [Boolean] false when the argument is nil
    def equals(extended_sequence_number)
      return false if extended_sequence_number.nil?
      return false if @sequence_number != extended_sequence_number.sequence_number

      @sub_sequence_number == extended_sequence_number.sub_sequence_number
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Kcl::Types
  # Read-only holder for a shard id and an extended sequence number.
  # Presumably the argument of RecordProcessor#after_initialize -- the caller
  # is outside this file.
  class InitializationInput
    attr_reader :shard_id
    attr_reader :extended_sequence_number

    # @param [String] shard_id
    # @param [Kcl::Types::ExtendedSequenceNumber] extended_sequence_number
    def initialize(shard_id, extended_sequence_number)
      @shard_id = shard_id
      @extended_sequence_number = extended_sequence_number
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Kcl::Types
  # Read-only holder for a batch of records, the millis-behind-latest figure
  # and a record checkpointer.
  # Presumably the argument of RecordProcessor#process_records -- the caller
  # is outside this file.
  class RecordsInput
    attr_reader :records
    attr_reader :millis_behind_latest
    attr_reader :record_checkpointer

    # @param [Array] records
    # @param [Number] millis_behind_latest
    # @param [Kcl::Workers::RecordCheckpointer] record_checkpointer
    def initialize(records, millis_behind_latest, record_checkpointer)
      @records = records
      @millis_behind_latest = millis_behind_latest
      @record_checkpointer = record_checkpointer
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Kcl::Types
  # Read-only holder for a shutdown reason and a record checkpointer.
  # Presumably the argument of RecordProcessor#shutdown -- the caller is
  # outside this file.
  class ShutdownInput
    attr_reader :shutdown_reason
    attr_reader :record_checkpointer

    # @param [Kcl::Workers::ShutdownReason] shutdown_reason
    # @param [Kcl::Workers::RecordCheckpointer] record_checkpointer
    def initialize(shutdown_reason, record_checkpointer)
      @shutdown_reason = shutdown_reason
      @record_checkpointer = record_checkpointer
    end
  end
end
|
data/lib/kcl/version.rb
ADDED
data/lib/kcl/worker.rb
ADDED
@@ -0,0 +1,159 @@
|
|
1
|
+
require 'eventmachine'
|
2
|
+
|
3
|
+
# Periodically synchronizes the stream's shard list and runs one consumer
# thread per shard that this worker leases.
class Kcl::Worker
  PROCESS_INTERVAL = 1 # by sec

  # Builds a worker and starts it.
  #
  # @param id identifier recorded as the lease owner
  # @param record_processor_factory object responding to #create_processor
  def self.run(id, record_processor_factory)
    new(id, record_processor_factory).start
  end

  # @param id identifier recorded as the lease owner
  # @param record_processor_factory object responding to #create_processor
  def initialize(id, record_processor_factory)
    @id = id
    @record_processor_factory = record_processor_factory
    @live_shards = {} # Map<String, Boolean>
    @shards = {} # Map<String, Kcl::Workers::ShardInfo>
    @kinesis = nil # Kcl::Proxies::KinesisProxy
    @checkpointer = nil # Kcl::Checkpointer
    @timer = nil
  end

  # Start consuming data from the stream,
  # and pass it to the application record processors.
  #
  # Runs the EventMachine reactor. Every PROCESS_INTERVAL seconds the shard
  # list is synchronized and, if lease capacity remains, shards are consumed.
  # Once the reactor stops, the worker state is cleaned up. Errors are logged
  # and re-raised.
  def start
    Kcl.logger.info("Start worker at #{object_id}")

    EM.run do
      trap_signals

      @timer = EM::PeriodicTimer.new(PROCESS_INTERVAL) do
        sync_shards!
        consume_shards! if available_lease_shard?
      end
    end

    cleanup
    Kcl.logger.info("Finish worker at #{object_id}")
  rescue => e
    Kcl.logger.error("#{e.class}: #{e.message}")
    raise
  end

  # Shutdown gracefully: cancel the periodic timer and stop the reactor.
  # Errors are logged and re-raised.
  #
  # @param [Symbol] signal signal name, used only in the log message
  def shutdown(signal = :NONE)
    unless @timer.nil?
      @timer.cancel
      @timer = nil
    end
    EM.stop

    Kcl.logger.info("Shutdown worker with signal #{signal} at #{object_id}")
  rescue => e
    Kcl.logger.error("#{e.class}: #{e.message}")
    raise
  end

  # Cleanup resources: forget all shards and drop the proxy/checkpointer.
  def cleanup
    @live_shards = {}
    @shards = {}
    @kinesis = nil
    @checkpointer = nil
  end

  # Add new shards and delete unused shards
  #
  # @return [Hash] the shard map, keyed by shard id
  def sync_shards!
    # Mark every known shard as dead; shards still reported by Kinesis are
    # flipped back to alive below.
    @live_shards.transform_values! { |_| false }

    kinesis.shards.each do |shard|
      @live_shards[shard.shard_id] = true
      next if @shards[shard.shard_id]

      @shards[shard.shard_id] = Kcl::Workers::ShardInfo.new(
        shard.shard_id,
        shard.parent_shard_id,
        shard.sequence_number_range
      )
      Kcl.logger.info("Found new shard at shard_id: #{shard.shard_id}")
    end

    # Collect the ids first so @live_shards is not modified while iterating.
    dead_shard_ids = @live_shards.reject { |_, alive| alive }.keys
    dead_shard_ids.each do |shard_id|
      checkpointer.remove_lease(@shards[shard_id])
      @shards.delete(shard_id)
      # Also forget the liveness flag. Otherwise the stale id is revisited on
      # every later sync and remove_lease is called with nil.
      @live_shards.delete(shard_id)
      Kcl.logger.info("Remove shard at shard_id: #{shard_id}")
    end

    @shards
  end

  # Count the number of leases hold by worker excluding the processed shard
  #
  # @return [Boolean] true while that count is below Kcl.config.max_lease_count
  def available_lease_shard?
    leased_count = @shards.values.count do |shard|
      shard.lease_owner == @id && !shard.completed?
    end
    Kcl.config.max_lease_count > leased_count
  end

  # Process records by shard
  #
  # For every shard not already owned by this worker: fetch its checkpoint,
  # skip it when completed, lease it and consume it in its own thread.
  # Blocks until all consumer threads have finished.
  def consume_shards!
    threads = []
    @shards.each do |shard_id, shard|
      # already owner of the shard
      next if shard.lease_owner == @id

      begin
        shard = checkpointer.fetch_checkpoint(shard)
      rescue Kcl::Errors::CheckpointNotFoundError
        Kcl.logger.info("Not found checkpoint of shard at #{shard.to_h}")
        next
      end
      # shard is closed and processed all records
      next if shard.completed?

      shard = checkpointer.lease(shard, @id)

      threads << Thread.new do
        begin
          consumer = Kcl::Workers::Consumer.new(
            shard,
            @record_processor_factory.create_processor,
            kinesis,
            checkpointer
          )
          consumer.consume!
        ensure
          # Release ownership even when the consumer raised.
          checkpointer.remove_lease_owner(shard)
          Kcl.logger.info("Finish to consume shard at shard_id: #{shard_id}")
        end
      end
    end
    threads.each(&:join)
  end

  private

  # Lazily created Kinesis proxy.
  # @return [Kcl::Proxies::KinesisProxy]
  def kinesis
    if @kinesis.nil?
      @kinesis = Kcl::Proxies::KinesisProxy.new(Kcl.config)
      Kcl.logger.info('Created Kinesis session in worker')
    end
    @kinesis
  end

  # Lazily created checkpointer.
  # @return [Kcl::Checkpointer]
  def checkpointer
    if @checkpointer.nil?
      @checkpointer = Kcl::Checkpointer.new(Kcl.config)
      Kcl.logger.info('Created Checkpoint in worker')
    end
    @checkpointer
  end

  # On HUP, INT or TERM, schedule #shutdown on the reactor via a zero-delay
  # timer instead of calling it directly from the signal handler.
  def trap_signals
    %i[HUP INT TERM].each do |signal|
      trap(signal) do
        EM.add_timer(0) { shutdown(signal) }
      end
    end
  end
end
|