ruby-kafka-ec2 0.1.7 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +7 -9
- data/lib/kafka/ec2/mixed_instance_assignment_strategy.rb +36 -36
- data/lib/kafka/ec2/version.rb +1 -1
- data/lib/kafka/ec2.rb +1 -17
- metadata +6 -9
- data/lib/kafka/ec2/ext/consumer_group.rb +0 -33
- data/lib/kafka/ec2/ext/protocol/join_group_request.rb +0 -39
- data/lib/kafka/ec2/mixed_instance_assignment_strategy_factory.rb +0 -30
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e7732059807b7aad8dfe8df2758fa0dc9ad8f8063adaf9cf71b615c8384e74aa
|
4
|
+
data.tar.gz: feb725eb274ff28e6b3e5827f02d9c1b406c127f7109c6ec6a4001054665b125
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eade4b284de35a438d52b4f18928cb9e287fe65064c1a7baec50545627bd7996835150326e88499b2ee13d4499dd1aa1093471fdd829b8b2a545eecee1799a5b
|
7
|
+
data.tar.gz: fed5703514978a1986a720678d3590c29fcc9359802fe9b98a3a6632f7dcd28cf0f43151f19a8c5a1e1d9a609667a13598b3780589ff57480d928955f6219f8f
|
data/README.md
CHANGED
@@ -24,9 +24,9 @@ Or install it yourself as:
|
|
24
24
|
|
25
25
|
### Kafka::EC2::MixedInstanceAssignmentStrategy
|
26
26
|
|
27
|
-
`Kafka::EC2::MixedInstanceAssignmentStrategy` is an assignor for auto-scaling groups with mixed instance policies. The throughputs of consumers usually depend on instance families and availability zones. For example, if your application writes data to a database, the throughputs of consumers running on the same availability zone as the writer DB instance is higher.
|
27
|
+
`Kafka::EC2::MixedInstanceAssignmentStrategy` is an assignor for auto-scaling groups with mixed instance policies. The throughputs of consumers usually depend on instance families and availability zones. For example, if your application writes data to a database, the throughputs of consumers running in the same availability zone as that of the writer DB instance are higher.
|
28
28
|
|
29
|
-
To assign more partitions to consumers with high throughputs, you have to
|
29
|
+
To assign more partitions to consumers with high throughputs, you have to initialize `Kafka::EC2::MixedInstanceAssignmentStrategy` first like below:
|
30
30
|
|
31
31
|
```ruby
|
32
32
|
require "aws-sdk-rds"
|
@@ -34,7 +34,7 @@ require "kafka"
|
|
34
34
|
require "kafka/ec2"
|
35
35
|
|
36
36
|
rds = Aws::RDS::Client.new(region: "ap-northeast-1")
|
37
|
-
|
37
|
+
assignment_strategy = Kafka::EC2::MixedInstanceAssignmentStrategy.new(
|
38
38
|
instance_family_weights: {
|
39
39
|
"r4" => 1.00,
|
40
40
|
"r5" => 1.20,
|
@@ -68,19 +68,17 @@ assignment_strategy_factory = Kafka::EC2::MixedInstanceAssignmentStrategyFactory
|
|
68
68
|
|
69
69
|
In the preceding example, consumers running on c5 instances will have 1.5x as many partitions compared to consumers running on r4 instances. In a similar way, if the writer DB instance is in ap-northeast-1a, consumers in ap-northeast-1a will have 4x as many partitions compared to consumers in ap-northeast-1c.
|
70
70
|
|
71
|
-
You can use `Kafka::EC2::MixedInstanceAssignmentStrategy` by specifying
|
71
|
+
You can use `Kafka::EC2::MixedInstanceAssignmentStrategy` by passing it to `Kafka#consumer`:
|
72
72
|
|
73
73
|
|
74
74
|
```ruby
|
75
|
-
consumer =
|
76
|
-
kafka.consumer(group_id: ENV["KAFKA_CONSUMER_GROUP_ID"])
|
77
|
-
end
|
75
|
+
consumer = kafka.consumer(group_id: ENV["KAFKA_CONSUMER_GROUP_ID"], assignment_strategy: assignment_strategy)
|
78
76
|
```
|
79
77
|
|
80
78
|
You can also specify weights for each combination of availability zones and instance families:
|
81
79
|
|
82
80
|
```ruby
|
83
|
-
|
81
|
+
assignment_strategy = Kafka::EC2::MixedInstanceAssignmentStrategy.new(
|
84
82
|
weights: ->() {
|
85
83
|
db_cluster = rds.describe_db_clusters(filters: [
|
86
84
|
{ name: "db-cluster-id", values: [ENV["RDS_CLUSTER"]] },
|
@@ -121,7 +119,7 @@ assignment_strategy_factory = Kafka::EC2::MixedInstanceAssignmentStrategyFactory
|
|
121
119
|
The strategy also has the option `partition_weights`. This is useful when the topic has some skewed partitions. Suppose the partition with ID 0 of the topic "foo" receives twice as many records as other partitions. To reduce the number of partitions assigned to the consumer that consumes the partition with ID 0, specify `partition_weights` like below:
|
122
120
|
|
123
121
|
```ruby
|
124
|
-
|
122
|
+
assignment_strategy = Kafka::EC2::MixedInstanceAssignmentStrategy.new(
|
125
123
|
partition_weights: {
|
126
124
|
"foo" => {
|
127
125
|
0 => 2,
|
@@ -6,10 +6,8 @@ require "kafka/protocol/member_assignment"
|
|
6
6
|
module Kafka
|
7
7
|
class EC2
|
8
8
|
class MixedInstanceAssignmentStrategy
|
9
|
-
|
10
|
-
attr_accessor :member_id_to_metadata
|
9
|
+
DELIMITER = ","
|
11
10
|
|
12
|
-
# @param cluster [Kafka::Cluster]
|
13
11
|
# @param instance_family_weights [Hash{String => Numeric}, Proc] a hash whose key
|
14
12
|
# is the instance family and whose value is the weight. If the object is a proc,
|
15
13
|
# it must return such a hash and the proc is called every time the method "assign"
|
@@ -23,22 +21,35 @@ module Kafka
|
|
23
21
|
# instance_family_weights or availability_zone_weights. If the object is a proc,
|
24
22
|
# it must return such a hash and the proc is called every time the method "assign"
|
25
23
|
# is called.
|
26
|
-
def initialize(
|
27
|
-
@cluster = cluster
|
24
|
+
def initialize(instance_family_weights: {}, availability_zone_weights: {}, weights: {}, partition_weights: {})
|
28
25
|
@instance_family_weights = instance_family_weights
|
29
26
|
@availability_zone_weights = availability_zone_weights
|
30
27
|
@weights = weights
|
31
28
|
@partition_weights = partition_weights
|
32
29
|
end
|
33
30
|
|
31
|
+
def protocol_name
|
32
|
+
"mixedinstance"
|
33
|
+
end
|
34
|
+
|
35
|
+
def user_data
|
36
|
+
Net::HTTP.start("169.254.169.254", 80) do |http|
|
37
|
+
[
|
38
|
+
http.get("/latest/meta-data/instance-id").body,
|
39
|
+
http.get("/latest/meta-data/instance-type").body,
|
40
|
+
http.get("/latest/meta-data/placement/availability-zone").body,
|
41
|
+
].join(DELIMITER)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
34
45
|
# Assign the topic partitions to the group members.
|
35
46
|
#
|
36
47
|
# @param members [Array<String>] member ids
|
37
48
|
# @param topics [Array<String>] topics
|
38
49
|
# @return [Hash{String => Protocol::MemberAssignment}] a hash mapping member
|
39
50
|
# ids to assignments.
|
40
|
-
def
|
41
|
-
|
51
|
+
def call(cluster:, members:, partitions:)
|
52
|
+
member_id_to_partitions = Hash.new { |h, k| h[k] = [] }
|
42
53
|
instance_id_to_capacity = Hash.new(0)
|
43
54
|
instance_id_to_member_ids = Hash.new { |h, k| h[k] = [] }
|
44
55
|
total_capacity = 0
|
@@ -47,10 +58,8 @@ module Kafka
|
|
47
58
|
instance_family_to_capacity = @instance_family_weights.is_a?(Proc) ? @instance_family_weights.call() : @instance_family_weights
|
48
59
|
az_to_capacity = @availability_zone_weights.is_a?(Proc) ? @availability_zone_weights.call() : @availability_zone_weights
|
49
60
|
weights = @weights.is_a?(Proc) ? @weights.call() : @weights
|
50
|
-
members.each do |member_id|
|
51
|
-
|
52
|
-
|
53
|
-
instance_id, instance_type, az = member_id_to_metadata[member_id].split(",")
|
61
|
+
members.each do |member_id, metadata|
|
62
|
+
instance_id, instance_type, az = metadata.user_data.split(DELIMITER)
|
54
63
|
instance_id_to_member_ids[instance_id] << member_id
|
55
64
|
member_id_to_instance_id[member_id] = instance_id
|
56
65
|
capacity = calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity, weights)
|
@@ -58,17 +67,8 @@ module Kafka
|
|
58
67
|
total_capacity += capacity
|
59
68
|
end
|
60
69
|
|
61
|
-
|
62
|
-
|
63
|
-
partitions = @cluster.partitions_for(topic).map(&:partition_id)
|
64
|
-
rescue UnknownTopicOrPartition
|
65
|
-
raise UnknownTopicOrPartition, "unknown topic #{topic}"
|
66
|
-
end
|
67
|
-
Array.new(partitions.count) { topic }.zip(partitions)
|
68
|
-
end
|
69
|
-
|
70
|
-
partition_weights = build_partition_weights(topics)
|
71
|
-
partition_weight_per_capacity = topic_partitions.sum { |topic, partition| partition_weights.dig(topic, partition) } / total_capacity
|
70
|
+
partition_weights = build_partition_weights(partitions)
|
71
|
+
partition_weight_per_capacity = partitions.sum { |partition| partition_weights.dig(partition.topic, partition.partition_id) } / total_capacity
|
72
72
|
|
73
73
|
last_index = 0
|
74
74
|
member_id_to_acceptable_partition_weight = {}
|
@@ -77,12 +77,12 @@ module Kafka
|
|
77
77
|
member_ids = instance_id_to_member_ids[instance_id]
|
78
78
|
member_ids.each do |member_id|
|
79
79
|
acceptable_partition_weight = capacity * partition_weight_per_capacity / member_ids.size
|
80
|
-
while last_index <
|
81
|
-
|
82
|
-
partition_weight = partition_weights.dig(topic, partition)
|
80
|
+
while last_index < partitions.size
|
81
|
+
partition = partitions[last_index]
|
82
|
+
partition_weight = partition_weights.dig(partition.topic, partition.partition_id)
|
83
83
|
break if acceptable_partition_weight - partition_weight < 0
|
84
84
|
|
85
|
-
|
85
|
+
member_id_to_partitions[member_id] << partition
|
86
86
|
acceptable_partition_weight -= partition_weight
|
87
87
|
|
88
88
|
last_index += 1
|
@@ -93,7 +93,7 @@ module Kafka
|
|
93
93
|
end
|
94
94
|
end
|
95
95
|
|
96
|
-
while last_index <
|
96
|
+
while last_index < partitions.size
|
97
97
|
max_acceptable_partition_weight = member_id_to_acceptable_partition_weight.values.max
|
98
98
|
member_ids = member_id_to_acceptable_partition_weight.select { |_, w| w == max_acceptable_partition_weight }.keys
|
99
99
|
if member_ids.size == 1
|
@@ -101,17 +101,17 @@ module Kafka
|
|
101
101
|
else
|
102
102
|
member_id = member_ids.max_by { |id| instance_id_to_total_acceptable_partition_weight[member_id_to_instance_id[id]] }
|
103
103
|
end
|
104
|
-
|
105
|
-
|
104
|
+
partition = partitions[last_index]
|
105
|
+
member_id_to_partitions[member_id] << partition
|
106
106
|
|
107
|
-
partition_weight = partition_weights.dig(topic, partition)
|
107
|
+
partition_weight = partition_weights.dig(partition.topic, partition.partition_id)
|
108
108
|
member_id_to_acceptable_partition_weight[member_id] -= partition_weight
|
109
109
|
instance_id_to_total_acceptable_partition_weight[member_id_to_instance_id[member_id]] -= partition_weight
|
110
110
|
|
111
111
|
last_index += 1
|
112
112
|
end
|
113
113
|
|
114
|
-
|
114
|
+
member_id_to_partitions
|
115
115
|
rescue Kafka::LeaderNotAvailable
|
116
116
|
sleep 1
|
117
117
|
retry
|
@@ -126,12 +126,12 @@ module Kafka
|
|
126
126
|
(capacity || instance_family_to_capacity.fetch(instance_family, 1) * az_to_capacity.fetch(az, 1)).to_f
|
127
127
|
end
|
128
128
|
|
129
|
-
def build_partition_weights(
|
129
|
+
def build_partition_weights(partitions)
|
130
130
|
# Duplicate the weights so as not to mutate @partition_weights or the hash returned by the @partition_weights proc
|
131
|
-
weights = (@partition_weights.is_a?(Proc) ? @partition_weights.call
|
132
|
-
|
133
|
-
weights[
|
134
|
-
weights[
|
131
|
+
weights = (@partition_weights.is_a?(Proc) ? @partition_weights.call : @partition_weights).dup
|
132
|
+
partitions.map(&:topic).uniq.each do |topic|
|
133
|
+
weights[topic] = weights[topic].dup || {}
|
134
|
+
weights[topic].default = 1
|
135
135
|
end
|
136
136
|
|
137
137
|
weights
|
data/lib/kafka/ec2/version.rb
CHANGED
data/lib/kafka/ec2.rb
CHANGED
@@ -1,23 +1,7 @@
|
|
1
|
-
require "kafka/ec2/
|
2
|
-
require "kafka/ec2/ext/protocol/join_group_request"
|
3
|
-
require "kafka/ec2/mixed_instance_assignment_strategy_factory"
|
1
|
+
require "kafka/ec2/mixed_instance_assignment_strategy"
|
4
2
|
require "kafka/ec2/version"
|
5
3
|
|
6
4
|
module Kafka
|
7
5
|
class EC2
|
8
|
-
class << self
|
9
|
-
attr_reader :assignment_strategy_factory
|
10
|
-
|
11
|
-
def with_assignment_strategy_factory(factory)
|
12
|
-
@assignment_strategy_factory = factory
|
13
|
-
yield
|
14
|
-
ensure
|
15
|
-
@assignment_strategy_factory = nil
|
16
|
-
end
|
17
|
-
|
18
|
-
def assignment_strategy_classes
|
19
|
-
@assignment_strategy_classes ||= {}
|
20
|
-
end
|
21
|
-
end
|
22
6
|
end
|
23
7
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-kafka-ec2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- abicky
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-03-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-kafka
|
@@ -89,10 +89,7 @@ files:
|
|
89
89
|
- bin/console
|
90
90
|
- bin/setup
|
91
91
|
- lib/kafka/ec2.rb
|
92
|
-
- lib/kafka/ec2/ext/consumer_group.rb
|
93
|
-
- lib/kafka/ec2/ext/protocol/join_group_request.rb
|
94
92
|
- lib/kafka/ec2/mixed_instance_assignment_strategy.rb
|
95
|
-
- lib/kafka/ec2/mixed_instance_assignment_strategy_factory.rb
|
96
93
|
- lib/kafka/ec2/version.rb
|
97
94
|
- ruby-kafka-ec2.gemspec
|
98
95
|
homepage: https://github.com/abicky/ruby-kafka-ec2
|
@@ -101,7 +98,7 @@ licenses:
|
|
101
98
|
metadata:
|
102
99
|
homepage_uri: https://github.com/abicky/ruby-kafka-ec2
|
103
100
|
source_code_uri: https://github.com/abicky/ruby-kafka-ec2
|
104
|
-
post_install_message:
|
101
|
+
post_install_message:
|
105
102
|
rdoc_options: []
|
106
103
|
require_paths:
|
107
104
|
- lib
|
@@ -116,8 +113,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
116
113
|
- !ruby/object:Gem::Version
|
117
114
|
version: '0'
|
118
115
|
requirements: []
|
119
|
-
rubygems_version: 3.
|
120
|
-
signing_key:
|
116
|
+
rubygems_version: 3.2.22
|
117
|
+
signing_key:
|
121
118
|
specification_version: 4
|
122
119
|
summary: An extension of ruby-kafka for EC2
|
123
120
|
test_files: []
|
@@ -1,33 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "kafka/consumer_group"
|
4
|
-
require "kafka/ec2/mixed_instance_assignment_strategy"
|
5
|
-
|
6
|
-
module Kafka
|
7
|
-
class EC2
|
8
|
-
module Ext
|
9
|
-
module ConsumerGroup
|
10
|
-
def initialize(*args, **kwargs)
|
11
|
-
super
|
12
|
-
if Kafka::EC2.assignment_strategy_factory
|
13
|
-
@assignment_strategy = Kafka::EC2.assignment_strategy_factory.create(cluster: @cluster)
|
14
|
-
end
|
15
|
-
Kafka::EC2.assignment_strategy_classes[@group_id] = @assignment_strategy.class
|
16
|
-
end
|
17
|
-
|
18
|
-
def join_group
|
19
|
-
super
|
20
|
-
if Kafka::EC2.assignment_strategy_classes[@group_id] == Kafka::EC2::MixedInstanceAssignmentStrategy
|
21
|
-
@assignment_strategy.member_id_to_metadata = @members
|
22
|
-
end
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
module Kafka
|
30
|
-
class ConsumerGroup
|
31
|
-
prepend Kafka::EC2::Ext::ConsumerGroup
|
32
|
-
end
|
33
|
-
end
|
@@ -1,39 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "net/http"
|
4
|
-
|
5
|
-
require "kafka/protocol/consumer_group_protocol"
|
6
|
-
require "kafka/protocol/join_group_request"
|
7
|
-
|
8
|
-
module Kafka
|
9
|
-
class EC2
|
10
|
-
module Ext
|
11
|
-
module Protocol
|
12
|
-
module JoinGroupRequest
|
13
|
-
def initialize(*args, topics: [], **kwargs)
|
14
|
-
super
|
15
|
-
if Kafka::EC2.assignment_strategy_classes[@group_id] == Kafka::EC2::MixedInstanceAssignmentStrategy
|
16
|
-
user_data = Net::HTTP.start("169.254.169.254", 80) do |http|
|
17
|
-
instance_id = http.get("/latest/meta-data/instance-id").body
|
18
|
-
instance_type = http.get("/latest/meta-data/instance-type").body
|
19
|
-
az = http.get("/latest/meta-data/placement/availability-zone").body
|
20
|
-
"|#{instance_id},#{instance_type},#{az}"
|
21
|
-
end
|
22
|
-
@group_protocols = {
|
23
|
-
"mixedinstance" => Kafka::Protocol::ConsumerGroupProtocol.new(topics: topics, user_data: user_data),
|
24
|
-
}
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
module Kafka
|
34
|
-
module Protocol
|
35
|
-
class JoinGroupRequest
|
36
|
-
prepend Kafka::EC2::Ext::Protocol::JoinGroupRequest
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
@@ -1,30 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "kafka/ec2/mixed_instance_assignment_strategy"
|
4
|
-
|
5
|
-
module Kafka
|
6
|
-
class EC2
|
7
|
-
class MixedInstanceAssignmentStrategyFactory
|
8
|
-
# @param instance_family_weights [Hash, Proc]
|
9
|
-
# @param availability_zone_weights [Hash, Proc]
|
10
|
-
# @param weights [Hash, Proc]
|
11
|
-
# @see Kafka::EC2::MixedInstanceAssignmentStrategy#initialize
|
12
|
-
def initialize(instance_family_weights: {}, availability_zone_weights: {}, weights: {}, partition_weights: {})
|
13
|
-
@instance_family_weights = instance_family_weights
|
14
|
-
@availability_zone_weights = availability_zone_weights
|
15
|
-
@weights = weights
|
16
|
-
@partition_weights = partition_weights
|
17
|
-
end
|
18
|
-
|
19
|
-
def create(cluster:)
|
20
|
-
Kafka::EC2::MixedInstanceAssignmentStrategy.new(
|
21
|
-
cluster: cluster,
|
22
|
-
instance_family_weights: @instance_family_weights,
|
23
|
-
availability_zone_weights: @availability_zone_weights,
|
24
|
-
weights: @weights,
|
25
|
-
partition_weights: @partition_weights,
|
26
|
-
)
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|