ruby-kafka-ec2 0.1.5 → 0.2.0
- checksums.yaml +4 -4
- data/README.md +7 -9
- data/lib/kafka/ec2/mixed_instance_assignment_strategy.rb +57 -44
- data/lib/kafka/ec2/version.rb +1 -1
- data/lib/kafka/ec2.rb +1 -17
- metadata +6 -9
- data/lib/kafka/ec2/ext/consumer_group.rb +0 -33
- data/lib/kafka/ec2/ext/protocol/join_group_request.rb +0 -39
- data/lib/kafka/ec2/mixed_instance_assignment_strategy_factory.rb +0 -30
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e7732059807b7aad8dfe8df2758fa0dc9ad8f8063adaf9cf71b615c8384e74aa
+  data.tar.gz: feb725eb274ff28e6b3e5827f02d9c1b406c127f7109c6ec6a4001054665b125
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: eade4b284de35a438d52b4f18928cb9e287fe65064c1a7baec50545627bd7996835150326e88499b2ee13d4499dd1aa1093471fdd829b8b2a545eecee1799a5b
+  data.tar.gz: fed5703514978a1986a720678d3590c29fcc9359802fe9b98a3a6632f7dcd28cf0f43151f19a8c5a1e1d9a609667a13598b3780589ff57480d928955f6219f8f
data/README.md
CHANGED
@@ -24,9 +24,9 @@ Or install it yourself as:
 
 ### Kafka::EC2::MixedInstanceAssignmentStrategy
 
-`Kafka::EC2::MixedInstanceAssignmentStrategy` is an assignor for auto-scaling groups with mixed instance policies. The throughputs of consumers usually depend on instance families and availability zones. For example, if your application writes data to a database, the throughputs of consumers running on the same availability zone as the writer DB instance is higher.
+`Kafka::EC2::MixedInstanceAssignmentStrategy` is an assignor for auto-scaling groups with mixed instance policies. The throughputs of consumers usually depend on instance families and availability zones. For example, if your application writes data to a database, the throughputs of consumers running on the same availability zone as that of the writer DB instance is higher.
 
-To assign more partitions to consumers with high throughputs, you have to
+To assign more partitions to consumers with high throughputs, you have to initialize `Kafka::EC2::MixedInstanceAssignmentStrategy` first like below:
 
 ```ruby
 require "aws-sdk-rds"
@@ -34,7 +34,7 @@ require "kafka"
 require "kafka/ec2"
 
 rds = Aws::RDS::Client.new(region: "ap-northeast-1")
-
+assignment_strategy = Kafka::EC2::MixedInstanceAssignmentStrategy.new(
   instance_family_weights: {
     "r4" => 1.00,
     "r5" => 1.20,
@@ -68,19 +68,17 @@ assignment_strategy_factory = Kafka::EC2::MixedInstanceAssignmentStrategyFactory
 
 In the preceding example, consumers running on c5 instances will have 1.5x as many partitions compared to consumers running on r4 instances. In a similar way, if the writer DB instance is in ap-northeast-1a, consumers in ap-northeast-1a will have 4x as many partitions compared to consumers in ap-northeast-1c.
 
-You can use `Kafka::EC2::MixedInstanceAssignmentStrategy` by specifying
+You can use `Kafka::EC2::MixedInstanceAssignmentStrategy` by specifying it to `Kafka#consumer`:
 
 
 ```ruby
-consumer =
-  kafka.consumer(group_id: ENV["KAFKA_CONSUMER_GROUP_ID"])
-end
+consumer = kafka.consumer(group_id: ENV["KAFKA_CONSUMER_GROUP_ID"], assignment_strategy: assignment_strategy)
 ```
 
 You can also specify weights for each combination of availability zones and instance families:
 
 ```ruby
-
+assignment_strategy = Kafka::EC2::MixedInstanceAssignmentStrategy.new(
   weights: ->() {
     db_cluster = rds.describe_db_clusters(filters: [
       { name: "db-cluster-id", values: [ENV["RDS_CLUSTER"]] },
@@ -121,7 +119,7 @@ assignment_strategy_factory = Kafka::EC2::MixedInstanceAssignmentStrategyFactory
 The strategy also has the option `partition_weights`. This is useful when the topic has some skewed partitions. Suppose the partition with ID 0 of the topic "foo" receives twice as many records as other partitions. To reduce the number of partitions assigned to the consumer that consumes the partition with ID 0, specify `partition_weights` like below:
 
 ```ruby
-
+assignment_strategy = Kafka::EC2::MixedInstanceAssignmentStrategy.new(
   partition_weights: {
     "foo" => {
       0 => 2,
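Taken together, the README hunks above describe the 0.2.0 usage: build `Kafka::EC2::MixedInstanceAssignmentStrategy` directly and hand it to `Kafka#consumer` via the `assignment_strategy:` option. The sketch below stitches those fragments into one runnable shape; the broker list, client id, and topic name are placeholders rather than part of the diff, and the weights follow the README's description (c5 at 1.5x of r4, ap-northeast-1a at 4x of ap-northeast-1c). The strategy reads the EC2 instance metadata endpoint, so this only makes sense on an EC2 instance.

```ruby
require "kafka"
require "kafka/ec2"

# Placeholder broker/client settings -- not part of the gem's README.
kafka = Kafka.new(ENV.fetch("KAFKA_BROKERS", "localhost:9092").split(","), client_id: "ruby-kafka-ec2-example")

# Weights as described in the README prose: c5 gets 1.5x the partitions of r4,
# and ap-northeast-1a gets 4x the partitions of ap-northeast-1c.
assignment_strategy = Kafka::EC2::MixedInstanceAssignmentStrategy.new(
  instance_family_weights: { "r4" => 1.00, "r5" => 1.20, "c5" => 1.50 },
  availability_zone_weights: { "ap-northeast-1a" => 4, "ap-northeast-1c" => 1 },
)

consumer = kafka.consumer(
  group_id: ENV["KAFKA_CONSUMER_GROUP_ID"],
  assignment_strategy: assignment_strategy, # new in 0.2.0: no factory, no block helper
)
consumer.subscribe("my-topic") # placeholder topic
consumer.each_message do |message|
  # process message.value here
end
```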
data/lib/kafka/ec2/mixed_instance_assignment_strategy.rb
CHANGED
@@ -6,10 +6,8 @@ require "kafka/protocol/member_assignment"
 module Kafka
   class EC2
     class MixedInstanceAssignmentStrategy
-
-      attr_accessor :member_id_to_metadata
+      DELIMITER = ","
 
-      # @param cluster [Kafka::Cluster]
       # @param instance_family_weights [Hash{String => Numeric}, Proc] a hash whose the key
       #   is the instance family and whose value is the weight. If the object is a proc,
       #   it must returns such a hash and the proc is called every time the method "assign"
@@ -23,82 +21,97 @@ module Kafka
       #   instance_family_weights or availability_zone_weights. If the object is a proc,
       #   it must returns such a hash and the proc is called every time the method "assign"
       #   is called.
-      def initialize(
-        @cluster = cluster
+      def initialize(instance_family_weights: {}, availability_zone_weights: {}, weights: {}, partition_weights: {})
         @instance_family_weights = instance_family_weights
         @availability_zone_weights = availability_zone_weights
         @weights = weights
         @partition_weights = partition_weights
       end
 
+      def protocol_name
+        "mixedinstance"
+      end
+
+      def user_data
+        Net::HTTP.start("169.254.169.254", 80) do |http|
+          [
+            http.get("/latest/meta-data/instance-id").body,
+            http.get("/latest/meta-data/instance-type").body,
+            http.get("/latest/meta-data/placement/availability-zone").body,
+          ].join(DELIMITER)
+        end
+      end
+
       # Assign the topic partitions to the group members.
       #
       # @param members [Array<String>] member ids
       # @param topics [Array<String>] topics
       # @return [Hash{String => Protocol::MemberAssignment}] a hash mapping member
       #   ids to assignments.
-      def
-
+      def call(cluster:, members:, partitions:)
+        member_id_to_partitions = Hash.new { |h, k| h[k] = [] }
         instance_id_to_capacity = Hash.new(0)
         instance_id_to_member_ids = Hash.new { |h, k| h[k] = [] }
         total_capacity = 0
+        member_id_to_instance_id = {}
 
         instance_family_to_capacity = @instance_family_weights.is_a?(Proc) ? @instance_family_weights.call() : @instance_family_weights
         az_to_capacity = @availability_zone_weights.is_a?(Proc) ? @availability_zone_weights.call() : @availability_zone_weights
         weights = @weights.is_a?(Proc) ? @weights.call() : @weights
-        members.each do |member_id|
-
-
-          instance_id, instance_type, az = member_id_to_metadata[member_id].split(",")
+        members.each do |member_id, metadata|
+          instance_id, instance_type, az = metadata.user_data.split(DELIMITER)
           instance_id_to_member_ids[instance_id] << member_id
+          member_id_to_instance_id[member_id] = instance_id
           capacity = calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity, weights)
           instance_id_to_capacity[instance_id] += capacity
           total_capacity += capacity
         end
 
-
-
-          partitions = @cluster.partitions_for(topic).map(&:partition_id)
-        rescue UnknownTopicOrPartition
-          raise UnknownTopicOrPartition, "unknown topic #{topic}"
-        end
-          Array.new(partitions.count) { topic }.zip(partitions)
-        end
-
-        partition_weights = build_partition_weights(topics)
-        partition_weight_per_capacity = topic_partitions.sum { |topic, partition| partition_weights.dig(topic, partition) } / total_capacity
+        partition_weights = build_partition_weights(partitions)
+        partition_weight_per_capacity = partitions.sum { |partition| partition_weights.dig(partition.topic, partition.partition_id) } / total_capacity
 
         last_index = 0
         member_id_to_acceptable_partition_weight = {}
+        instance_id_to_total_acceptable_partition_weight = Hash.new(0)
        instance_id_to_capacity.each do |instance_id, capacity|
           member_ids = instance_id_to_member_ids[instance_id]
           member_ids.each do |member_id|
             acceptable_partition_weight = capacity * partition_weight_per_capacity / member_ids.size
-
-
-            partition_weight = partition_weights.dig(topic, partition)
-            if
-
-
-            end
-
-            group_assignment[member_id].assign(topic, [partition])
-            last_index += 1
+            while last_index < partitions.size
+              partition = partitions[last_index]
+              partition_weight = partition_weights.dig(partition.topic, partition.partition_id)
+              break if acceptable_partition_weight - partition_weight < 0
+
+              member_id_to_partitions[member_id] << partition
              acceptable_partition_weight -= partition_weight
+
+              last_index += 1
            end
+
+            member_id_to_acceptable_partition_weight[member_id] = acceptable_partition_weight
+            instance_id_to_total_acceptable_partition_weight[instance_id] += acceptable_partition_weight
          end
        end
 
-
-
-
-
-
-
+        while last_index < partitions.size
+          max_acceptable_partition_weight = member_id_to_acceptable_partition_weight.values.max
+          member_ids = member_id_to_acceptable_partition_weight.select { |_, w| w == max_acceptable_partition_weight }.keys
+          if member_ids.size == 1
+            member_id = member_ids.first
+          else
+            member_id = member_ids.max_by { |id| instance_id_to_total_acceptable_partition_weight[member_id_to_instance_id[id]] }
          end
+          partition = partitions[last_index]
+          member_id_to_partitions[member_id] << partition
+
+          partition_weight = partition_weights.dig(partition.topic, partition.partition_id)
+          member_id_to_acceptable_partition_weight[member_id] -= partition_weight
+          instance_id_to_total_acceptable_partition_weight[member_id_to_instance_id[member_id]] -= partition_weight
+
+          last_index += 1
        end
 
-
+        member_id_to_partitions
       rescue Kafka::LeaderNotAvailable
         sleep 1
         retry
@@ -113,12 +126,12 @@ module Kafka
         (capacity || instance_family_to_capacity.fetch(instance_family, 1) * az_to_capacity.fetch(az, 1)).to_f
       end
 
-      def build_partition_weights(
+      def build_partition_weights(partitions)
         # Duplicate the weights to not destruct @partition_weights or the return value of @partition_weights
-        weights = (@partition_weights.is_a?(Proc) ? @partition_weights.call
-
-        weights[
-        weights[
+        weights = (@partition_weights.is_a?(Proc) ? @partition_weights.call : @partition_weights).dup
+        partitions.map(&:topic).uniq.each do |topic|
+          weights[topic] = weights[topic].dup || {}
+          weights[topic].default = 1
        end
 
        weights
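The rewrite above replaces the old `assign`-style entry point (which needed `@cluster` and the monkey-patched `member_id_to_metadata`) with the strategy interface that ruby-kafka's `assignment_strategy:` option expects: an object answering `protocol_name`, `user_data`, and `call(cluster:, members:, partitions:)`, where `members` maps member ids to join-group metadata whose `user_data` carries whatever each member reported, and each partition responds to `topic` and `partition_id`. As a rough, hypothetical illustration of that contract (not part of the gem), a strategy that ignores EC2 entirely and deals partitions out round-robin would look like this:

```ruby
# Hypothetical minimal strategy implementing the same interface the diff targets.
class RoundRobinishStrategy
  def protocol_name
    "roundrobinish"
  end

  # This strategy needs no per-member metadata.
  def user_data
    nil
  end

  # @param cluster [Kafka::Cluster] unused here
  # @param members [Hash{String => Object}] member id => join-group metadata
  # @param partitions [Array] partition objects responding to #topic and #partition_id
  # @return [Hash{String => Array}] member id => assigned partitions
  def call(cluster:, members:, partitions:)
    member_ids = members.keys
    assignment = Hash.new { |h, k| h[k] = [] }
    partitions.each_with_index do |partition, i|
      assignment[member_ids[i % member_ids.size]] << partition
    end
    assignment
  end
end
```

`MixedInstanceAssignmentStrategy#call` has the same shape, but it weights each member by its instance family and availability zone, reported through `user_data` from the instance metadata endpoint, before handing out partitions.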
data/lib/kafka/ec2/version.rb
CHANGED
data/lib/kafka/ec2.rb
CHANGED
@@ -1,23 +1,7 @@
-require "kafka/ec2/
-require "kafka/ec2/ext/protocol/join_group_request"
-require "kafka/ec2/mixed_instance_assignment_strategy_factory"
+require "kafka/ec2/mixed_instance_assignment_strategy"
 require "kafka/ec2/version"
 
 module Kafka
   class EC2
-    class << self
-      attr_reader :assignment_strategy_factory
-
-      def with_assignment_strategy_factory(factory)
-        @assignment_strategy_factory = factory
-        yield
-      ensure
-        @assignment_strategy_factory = nil
-      end
-
-      def assignment_strategy_classes
-        @assignment_strategy_classes ||= {}
-      end
-    end
   end
 end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka-ec2
 version: !ruby/object:Gem::Version
-  version: 0.1.5
+  version: 0.2.0
 platform: ruby
 authors:
 - abicky
-autorequire:
+autorequire:
 bindir: exe
 cert_chain: []
-date:
+date: 2022-03-29 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ruby-kafka
@@ -89,10 +89,7 @@ files:
 - bin/console
 - bin/setup
 - lib/kafka/ec2.rb
-- lib/kafka/ec2/ext/consumer_group.rb
-- lib/kafka/ec2/ext/protocol/join_group_request.rb
 - lib/kafka/ec2/mixed_instance_assignment_strategy.rb
-- lib/kafka/ec2/mixed_instance_assignment_strategy_factory.rb
 - lib/kafka/ec2/version.rb
 - ruby-kafka-ec2.gemspec
 homepage: https://github.com/abicky/ruby-kafka-ec2
@@ -101,7 +98,7 @@ licenses:
 metadata:
   homepage_uri: https://github.com/abicky/ruby-kafka-ec2
   source_code_uri: https://github.com/abicky/ruby-kafka-ec2
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -116,8 +113,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: '0'
 requirements: []
-rubygems_version: 3.
-signing_key:
+rubygems_version: 3.2.22
+signing_key:
 specification_version: 4
 summary: An extension of ruby-kafka for EC2
 test_files: []
data/lib/kafka/ec2/ext/consumer_group.rb
DELETED
@@ -1,33 +0,0 @@
-# frozen_string_literal: true
-
-require "kafka/consumer_group"
-require "kafka/ec2/mixed_instance_assignment_strategy"
-
-module Kafka
-  class EC2
-    module Ext
-      module ConsumerGroup
-        def initialize(*args, **kwargs)
-          super
-          if Kafka::EC2.assignment_strategy_factory
-            @assignment_strategy = Kafka::EC2.assignment_strategy_factory.create(cluster: @cluster)
-          end
-          Kafka::EC2.assignment_strategy_classes[@group_id] = @assignment_strategy.class
-        end
-
-        def join_group
-          super
-          if Kafka::EC2.assignment_strategy_classes[@group_id] == Kafka::EC2::MixedInstanceAssignmentStrategy
-            @assignment_strategy.member_id_to_metadata = @members
-          end
-        end
-      end
-    end
-  end
-end
-
-module Kafka
-  class ConsumerGroup
-    prepend Kafka::EC2::Ext::ConsumerGroup
-  end
-end
data/lib/kafka/ec2/ext/protocol/join_group_request.rb
DELETED
@@ -1,39 +0,0 @@
-# frozen_string_literal: true
-
-require "net/http"
-
-require "kafka/protocol/consumer_group_protocol"
-require "kafka/protocol/join_group_request"
-
-module Kafka
-  class EC2
-    module Ext
-      module Protocol
-        module JoinGroupRequest
-          def initialize(*args, topics: [], **kwargs)
-            super
-            if Kafka::EC2.assignment_strategy_classes[@group_id] == Kafka::EC2::MixedInstanceAssignmentStrategy
-              user_data = Net::HTTP.start("169.254.169.254", 80) do |http|
-                instance_id = http.get("/latest/meta-data/instance-id").body
-                instance_type = http.get("/latest/meta-data/instance-type").body
-                az = http.get("/latest/meta-data/placement/availability-zone").body
-                "|#{instance_id},#{instance_type},#{az}"
-              end
-              @group_protocols = {
-                "mixedinstance" => Kafka::Protocol::ConsumerGroupProtocol.new(topics: topics, user_data: user_data),
-              }
-            end
-          end
-        end
-      end
-    end
-  end
-end
-
-module Kafka
-  module Protocol
-    class JoinGroupRequest
-      prepend Kafka::EC2::Ext::Protocol::JoinGroupRequest
-    end
-  end
-end
data/lib/kafka/ec2/mixed_instance_assignment_strategy_factory.rb
DELETED
@@ -1,30 +0,0 @@
-# frozen_string_literal: true
-
-require "kafka/ec2/mixed_instance_assignment_strategy"
-
-module Kafka
-  class EC2
-    class MixedInstanceAssignmentStrategyFactory
-      # @param instance_family_weights [Hash, Proc]
-      # @param availability_zone_weights [Hash, Proc]
-      # @param weights [Hash, Proc]
-      # @see Kafka::EC2::MixedInstanceAssignmentStrategy#initialize
-      def initialize(instance_family_weights: {}, availability_zone_weights: {}, weights: {}, partition_weights: {})
-        @instance_family_weights = instance_family_weights
-        @availability_zone_weights = availability_zone_weights
-        @weights = weights
-        @partition_weights = partition_weights
-      end
-
-      def create(cluster:)
-        Kafka::EC2::MixedInstanceAssignmentStrategy.new(
-          cluster: cluster,
-          instance_family_weights: @instance_family_weights,
-          availability_zone_weights: @availability_zone_weights,
-          weights: @weights,
-          partition_weights: @partition_weights,
-        )
-      end
-    end
-  end
-end