ruby-kafka-ec2 0.1.0 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8dde731c3652090bf18202d68b916cbdcff9ed09673bd84d5f20470a37c63373
|
4
|
+
data.tar.gz: d1a95de4724b3b5f85230c55a70469cc5e6c1e6008423b83f74c415bf2c9d289
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f37b8fa41b773933aac85f170884adb75fc0e446faf9fb92c109aa039f5a869874194dbdf3a9099899e273ef8543f75c5f7aca0fd99cff1845bc43ac081bde50
|
7
|
+
data.tar.gz: 78bc5df7157441563d73e19f35804069ddbb2c1863bccfe2711594c27a6caed78a5209fdd2ca7f55a3cf2302bed326ec47302720bf4253a9f8b4df950e5a0d0f
|
data/README.md
CHANGED
@@ -77,6 +77,59 @@ consumer = Kafka::EC2.with_assignment_strategy_factory(assignment_strategy_facto
|
|
77
77
|
end
|
78
78
|
```
|
79
79
|
|
80
|
+
You can also specify weights for each combination of availability zones and instance families:
|
81
|
+
|
82
|
+
```ruby
|
83
|
+
assignment_strategy_factory = Kafka::EC2::MixedInstanceAssignmentStrategyFactory.new(
|
84
|
+
weights: ->() {
|
85
|
+
db_cluster = rds.describe_db_clusters(filters: [
|
86
|
+
{ name: "db-cluster-id", values: [ENV["RDS_CLUSTER"]] },
|
87
|
+
]).db_clusters.first
|
88
|
+
db_instance_id = db_cluster.db_cluster_members.find { |m| m.is_cluster_writer }.db_instance_identifier
|
89
|
+
db_instance = rds.describe_db_instances(filters: [
|
90
|
+
{ name: "db-cluster-id", values: [ENV["RDS_CLUSTER"]] },
|
91
|
+
{ name: "db-instance-id", values: [db_instance_id] },
|
92
|
+
]).db_instances.first
|
93
|
+
|
94
|
+
weights_for_writer_az = {
|
95
|
+
"r4" => 1.00,
|
96
|
+
"r5" => 1.20,
|
97
|
+
"m5" => 1.35,
|
98
|
+
"c5" => 1.50,
|
99
|
+
}
|
100
|
+
weights_for_other_az = {
|
101
|
+
"r4" => 0.40,
|
102
|
+
"r5" => 0.70,
|
103
|
+
"m5" => 0.80,
|
104
|
+
"c5" => 1.00,
|
105
|
+
}
|
106
|
+
if db_instance.availability_zone == "ap-northeast-1a"
|
107
|
+
{
|
108
|
+
"ap-northeast-1a" => weights_for_writer_az,
|
109
|
+
"ap-northeast-1c" => weights_for_other_az,
|
110
|
+
}
|
111
|
+
else
|
112
|
+
{
|
113
|
+
"ap-northeast-1a" => weights_for_other_az,
|
114
|
+
"ap-northeast-1c" => weights_for_writer_az,
|
115
|
+
}
|
116
|
+
end
|
117
|
+
},
|
118
|
+
)
|
119
|
+
```
|
120
|
+
|
121
|
+
The strategy also has the option `partition_weights`. This is useful when the topic has some skewed partitions. Suppose the partition with ID 0 of the topic "foo" receives twice as many records as other partitions. To reduce the number of partitions assigned to the consumer that consumes the partition with ID 0, specify `partition_weights` like below:
|
122
|
+
|
123
|
+
```ruby
|
124
|
+
assignment_strategy_factory = Kafka::EC2::MixedInstanceAssignmentStrategyFactory.new(
|
125
|
+
partition_weights: {
|
126
|
+
"foo" => {
|
127
|
+
0 => 2,
|
128
|
+
},
|
129
|
+
}
|
130
|
+
)
|
131
|
+
```
|
132
|
+
|
80
133
|
## Development
|
81
134
|
|
82
135
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
@@ -10,7 +10,7 @@ module Kafka
|
|
10
10
|
module Ext
|
11
11
|
module Protocol
|
12
12
|
module JoinGroupRequest
|
13
|
-
def initialize(*args, topics
|
13
|
+
def initialize(*args, topics: [], **kwargs)
|
14
14
|
super
|
15
15
|
if Kafka::EC2.assignment_strategy_classes[@group_id] == Kafka::EC2::MixedInstanceAssignmentStrategy
|
16
16
|
user_data = Net::HTTP.start("169.254.169.254", 80) do |http|
|
@@ -18,10 +18,17 @@ module Kafka
|
|
18
18
|
# is the availability zone and whose value is the weight. If the object is a proc,
|
19
19
|
# it must return such a hash and the proc is called every time the method "assign"
|
20
20
|
# is called.
|
21
|
-
|
21
|
+
# @param weights [Hash{String => Hash{String => Numeric}}, Proc] a hash whose key
|
22
|
+
# is the availability zone or the instance family and whose value is a hash like
|
23
|
+
# instance_family_weights or availability_zone_weights. If the object is a proc,
|
24
|
+
# it must return such a hash and the proc is called every time the method "assign"
|
25
|
+
# is called.
|
26
|
+
def initialize(cluster:, instance_family_weights: {}, availability_zone_weights: {}, weights: {}, partition_weights: {})
|
22
27
|
@cluster = cluster
|
23
28
|
@instance_family_weights = instance_family_weights
|
24
29
|
@availability_zone_weights = availability_zone_weights
|
30
|
+
@weights = weights
|
31
|
+
@partition_weights = partition_weights
|
25
32
|
end
|
26
33
|
|
27
34
|
# Assign the topic partitions to the group members.
|
@@ -38,12 +45,13 @@ module Kafka
|
|
38
45
|
|
39
46
|
instance_family_to_capacity = @instance_family_weights.is_a?(Proc) ? @instance_family_weights.call() : @instance_family_weights
|
40
47
|
az_to_capacity = @availability_zone_weights.is_a?(Proc) ? @availability_zone_weights.call() : @availability_zone_weights
|
48
|
+
weights = @weights.is_a?(Proc) ? @weights.call() : @weights
|
41
49
|
members.each do |member_id|
|
42
50
|
group_assignment[member_id] = Protocol::MemberAssignment.new
|
43
51
|
|
44
52
|
instance_id, instance_type, az = member_id_to_metadata[member_id].split(",")
|
45
53
|
instance_id_to_member_ids[instance_id] << member_id
|
46
|
-
capacity = calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity)
|
54
|
+
capacity = calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity, weights)
|
47
55
|
instance_id_to_capacity[instance_id] += capacity
|
48
56
|
total_capacity += capacity
|
49
57
|
end
|
@@ -57,24 +65,36 @@ module Kafka
|
|
57
65
|
Array.new(partitions.count) { topic }.zip(partitions)
|
58
66
|
end
|
59
67
|
|
60
|
-
|
68
|
+
partition_weights = build_partition_weights(topics)
|
69
|
+
partition_weight_per_capacity = topic_partitions.sum { |topic, partition| partition_weights.dig(topic, partition) } / total_capacity
|
70
|
+
|
61
71
|
last_index = 0
|
62
|
-
|
63
|
-
|
72
|
+
member_id_to_acceptable_partition_weight = {}
|
73
|
+
instance_id_to_capacity.each do |instance_id, capacity|
|
64
74
|
member_ids = instance_id_to_member_ids[instance_id]
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
75
|
+
member_ids.each do |member_id|
|
76
|
+
acceptable_partition_weight = capacity * partition_weight_per_capacity / member_ids.size
|
77
|
+
loop do
|
78
|
+
topic, partition = topic_partitions[last_index]
|
79
|
+
partition_weight = partition_weights.dig(topic, partition)
|
80
|
+
if last_index == topic_partitions.size || acceptable_partition_weight - partition_weight < 0
|
81
|
+
member_id_to_acceptable_partition_weight[member_id] = acceptable_partition_weight
|
82
|
+
break
|
83
|
+
end
|
69
84
|
|
70
|
-
|
85
|
+
group_assignment[member_id].assign(topic, [partition])
|
86
|
+
last_index += 1
|
87
|
+
acceptable_partition_weight -= partition_weight
|
88
|
+
end
|
89
|
+
end
|
71
90
|
end
|
72
91
|
|
73
92
|
if last_index < topic_partitions.size
|
74
|
-
|
75
|
-
|
76
|
-
member_id = member_ids[index % member_ids.size]
|
93
|
+
member_id_to_acceptable_partition_weight.sort_by { |_, remaining| -remaining }.each do |member_id, _|
|
94
|
+
topic, partition = topic_partitions[last_index]
|
77
95
|
group_assignment[member_id].assign(topic, [partition])
|
96
|
+
last_index += 1
|
97
|
+
break if last_index == topic_partitions.size
|
78
98
|
end
|
79
99
|
end
|
80
100
|
|
@@ -86,9 +106,22 @@ module Kafka
|
|
86
106
|
|
87
107
|
private
|
88
108
|
|
89
|
-
def calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity)
|
109
|
+
def calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity, weights)
|
90
110
|
instance_family, _ = instance_type.split(".")
|
91
|
-
|
111
|
+
|
112
|
+
capacity = weights.dig(az, instance_family) || weights.dig(instance_family, az)
|
113
|
+
(capacity || instance_family_to_capacity.fetch(instance_family, 1) * az_to_capacity.fetch(az, 1)).to_f
|
114
|
+
end
|
115
|
+
|
116
|
+
def build_partition_weights(topics)
|
117
|
+
# Duplicate the weights so that neither @partition_weights nor the hash returned by calling it (when it is a proc) is mutated
|
118
|
+
weights = (@partition_weights.is_a?(Proc) ? @partition_weights.call() : @partition_weights).dup
|
119
|
+
topics.each do |t|
|
120
|
+
weights[t] = weights[t].dup || {}
|
121
|
+
weights[t].default = 1
|
122
|
+
end
|
123
|
+
|
124
|
+
weights
|
92
125
|
end
|
93
126
|
end
|
94
127
|
end
|
@@ -7,9 +7,13 @@ module Kafka
|
|
7
7
|
class MixedInstanceAssignmentStrategyFactory
|
8
8
|
# @param instance_family_weights [Hash, Proc]
|
9
9
|
# @param availability_zone_weights [Hash, Proc]
|
10
|
-
|
10
|
+
# @param weights [Hash, Proc]
|
11
|
+
# @see Kafka::EC2::MixedInstanceAssignmentStrategy#initialize
|
12
|
+
def initialize(instance_family_weights: {}, availability_zone_weights: {}, weights: {}, partition_weights: {})
|
11
13
|
@instance_family_weights = instance_family_weights
|
12
14
|
@availability_zone_weights = availability_zone_weights
|
15
|
+
@weights = weights
|
16
|
+
@partition_weights = partition_weights
|
13
17
|
end
|
14
18
|
|
15
19
|
def create(cluster:)
|
@@ -17,6 +21,8 @@ module Kafka
|
|
17
21
|
cluster: cluster,
|
18
22
|
instance_family_weights: @instance_family_weights,
|
19
23
|
availability_zone_weights: @availability_zone_weights,
|
24
|
+
weights: @weights,
|
25
|
+
partition_weights: @partition_weights,
|
20
26
|
)
|
21
27
|
end
|
22
28
|
end
|
data/lib/kafka/ec2/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-kafka-ec2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- abicky
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-10-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-kafka
|
@@ -116,7 +116,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
116
116
|
- !ruby/object:Gem::Version
|
117
117
|
version: '0'
|
118
118
|
requirements: []
|
119
|
-
rubygems_version: 3.
|
119
|
+
rubygems_version: 3.0.3
|
120
120
|
signing_key:
|
121
121
|
specification_version: 4
|
122
122
|
summary: An extension of ruby-kafka for EC2
|