ruby-kafka-ec2 0.1.1 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b87f937f29a107174e7a972b79e2a717780bdd28b28fa37e3601633ff1f075b0
|
4
|
+
data.tar.gz: 413c79ff75ccb9f9304dd5c57dfb2fb1f26b0b673eeae9cb7e7d86cb68d65e74
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 22dfa6ddf4e0e0f8eb5db7bd6cb9c54b25edb175186bba989b32df698e23c705754c8dff057e7d82df480a3835bea110a363177b36739bca73432206227ac680
|
7
|
+
data.tar.gz: 1deda3ac8da64c41f9d598e26049b667aec2d81956a98dc7ded279c8a13e517d7dc4042ab34f9f4e8a05365be46c570e274207bd5bd430210b7de458972cace3
|
data/README.md
CHANGED
@@ -77,6 +77,59 @@ consumer = Kafka::EC2.with_assignment_strategy_factory(assignment_strategy_facto
|
|
77
77
|
end
|
78
78
|
```
|
79
79
|
|
80
|
+
You can also specify weights for each combination of availability zones and instance families:
|
81
|
+
|
82
|
+
```ruby
|
83
|
+
assignment_strategy_factory = Kafka::EC2::MixedInstanceAssignmentStrategyFactory.new(
|
84
|
+
weights: ->() {
|
85
|
+
db_cluster = rds.describe_db_clusters(filters: [
|
86
|
+
{ name: "db-cluster-id", values: [ENV["RDS_CLUSTER"]] },
|
87
|
+
]).db_clusters.first
|
88
|
+
db_instance_id = db_cluster.db_cluster_members.find { |m| m.is_cluster_writer }.db_instance_identifier
|
89
|
+
db_instance = rds.describe_db_instances(filters: [
|
90
|
+
{ name: "db-cluster-id", values: [ENV["RDS_CLUSTER"]] },
|
91
|
+
{ name: "db-instance-id", values: [db_instance_id] },
|
92
|
+
]).db_instances.first
|
93
|
+
|
94
|
+
weights_for_writer_az = {
|
95
|
+
"r4" => 1.00,
|
96
|
+
"r5" => 1.20,
|
97
|
+
"m5" => 1.35,
|
98
|
+
"c5" => 1.50,
|
99
|
+
}
|
100
|
+
weights_for_other_az = {
|
101
|
+
"r4" => 0.40,
|
102
|
+
"r5" => 0.70,
|
103
|
+
"m5" => 0.80,
|
104
|
+
"c5" => 1.00,
|
105
|
+
}
|
106
|
+
if db_instance.availability_zone == "ap-northeast-1a"
|
107
|
+
{
|
108
|
+
"ap-northeast-1a" => weights_for_writer_az,
|
109
|
+
"ap-northeast-1c" => weights_for_other_az,
|
110
|
+
}
|
111
|
+
else
|
112
|
+
{
|
113
|
+
"ap-northeast-1a" => weights_for_other_az,
|
114
|
+
"ap-northeast-1c" => weights_for_writer_az,
|
115
|
+
}
|
116
|
+
end
|
117
|
+
},
|
118
|
+
)
|
119
|
+
```
|
120
|
+
|
121
|
+
The strategy also has the option `partition_weights`. This is useful when the topic has some skewed partitions. Suppose the partition with ID 0 of the topic "foo" receives twice as many records as other partitions. To reduce the number of partitions assigned to the consumer that consumes the partition with ID 0, specify `partition_weights` like below:
|
122
|
+
|
123
|
+
```ruby
|
124
|
+
assignment_strategy_factory = Kafka::EC2::MixedInstanceAssignmentStrategyFactory.new(
|
125
|
+
partition_weights: {
|
126
|
+
"foo" => {
|
127
|
+
0 => 2,
|
128
|
+
},
|
129
|
+
}
|
130
|
+
)
|
131
|
+
```
|
132
|
+
|
80
133
|
## Development
|
81
134
|
|
82
135
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
@@ -18,10 +18,17 @@ module Kafka
|
|
18
18
|
# is the availability zone and whose value is the weight. If the object is a proc,
|
19
19
|
# it must return such a hash and the proc is called every time the method "assign"
|
20
20
|
# is called.
|
21
|
-
|
21
|
+
# @param weights [Hash{String => Hash{String => Numeric}}, Proc] a hash whose key
|
22
|
+
# is the availability zone or the instance family and whose value is a hash like
|
23
|
+
# instance_family_weights or availability_zone_weights. If the object is a proc,
|
24
|
+
# it must return such a hash and the proc is called every time the method "assign"
|
25
|
+
# is called.
|
26
|
+
def initialize(cluster:, instance_family_weights: {}, availability_zone_weights: {}, weights: {}, partition_weights: {})
|
22
27
|
@cluster = cluster
|
23
28
|
@instance_family_weights = instance_family_weights
|
24
29
|
@availability_zone_weights = availability_zone_weights
|
30
|
+
@weights = weights
|
31
|
+
@partition_weights = partition_weights
|
25
32
|
end
|
26
33
|
|
27
34
|
# Assign the topic partitions to the group members.
|
@@ -38,12 +45,13 @@ module Kafka
|
|
38
45
|
|
39
46
|
instance_family_to_capacity = @instance_family_weights.is_a?(Proc) ? @instance_family_weights.call() : @instance_family_weights
|
40
47
|
az_to_capacity = @availability_zone_weights.is_a?(Proc) ? @availability_zone_weights.call() : @availability_zone_weights
|
48
|
+
weights = @weights.is_a?(Proc) ? @weights.call() : @weights
|
41
49
|
members.each do |member_id|
|
42
50
|
group_assignment[member_id] = Protocol::MemberAssignment.new
|
43
51
|
|
44
52
|
instance_id, instance_type, az = member_id_to_metadata[member_id].split(",")
|
45
53
|
instance_id_to_member_ids[instance_id] << member_id
|
46
|
-
capacity = calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity)
|
54
|
+
capacity = calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity, weights)
|
47
55
|
instance_id_to_capacity[instance_id] += capacity
|
48
56
|
total_capacity += capacity
|
49
57
|
end
|
@@ -57,27 +65,40 @@ module Kafka
|
|
57
65
|
Array.new(partitions.count) { topic }.zip(partitions)
|
58
66
|
end
|
59
67
|
|
60
|
-
|
68
|
+
partition_weights = build_partition_weights(topics)
|
69
|
+
partition_weight_per_capacity = topic_partitions.sum { |topic, partition| partition_weights.dig(topic, partition) } / total_capacity
|
70
|
+
|
61
71
|
last_index = 0
|
62
|
-
|
63
|
-
|
72
|
+
member_id_to_acceptable_partition_weight = {}
|
73
|
+
instance_id_to_capacity.each do |instance_id, capacity|
|
64
74
|
member_ids = instance_id_to_member_ids[instance_id]
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
75
|
+
member_ids.each do |member_id|
|
76
|
+
acceptable_partition_weight = capacity * partition_weight_per_capacity / member_ids.size
|
77
|
+
while last_index < topic_partitions.size
|
78
|
+
topic, partition = topic_partitions[last_index]
|
79
|
+
partition_weight = partition_weights.dig(topic, partition)
|
80
|
+
break if acceptable_partition_weight - partition_weight < 0
|
69
81
|
|
70
|
-
|
71
|
-
|
82
|
+
group_assignment[member_id].assign(topic, [partition])
|
83
|
+
acceptable_partition_weight -= partition_weight
|
84
|
+
|
85
|
+
last_index += 1
|
86
|
+
end
|
72
87
|
|
73
|
-
|
74
|
-
member_ids = instance_id_to_member_ids.values.flatten
|
75
|
-
topic_partitions[last_index, topic_partitions.size].each_with_index do |(topic, partition), index|
|
76
|
-
member_id = member_ids[index % member_ids.size]
|
77
|
-
group_assignment[member_id].assign(topic, [partition])
|
88
|
+
member_id_to_acceptable_partition_weight[member_id] = acceptable_partition_weight
|
78
89
|
end
|
79
90
|
end
|
80
91
|
|
92
|
+
while last_index < topic_partitions.size
|
93
|
+
member_id, _ = member_id_to_acceptable_partition_weight.max_by { |_, remaining| remaining }
|
94
|
+
topic, partition = topic_partitions[last_index]
|
95
|
+
group_assignment[member_id].assign(topic, [partition])
|
96
|
+
|
97
|
+
member_id_to_acceptable_partition_weight[member_id] -= partition_weights.dig(topic, partition)
|
98
|
+
|
99
|
+
last_index += 1
|
100
|
+
end
|
101
|
+
|
81
102
|
group_assignment
|
82
103
|
rescue Kafka::LeaderNotAvailable
|
83
104
|
sleep 1
|
@@ -86,9 +107,22 @@ module Kafka
|
|
86
107
|
|
87
108
|
private
|
88
109
|
|
89
|
-
def calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity)
|
110
|
+
def calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity, weights)
|
90
111
|
instance_family, _ = instance_type.split(".")
|
91
|
-
|
112
|
+
|
113
|
+
capacity = weights.dig(az, instance_family) || weights.dig(instance_family, az)
|
114
|
+
(capacity || instance_family_to_capacity.fetch(instance_family, 1) * az_to_capacity.fetch(az, 1)).to_f
|
115
|
+
end
|
116
|
+
|
117
|
+
def build_partition_weights(topics)
|
118
|
+
# Duplicate the weights so as not to mutate @partition_weights or, when it is a proc, the value it returns
|
119
|
+
weights = (@partition_weights.is_a?(Proc) ? @partition_weights.call() : @partition_weights).dup
|
120
|
+
topics.each do |t|
|
121
|
+
weights[t] = weights[t].dup || {}
|
122
|
+
weights[t].default = 1
|
123
|
+
end
|
124
|
+
|
125
|
+
weights
|
92
126
|
end
|
93
127
|
end
|
94
128
|
end
|
@@ -7,9 +7,13 @@ module Kafka
|
|
7
7
|
class MixedInstanceAssignmentStrategyFactory
|
8
8
|
# @param instance_family_weights [Hash, Proc]
|
9
9
|
# @param availability_zone_weights [Hash, Proc]
|
10
|
-
|
10
|
+
# @param weights [Hash, Proc]
|
11
|
+
# @see Kafka::EC2::MixedInstanceAssignmentStrategy#initialize
|
12
|
+
def initialize(instance_family_weights: {}, availability_zone_weights: {}, weights: {}, partition_weights: {})
|
11
13
|
@instance_family_weights = instance_family_weights
|
12
14
|
@availability_zone_weights = availability_zone_weights
|
15
|
+
@weights = weights
|
16
|
+
@partition_weights = partition_weights
|
13
17
|
end
|
14
18
|
|
15
19
|
def create(cluster:)
|
@@ -17,6 +21,8 @@ module Kafka
|
|
17
21
|
cluster: cluster,
|
18
22
|
instance_family_weights: @instance_family_weights,
|
19
23
|
availability_zone_weights: @availability_zone_weights,
|
24
|
+
weights: @weights,
|
25
|
+
partition_weights: @partition_weights,
|
20
26
|
)
|
21
27
|
end
|
22
28
|
end
|
data/lib/kafka/ec2/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-kafka-ec2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- abicky
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-10-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-kafka
|
@@ -116,7 +116,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
116
116
|
- !ruby/object:Gem::Version
|
117
117
|
version: '0'
|
118
118
|
requirements: []
|
119
|
-
rubygems_version: 3.
|
119
|
+
rubygems_version: 3.0.3
|
120
120
|
signing_key:
|
121
121
|
specification_version: 4
|
122
122
|
summary: An extension of ruby-kafka for EC2
|