ruby-kafka-ec2 0.1.2 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 66c71213189c16f43593597889adfb2ef3d0f4757cbf6ae7bb600310a2f88855
|
4
|
+
data.tar.gz: 71a0256485b92b88ed891e76bd3c3cbd85420b70332835cd33d4aeaf748e207d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c88ff1e2fe4ebd92fe6b9a13a87fd5e9582c09228ae17b29cdbd4c0186f83e22d6c96bb6f193fbd5ae2ed624979fe3c33a80c6db88e72a0800b2f878ffbfc7b1
|
7
|
+
data.tar.gz: 78cd8b945be174b64cdd261686683521160ed7c2c7334c6586726d1803abea4e0aad96d8470304ef324fe7b04281b7048d1591794771178c2bf71d0597f07ca7
|
data/README.md
CHANGED
@@ -118,6 +118,17 @@ assignment_strategy_factory = Kafka::EC2::MixedInstanceAssignmentStrategyFactory
|
|
118
118
|
)
|
119
119
|
```
|
120
120
|
|
121
|
+
The strategy also accepts the option `partition_weights`, which is useful when a topic has skewed partitions. Suppose partition 0 of the topic "foo" receives twice as many records as the other partitions. To reduce the number of partitions assigned to the consumer that consumes partition 0, specify `partition_weights` as shown below (partitions without an explicit weight default to a weight of 1):
|
122
|
+
|
123
|
+
```ruby
|
124
|
+
assignment_strategy_factory = Kafka::EC2::MixedInstanceAssignmentStrategyFactory.new(
|
125
|
+
partition_weights: {
|
126
|
+
"foo" => {
|
127
|
+
0 => 2,
|
128
|
+
},
|
129
|
+
}
|
130
|
+
)
|
131
|
+
```
|
121
132
|
|
122
133
|
## Development
|
123
134
|
|
@@ -23,11 +23,12 @@ module Kafka
|
|
23
23
|
# instance_family_weights or availability_zone_weights. If the object is a proc,
|
24
24
|
# it must return such a hash, and the proc is called every time the method "assign"
|
25
25
|
# is called.
|
26
|
-
def initialize(cluster:, instance_family_weights: {}, availability_zone_weights: {}, weights: {})
|
26
|
+
def initialize(cluster:, instance_family_weights: {}, availability_zone_weights: {}, weights: {}, partition_weights: {})
|
27
27
|
@cluster = cluster
|
28
28
|
@instance_family_weights = instance_family_weights
|
29
29
|
@availability_zone_weights = availability_zone_weights
|
30
30
|
@weights = weights
|
31
|
+
@partition_weights = partition_weights
|
31
32
|
end
|
32
33
|
|
33
34
|
# Assign the topic partitions to the group members.
|
@@ -41,6 +42,7 @@ module Kafka
|
|
41
42
|
instance_id_to_capacity = Hash.new(0)
|
42
43
|
instance_id_to_member_ids = Hash.new { |h, k| h[k] = [] }
|
43
44
|
total_capacity = 0
|
45
|
+
member_id_to_instance_id = {}
|
44
46
|
|
45
47
|
instance_family_to_capacity = @instance_family_weights.is_a?(Proc) ? @instance_family_weights.call() : @instance_family_weights
|
46
48
|
az_to_capacity = @availability_zone_weights.is_a?(Proc) ? @availability_zone_weights.call() : @availability_zone_weights
|
@@ -50,6 +52,7 @@ module Kafka
|
|
50
52
|
|
51
53
|
instance_id, instance_type, az = member_id_to_metadata[member_id].split(",")
|
52
54
|
instance_id_to_member_ids[instance_id] << member_id
|
55
|
+
member_id_to_instance_id[member_id] = instance_id
|
53
56
|
capacity = calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity, weights)
|
54
57
|
instance_id_to_capacity[instance_id] += capacity
|
55
58
|
total_capacity += capacity
|
@@ -64,25 +67,48 @@ module Kafka
|
|
64
67
|
Array.new(partitions.count) { topic }.zip(partitions)
|
65
68
|
end
|
66
69
|
|
67
|
-
|
70
|
+
partition_weights = build_partition_weights(topics)
|
71
|
+
partition_weight_per_capacity = topic_partitions.sum { |topic, partition| partition_weights.dig(topic, partition) } / total_capacity
|
72
|
+
|
68
73
|
last_index = 0
|
69
|
-
|
70
|
-
|
74
|
+
member_id_to_acceptable_partition_weight = {}
|
75
|
+
instance_id_to_total_acceptable_partition_weight = Hash.new(0)
|
76
|
+
instance_id_to_capacity.each do |instance_id, capacity|
|
71
77
|
member_ids = instance_id_to_member_ids[instance_id]
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
78
|
+
member_ids.each do |member_id|
|
79
|
+
acceptable_partition_weight = capacity * partition_weight_per_capacity / member_ids.size
|
80
|
+
while last_index < topic_partitions.size
|
81
|
+
topic, partition = topic_partitions[last_index]
|
82
|
+
partition_weight = partition_weights.dig(topic, partition)
|
83
|
+
break if acceptable_partition_weight - partition_weight < 0
|
84
|
+
|
85
|
+
group_assignment[member_id].assign(topic, [partition])
|
86
|
+
acceptable_partition_weight -= partition_weight
|
76
87
|
|
77
|
-
|
88
|
+
last_index += 1
|
89
|
+
end
|
90
|
+
|
91
|
+
member_id_to_acceptable_partition_weight[member_id] = acceptable_partition_weight
|
92
|
+
instance_id_to_total_acceptable_partition_weight[instance_id] += acceptable_partition_weight
|
93
|
+
end
|
78
94
|
end
|
79
95
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
96
|
+
while last_index < topic_partitions.size
|
97
|
+
max_acceptable_partition_weight = member_id_to_acceptable_partition_weight.values.max
|
98
|
+
member_ids = member_id_to_acceptable_partition_weight.select { |_, w| w == max_acceptable_partition_weight }.keys
|
99
|
+
if member_ids.size == 1
|
100
|
+
member_id = member_ids.first
|
101
|
+
else
|
102
|
+
member_id = member_ids.max_by { |id| instance_id_to_total_acceptable_partition_weight[member_id_to_instance_id[id]] }
|
85
103
|
end
|
104
|
+
topic, partition = topic_partitions[last_index]
|
105
|
+
group_assignment[member_id].assign(topic, [partition])
|
106
|
+
|
107
|
+
partition_weight = partition_weights.dig(topic, partition)
|
108
|
+
member_id_to_acceptable_partition_weight[member_id] -= partition_weight
|
109
|
+
instance_id_to_total_acceptable_partition_weight[member_id_to_instance_id[member_id]] -= partition_weight
|
110
|
+
|
111
|
+
last_index += 1
|
86
112
|
end
|
87
113
|
|
88
114
|
group_assignment
|
@@ -97,7 +123,18 @@ module Kafka
|
|
97
123
|
instance_family, _ = instance_type.split(".")
|
98
124
|
|
99
125
|
capacity = weights.dig(az, instance_family) || weights.dig(instance_family, az)
|
100
|
-
capacity || instance_family_to_capacity.fetch(instance_family, 1) * az_to_capacity.fetch(az, 1)
|
126
|
+
(capacity || instance_family_to_capacity.fetch(instance_family, 1) * az_to_capacity.fetch(az, 1)).to_f
|
127
|
+
end
|
128
|
+
|
129
|
+
def build_partition_weights(topics)
|
130
|
+
# Duplicate the weights so that neither @partition_weights nor the hash returned by the @partition_weights proc is mutated
|
131
|
+
weights = (@partition_weights.is_a?(Proc) ? @partition_weights.call() : @partition_weights).dup
|
132
|
+
topics.each do |t|
|
133
|
+
weights[t] = weights[t].dup || {}
|
134
|
+
weights[t].default = 1
|
135
|
+
end
|
136
|
+
|
137
|
+
weights
|
101
138
|
end
|
102
139
|
end
|
103
140
|
end
|
@@ -9,9 +9,11 @@ module Kafka
|
|
9
9
|
# @param availability_zone_weights [Hash, Proc]
|
10
10
|
# @param weights [Hash, Proc]
|
11
11
|
# @see Kafka::EC2::MixedInstanceAssignmentStrategy#initialize
|
12
|
-
def initialize(instance_family_weights: {}, availability_zone_weights: {}, weights: {})
|
12
|
+
def initialize(instance_family_weights: {}, availability_zone_weights: {}, weights: {}, partition_weights: {})
|
13
13
|
@instance_family_weights = instance_family_weights
|
14
14
|
@availability_zone_weights = availability_zone_weights
|
15
|
+
@weights = weights
|
16
|
+
@partition_weights = partition_weights
|
15
17
|
end
|
16
18
|
|
17
19
|
def create(cluster:)
|
@@ -19,6 +21,8 @@ module Kafka
|
|
19
21
|
cluster: cluster,
|
20
22
|
instance_family_weights: @instance_family_weights,
|
21
23
|
availability_zone_weights: @availability_zone_weights,
|
24
|
+
weights: @weights,
|
25
|
+
partition_weights: @partition_weights,
|
22
26
|
)
|
23
27
|
end
|
24
28
|
end
|
data/lib/kafka/ec2/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-kafka-ec2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- abicky
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-03-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-kafka
|
@@ -116,7 +116,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
116
116
|
- !ruby/object:Gem::Version
|
117
117
|
version: '0'
|
118
118
|
requirements: []
|
119
|
-
rubygems_version: 3.1.
|
119
|
+
rubygems_version: 3.1.4
|
120
120
|
signing_key:
|
121
121
|
specification_version: 4
|
122
122
|
summary: An extension of ruby-kafka for EC2
|