ruby-kafka-ec2 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0e6d39fb8a782c1075f0454a2bc0479fafb12d380ad0bd78ba938d218af57e23
4
- data.tar.gz: 2f339e9dc2ea096376b53fb4f9413a46da5f3bdcd7ff86c6b4854f3d8928cbf7
3
+ metadata.gz: 41d57e6a8d322551d668003fbba48e43507a4d54e73f3dd2d4918fe55bb6aaad
4
+ data.tar.gz: 1440f7a70cdbc3f47a524fc1847bcb34cbc1d55dbef5c04660c384c073018434
5
5
  SHA512:
6
- metadata.gz: f77547fc1fb5b92b12b12857114218b0d3d055863ffe8c3a972a3255cbc781d6e441ae11ce5e8772add6d5be26113e5f874a3a333da0c1a5d6483e0b187141cd
7
- data.tar.gz: 1a87e14365f4a49b139787d55aae75ac8e4131a4ed6fd766a9a55a297bcf4f613a2c5ecf062f84fb5fd69cfc792d2d623f8aef2a1285f2126f768da1e75566af
6
+ metadata.gz: 9a731656c0e0862559ea88c5395d547886cc738bb39ba4ae779056ea2d7376c8654bc20698f470b13f03bb79cea5384e3d78047c4a6703dfba43de830f54dfee
7
+ data.tar.gz: e78667585584f906f81c33a7eb09a254fc59034680b7ec4f1bcb39a0cb81e1b865d5adf1f17c55ddae0e179f680b309d592f04525d386549e10ba842eef2677c
data/README.md CHANGED
@@ -118,6 +118,17 @@ assignment_strategy_factory = Kafka::EC2::MixedInstanceAssignmentStrategyFactory
118
118
  )
119
119
  ```
120
120
 
121
+ The strategy also accepts the option `partition_weights`, which is useful when a topic has skewed partitions. Suppose the partition with ID 0 of the topic "foo" receives twice as many records as the other partitions. To reduce the number of partitions assigned to the consumer that consumes the partition with ID 0, specify `partition_weights` as shown below. Partitions that are not listed have a weight of 1:
122
+
123
+ ```ruby
124
+ assignment_strategy_factory = Kafka::EC2::MixedInstanceAssignmentStrategyFactory.new(
125
+ partition_weights: {
126
+ "foo" => {
127
+ 0 => 2,
128
+ },
129
+ }
130
+ )
131
+ ```
121
132
 
122
133
  ## Development
123
134
 
@@ -23,11 +23,12 @@ module Kafka
23
23
  # instance_family_weights or availability_zone_weights. If the object is a proc,
24
24
  # it must return such a hash, and the proc is called every time the method "assign"
25
25
  # is called.
26
- def initialize(cluster:, instance_family_weights: {}, availability_zone_weights: {}, weights: {})
26
+ def initialize(cluster:, instance_family_weights: {}, availability_zone_weights: {}, weights: {}, partition_weights: {})
27
27
  @cluster = cluster
28
28
  @instance_family_weights = instance_family_weights
29
29
  @availability_zone_weights = availability_zone_weights
30
30
  @weights = weights
31
+ @partition_weights = partition_weights
31
32
  end
32
33
 
33
34
  # Assign the topic partitions to the group members.
@@ -64,24 +65,35 @@ module Kafka
64
65
  Array.new(partitions.count) { topic }.zip(partitions)
65
66
  end
66
67
 
67
- partition_count_per_capacity = topic_partitions.size / total_capacity
68
+ partition_weights = build_partition_weights(topics)
69
+ partition_weight_per_capacity = topic_partitions.sum { |topic, partition| partition_weights.dig(topic, partition) } / total_capacity
70
+
68
71
  last_index = 0
69
- instance_id_to_capacity.sort_by { |_, capacity| -capacity }.each do |instance_id, capacity|
70
- partition_count = (capacity * partition_count_per_capacity).round
72
+ member_id_to_acceptable_partition_weight = {}
73
+ instance_id_to_capacity.each do |instance_id, capacity|
71
74
  member_ids = instance_id_to_member_ids[instance_id]
72
- topic_partitions[last_index, partition_count]&.each_with_index do |(topic, partition), index|
73
- member_id = member_ids[index % member_ids.size]
74
- group_assignment[member_id].assign(topic, [partition])
75
- end
75
+ member_ids.each do |member_id|
76
+ acceptable_partition_weight = capacity * partition_weight_per_capacity / member_ids.size
77
+ loop do
78
+ topic, partition = topic_partitions[last_index]
79
+ partition_weight = partition_weights.dig(topic, partition)
80
+ if last_index == topic_partitions.size || acceptable_partition_weight - partition_weight < 0
81
+ member_id_to_acceptable_partition_weight[member_id] = acceptable_partition_weight
82
+ break
83
+ end
76
84
 
77
- last_index += partition_count
85
+ group_assignment[member_id].assign(topic, [partition])
86
+ last_index += 1
87
+ acceptable_partition_weight -= partition_weight
88
+ end
89
+ end
78
90
  end
79
91
 
80
92
  if last_index < topic_partitions.size
81
- member_ids = instance_id_to_member_ids.values.flatten
82
- topic_partitions[last_index, topic_partitions.size].each_with_index do |(topic, partition), index|
83
- member_id = member_ids[index % member_ids.size]
93
+ member_id_to_acceptable_partition_weight.sort_by { |_, remaining| -remaining }.each do |member_id, _|
94
+ topic, partition = topic_partitions[last_index]
84
95
  group_assignment[member_id].assign(topic, [partition])
96
+ last_index += 1
85
97
  end
86
98
  end
87
99
 
@@ -97,7 +109,18 @@ module Kafka
97
109
  instance_family, _ = instance_type.split(".")
98
110
 
99
111
  capacity = weights.dig(az, instance_family) || weights.dig(instance_family, az)
100
- capacity || instance_family_to_capacity.fetch(instance_family, 1) * az_to_capacity.fetch(az, 1)
112
+ (capacity || instance_family_to_capacity.fetch(instance_family, 1) * az_to_capacity.fetch(az, 1)).to_f
113
+ end
114
+
115
+ def build_partition_weights(topics)
116
+ # Duplicate the weights so that neither @partition_weights nor the hash returned by calling it (when it is a proc) is mutated
117
+ weights = (@partition_weights.is_a?(Proc) ? @partition_weights.call() : @partition_weights).dup
118
+ topics.each do |t|
119
+ weights[t] = weights[t].dup || {}
120
+ weights[t].default = 1
121
+ end
122
+
123
+ weights
101
124
  end
102
125
  end
103
126
  end
@@ -9,10 +9,11 @@ module Kafka
9
9
  # @param availability_zone_weights [Hash, Proc]
10
10
  # @param weights [Hash, Proc]
11
11
  # @see Kafka::EC2::MixedInstanceAssignmentStrategy#initialize
12
- def initialize(instance_family_weights: {}, availability_zone_weights: {}, weights: {})
12
+ def initialize(instance_family_weights: {}, availability_zone_weights: {}, weights: {}, partition_weights: {})
13
13
  @instance_family_weights = instance_family_weights
14
14
  @availability_zone_weights = availability_zone_weights
15
15
  @weights = weights
16
+ @partition_weights = partition_weights
16
17
  end
17
18
 
18
19
  def create(cluster:)
@@ -21,6 +22,7 @@ module Kafka
21
22
  instance_family_weights: @instance_family_weights,
22
23
  availability_zone_weights: @availability_zone_weights,
23
24
  weights: @weights,
25
+ partition_weights: @partition_weights,
24
26
  )
25
27
  end
26
28
  end
@@ -1,5 +1,5 @@
1
1
  module Kafka
2
2
  class EC2
3
- VERSION = "0.1.3"
3
+ VERSION = "0.1.4"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-kafka-ec2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - abicky
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-07-07 00:00:00.000000000 Z
11
+ date: 2020-10-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-kafka
@@ -116,7 +116,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
116
116
  - !ruby/object:Gem::Version
117
117
  version: '0'
118
118
  requirements: []
119
- rubygems_version: 3.1.2
119
+ rubygems_version: 3.0.3
120
120
  signing_key:
121
121
  specification_version: 4
122
122
  summary: An extension of ruby-kafka for EC2