ruby-kafka-ec2 0.1.0 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a216c0064e93662929aa08a27aee3caad6eb7d4a7eb02a658b6edef1ced9fa33
4
- data.tar.gz: 5982cfb402ea097fbc45580b3bf17c88d1ef6fef4c9f3fc3c06b71c0ba9fd8f7
3
+ metadata.gz: 8dde731c3652090bf18202d68b916cbdcff9ed09673bd84d5f20470a37c63373
4
+ data.tar.gz: d1a95de4724b3b5f85230c55a70469cc5e6c1e6008423b83f74c415bf2c9d289
5
5
  SHA512:
6
- metadata.gz: 47ef0c231763ba3b9c8ee95417eba748a205e8791bd973bdebdb7e4b30aa103f32c965fbbc47cc8aa157f6ae80bd8e4f141ac153264eb3514accb569a375f8d5
7
- data.tar.gz: '009ad55259d086a25252b8fd0aa6f9f3eecdbf0c25b79ac00c07a920b28fe7d4d1758043e7ef804364e56c869bd57e51b51690b44c16745fba45dc12ec2bdb40'
6
+ metadata.gz: f37b8fa41b773933aac85f170884adb75fc0e446faf9fb92c109aa039f5a869874194dbdf3a9099899e273ef8543f75c5f7aca0fd99cff1845bc43ac081bde50
7
+ data.tar.gz: 78bc5df7157441563d73e19f35804069ddbb2c1863bccfe2711594c27a6caed78a5209fdd2ca7f55a3cf2302bed326ec47302720bf4253a9f8b4df950e5a0d0f
data/README.md CHANGED
@@ -77,6 +77,59 @@ consumer = Kafka::EC2.with_assignment_strategy_factory(assignment_strategy_facto
77
77
  end
78
78
  ```
79
79
 
80
+ You can also specify weights for each combination of availability zones and instance families:
81
+
82
+ ```ruby
83
+ assignment_strategy_factory = Kafka::EC2::MixedInstanceAssignmentStrategyFactory.new(
84
+ weights: ->() {
85
+ db_cluster = rds.describe_db_clusters(filters: [
86
+ { name: "db-cluster-id", values: [ENV["RDS_CLUSTER"]] },
87
+ ]).db_clusters.first
88
+ db_instance_id = db_cluster.db_cluster_members.find { |m| m.is_cluster_writer }.db_instance_identifier
89
+ db_instance = rds.describe_db_instances(filters: [
90
+ { name: "db-cluster-id", values: [ENV["RDS_CLUSTER"]] },
91
+ { name: "db-instance-id", values: [db_instance_id] },
92
+ ]).db_instances.first
93
+
94
+ weights_for_writer_az = {
95
+ "r4" => 1.00,
96
+ "r5" => 1.20,
97
+ "m5" => 1.35,
98
+ "c5" => 1.50,
99
+ }
100
+ weights_for_other_az = {
101
+ "r4" => 0.40,
102
+ "r5" => 0.70,
103
+ "m5" => 0.80,
104
+ "c5" => 1.00,
105
+ }
106
+ if db_instance.availability_zone == "ap-northeast-1a"
107
+ {
108
+ "ap-northeast-1a" => weights_for_writer_az,
109
+ "ap-northeast-1c" => weights_for_other_az,
110
+ }
111
+ else
112
+ {
113
+ "ap-northeast-1a" => weights_for_other_az,
114
+ "ap-northeast-1c" => weights_for_writer_az,
115
+ }
116
+ end
117
+ },
118
+ )
119
+ ```
120
+
121
+ The strategy also has the option `partition_weights`. This is useful when the topic has some skewed partitions. Suppose the partition with ID 0 of the topic "foo" receives twice as many records as other partitions. To reduce the number of partitions assigned to the consumer that consumes the partition with ID 0, specify `partition_weights` like below:
122
+
123
+ ```ruby
124
+ assignment_strategy_factory = Kafka::EC2::MixedInstanceAssignmentStrategyFactory.new(
125
+ partition_weights: {
126
+ "foo" => {
127
+ 0 => 2,
128
+ },
129
+ }
130
+ )
131
+ ```
132
+
80
133
  ## Development
81
134
 
82
135
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -10,7 +10,7 @@ module Kafka
10
10
  module Ext
11
11
  module Protocol
12
12
  module JoinGroupRequest
13
- def initialize(*args, topics:, **kwargs)
13
+ def initialize(*args, topics: [], **kwargs)
14
14
  super
15
15
  if Kafka::EC2.assignment_strategy_classes[@group_id] == Kafka::EC2::MixedInstanceAssignmentStrategy
16
16
  user_data = Net::HTTP.start("169.254.169.254", 80) do |http|
@@ -18,10 +18,17 @@ module Kafka
18
18
  # is the availability zone and whose value is the weight. If the object is a proc,
19
19
  # it must return such a hash and the proc is called every time the method "assign"
20
20
  # is called.
21
- def initialize(cluster:, instance_family_weights:, availability_zone_weights:)
21
+ # @param weights [Hash{String => Hash{String => Numeric}}, Proc] a hash whose key
22
+ # is the availability zone or the instance family and whose value is the hash like
23
+ # instance_family_weights or availability_zone_weights. If the object is a proc,
24
+ # it must return such a hash and the proc is called every time the method "assign"
25
+ # is called.
26
+ def initialize(cluster:, instance_family_weights: {}, availability_zone_weights: {}, weights: {}, partition_weights: {})
22
27
  @cluster = cluster
23
28
  @instance_family_weights = instance_family_weights
24
29
  @availability_zone_weights = availability_zone_weights
30
+ @weights = weights
31
+ @partition_weights = partition_weights
25
32
  end
26
33
 
27
34
  # Assign the topic partitions to the group members.
@@ -38,12 +45,13 @@ module Kafka
38
45
 
39
46
  instance_family_to_capacity = @instance_family_weights.is_a?(Proc) ? @instance_family_weights.call() : @instance_family_weights
40
47
  az_to_capacity = @availability_zone_weights.is_a?(Proc) ? @availability_zone_weights.call() : @availability_zone_weights
48
+ weights = @weights.is_a?(Proc) ? @weights.call() : @weights
41
49
  members.each do |member_id|
42
50
  group_assignment[member_id] = Protocol::MemberAssignment.new
43
51
 
44
52
  instance_id, instance_type, az = member_id_to_metadata[member_id].split(",")
45
53
  instance_id_to_member_ids[instance_id] << member_id
46
- capacity = calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity)
54
+ capacity = calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity, weights)
47
55
  instance_id_to_capacity[instance_id] += capacity
48
56
  total_capacity += capacity
49
57
  end
@@ -57,24 +65,36 @@ module Kafka
57
65
  Array.new(partitions.count) { topic }.zip(partitions)
58
66
  end
59
67
 
60
- partition_count_per_capacity = topic_partitions.size / total_capacity
68
+ partition_weights = build_partition_weights(topics)
69
+ partition_weight_per_capacity = topic_partitions.sum { |topic, partition| partition_weights.dig(topic, partition) } / total_capacity
70
+
61
71
  last_index = 0
62
- instance_id_to_capacity.sort_by { |_, capacity| -capacity }.each do |instance_id, capacity|
63
- partition_count = (capacity * partition_count_per_capacity).round
72
+ member_id_to_acceptable_partition_weight = {}
73
+ instance_id_to_capacity.each do |instance_id, capacity|
64
74
  member_ids = instance_id_to_member_ids[instance_id]
65
- topic_partitions[last_index, partition_count]&.each_with_index do |(topic, partition), index|
66
- member_id = member_ids[index % member_ids.size]
67
- group_assignment[member_id].assign(topic, [partition])
68
- end
75
+ member_ids.each do |member_id|
76
+ acceptable_partition_weight = capacity * partition_weight_per_capacity / member_ids.size
77
+ loop do
78
+ topic, partition = topic_partitions[last_index]
79
+ partition_weight = partition_weights.dig(topic, partition)
80
+ if last_index == topic_partitions.size || acceptable_partition_weight - partition_weight < 0
81
+ member_id_to_acceptable_partition_weight[member_id] = acceptable_partition_weight
82
+ break
83
+ end
69
84
 
70
- last_index += partition_count
85
+ group_assignment[member_id].assign(topic, [partition])
86
+ last_index += 1
87
+ acceptable_partition_weight -= partition_weight
88
+ end
89
+ end
71
90
  end
72
91
 
73
92
  if last_index < topic_partitions.size
74
- member_ids = instance_id_to_member_ids.values.flatten
75
- topic_partitions[last_index, topic_partitions.size].each_with_index do |(topic, partition), index|
76
- member_id = member_ids[index % member_ids.size]
93
+ member_id_to_acceptable_partition_weight.sort_by { |_, remaining| -remaining }.each do |member_id, _|
94
+ topic, partition = topic_partitions[last_index]
77
95
  group_assignment[member_id].assign(topic, [partition])
96
+ last_index += 1
97
+ break if last_index == topic_partitions.size
78
98
  end
79
99
  end
80
100
 
@@ -86,9 +106,22 @@ module Kafka
86
106
 
87
107
  private
88
108
 
89
- def calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity)
109
+ def calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity, weights)
90
110
  instance_family, _ = instance_type.split(".")
91
- instance_family_to_capacity.fetch(instance_family, 1) * az_to_capacity.fetch(az, 1)
111
+
112
+ capacity = weights.dig(az, instance_family) || weights.dig(instance_family, az)
113
+ (capacity || instance_family_to_capacity.fetch(instance_family, 1) * az_to_capacity.fetch(az, 1)).to_f
114
+ end
115
+
116
+ def build_partition_weights(topics)
117
+ # Duplicate the weights to not destruct @partition_weights or the return value of @partition_weights
118
+ weights = (@partition_weights.is_a?(Proc) ? @partition_weights.call() : @partition_weights).dup
119
+ topics.each do |t|
120
+ weights[t] = weights[t].dup || {}
121
+ weights[t].default = 1
122
+ end
123
+
124
+ weights
92
125
  end
93
126
  end
94
127
  end
@@ -7,9 +7,13 @@ module Kafka
7
7
  class MixedInstanceAssignmentStrategyFactory
8
8
  # @param instance_family_weights [Hash, Proc]
9
9
  # @param availability_zone_weights [Hash, Proc]
10
- def initialize(instance_family_weights: {}, availability_zone_weights: {})
10
+ # @param weights [Hash, Proc]
11
+ # @see Kafka::EC2::MixedInstanceAssignmentStrategy#initialize
12
+ def initialize(instance_family_weights: {}, availability_zone_weights: {}, weights: {}, partition_weights: {})
11
13
  @instance_family_weights = instance_family_weights
12
14
  @availability_zone_weights = availability_zone_weights
15
+ @weights = weights
16
+ @partition_weights = partition_weights
13
17
  end
14
18
 
15
19
  def create(cluster:)
@@ -17,6 +21,8 @@ module Kafka
17
21
  cluster: cluster,
18
22
  instance_family_weights: @instance_family_weights,
19
23
  availability_zone_weights: @availability_zone_weights,
24
+ weights: @weights,
25
+ partition_weights: @partition_weights,
20
26
  )
21
27
  end
22
28
  end
@@ -1,5 +1,5 @@
1
1
  module Kafka
2
2
  class EC2
3
- VERSION = "0.1.0"
3
+ VERSION = "0.1.5"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-kafka-ec2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - abicky
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-06-21 00:00:00.000000000 Z
11
+ date: 2020-10-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-kafka
@@ -116,7 +116,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
116
116
  - !ruby/object:Gem::Version
117
117
  version: '0'
118
118
  requirements: []
119
- rubygems_version: 3.1.2
119
+ rubygems_version: 3.0.3
120
120
  signing_key:
121
121
  specification_version: 4
122
122
  summary: An extension of ruby-kafka for EC2