ruby-kafka-ec2 0.1.0 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a216c0064e93662929aa08a27aee3caad6eb7d4a7eb02a658b6edef1ced9fa33
4
- data.tar.gz: 5982cfb402ea097fbc45580b3bf17c88d1ef6fef4c9f3fc3c06b71c0ba9fd8f7
3
+ metadata.gz: 8dde731c3652090bf18202d68b916cbdcff9ed09673bd84d5f20470a37c63373
4
+ data.tar.gz: d1a95de4724b3b5f85230c55a70469cc5e6c1e6008423b83f74c415bf2c9d289
5
5
  SHA512:
6
- metadata.gz: 47ef0c231763ba3b9c8ee95417eba748a205e8791bd973bdebdb7e4b30aa103f32c965fbbc47cc8aa157f6ae80bd8e4f141ac153264eb3514accb569a375f8d5
7
- data.tar.gz: '009ad55259d086a25252b8fd0aa6f9f3eecdbf0c25b79ac00c07a920b28fe7d4d1758043e7ef804364e56c869bd57e51b51690b44c16745fba45dc12ec2bdb40'
6
+ metadata.gz: f37b8fa41b773933aac85f170884adb75fc0e446faf9fb92c109aa039f5a869874194dbdf3a9099899e273ef8543f75c5f7aca0fd99cff1845bc43ac081bde50
7
+ data.tar.gz: 78bc5df7157441563d73e19f35804069ddbb2c1863bccfe2711594c27a6caed78a5209fdd2ca7f55a3cf2302bed326ec47302720bf4253a9f8b4df950e5a0d0f
data/README.md CHANGED
@@ -77,6 +77,59 @@ consumer = Kafka::EC2.with_assignment_strategy_factory(assignment_strategy_facto
77
77
  end
78
78
  ```
79
79
 
80
+ You can also specify weights for each combination of availability zones and instance families:
81
+
82
+ ```ruby
83
+ assignment_strategy_factory = Kafka::EC2::MixedInstanceAssignmentStrategyFactory.new(
84
+ weights: ->() {
85
+ db_cluster = rds.describe_db_clusters(filters: [
86
+ { name: "db-cluster-id", values: [ENV["RDS_CLUSTER"]] },
87
+ ]).db_clusters.first
88
+ db_instance_id = db_cluster.db_cluster_members.find { |m| m.is_cluster_writer }.db_instance_identifier
89
+ db_instance = rds.describe_db_instances(filters: [
90
+ { name: "db-cluster-id", values: [ENV["RDS_CLUSTER"]] },
91
+ { name: "db-instance-id", values: [db_instance_id] },
92
+ ]).db_instances.first
93
+
94
+ weights_for_writer_az = {
95
+ "r4" => 1.00,
96
+ "r5" => 1.20,
97
+ "m5" => 1.35,
98
+ "c5" => 1.50,
99
+ }
100
+ weights_for_other_az = {
101
+ "r4" => 0.40,
102
+ "r5" => 0.70,
103
+ "m5" => 0.80,
104
+ "c5" => 1.00,
105
+ }
106
+ if db_instance.availability_zone == "ap-northeast-1a"
107
+ {
108
+ "ap-northeast-1a" => weights_for_writer_az,
109
+ "ap-northeast-1c" => weights_for_other_az,
110
+ }
111
+ else
112
+ {
113
+ "ap-northeast-1a" => weights_for_other_az,
114
+ "ap-northeast-1c" => weights_for_writer_az,,
115
+ }
116
+ end
117
+ },
118
+ )
119
+ ```
120
+
121
+ The strategy also has the option `partition_weights`. This is useful when the topic has some skewed partitions. Suppose the partition with ID 0 of the topic "foo" receives twice as many records as other partitions. To reduce the number of partitions assigned to the consumer that consumes the partition with ID 0, specify `partition_weights` like below:
122
+
123
+ ```ruby
124
+ assignment_strategy_factory = Kafka::EC2::MixedInstanceAssignmentStrategyFactory.new(
125
+ partition_weights: {
126
+ "foo" => {
127
+ 0 => 2,
128
+ },
129
+ }
130
+ )
131
+ ```
132
+
80
133
  ## Development
81
134
 
82
135
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -10,7 +10,7 @@ module Kafka
10
10
  module Ext
11
11
  module Protocol
12
12
  module JoinGroupRequest
13
- def initialize(*args, topics:, **kwargs)
13
+ def initialize(*args, topics: [], **kwargs)
14
14
  super
15
15
  if Kafka::EC2.assignment_strategy_classes[@group_id] == Kafka::EC2::MixedInstanceAssignmentStrategy
16
16
  user_data = Net::HTTP.start("169.254.169.254", 80) do |http|
@@ -18,10 +18,17 @@ module Kafka
18
18
  # is the availability zone and whose value is the weight. If the object is a proc,
19
19
  # it must return such a hash and the proc is called every time the method "assign"
20
20
  # is called.
21
- def initialize(cluster:, instance_family_weights:, availability_zone_weights:)
21
+ # @param weights [Hash{String => Hash{String => Numeric}}, Proc] a hash whose key
22
+ # is the availability zone or the instance family and whose value is the hash like
23
+ # instance_family_weights or availability_zone_weights. If the object is a proc,
24
+ # it must return such a hash and the proc is called every time the method "assign"
25
+ # is called.
26
+ def initialize(cluster:, instance_family_weights: {}, availability_zone_weights: {}, weights: {}, partition_weights: {})
22
27
  @cluster = cluster
23
28
  @instance_family_weights = instance_family_weights
24
29
  @availability_zone_weights = availability_zone_weights
30
+ @weights = weights
31
+ @partition_weights = partition_weights
25
32
  end
26
33
 
27
34
  # Assign the topic partitions to the group members.
@@ -38,12 +45,13 @@ module Kafka
38
45
 
39
46
  instance_family_to_capacity = @instance_family_weights.is_a?(Proc) ? @instance_family_weights.call() : @instance_family_weights
40
47
  az_to_capacity = @availability_zone_weights.is_a?(Proc) ? @availability_zone_weights.call() : @availability_zone_weights
48
+ weights = @weights.is_a?(Proc) ? @weights.call() : @weights
41
49
  members.each do |member_id|
42
50
  group_assignment[member_id] = Protocol::MemberAssignment.new
43
51
 
44
52
  instance_id, instance_type, az = member_id_to_metadata[member_id].split(",")
45
53
  instance_id_to_member_ids[instance_id] << member_id
46
- capacity = calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity)
54
+ capacity = calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity, weights)
47
55
  instance_id_to_capacity[instance_id] += capacity
48
56
  total_capacity += capacity
49
57
  end
@@ -57,24 +65,36 @@ module Kafka
57
65
  Array.new(partitions.count) { topic }.zip(partitions)
58
66
  end
59
67
 
60
- partition_count_per_capacity = topic_partitions.size / total_capacity
68
+ partition_weights = build_partition_weights(topics)
69
+ partition_weight_per_capacity = topic_partitions.sum { |topic, partition| partition_weights.dig(topic, partition) } / total_capacity
70
+
61
71
  last_index = 0
62
- instance_id_to_capacity.sort_by { |_, capacity| -capacity }.each do |instance_id, capacity|
63
- partition_count = (capacity * partition_count_per_capacity).round
72
+ member_id_to_acceptable_partition_weight = {}
73
+ instance_id_to_capacity.each do |instance_id, capacity|
64
74
  member_ids = instance_id_to_member_ids[instance_id]
65
- topic_partitions[last_index, partition_count]&.each_with_index do |(topic, partition), index|
66
- member_id = member_ids[index % member_ids.size]
67
- group_assignment[member_id].assign(topic, [partition])
68
- end
75
+ member_ids.each do |member_id|
76
+ acceptable_partition_weight = capacity * partition_weight_per_capacity / member_ids.size
77
+ loop do
78
+ topic, partition = topic_partitions[last_index]
79
+ partition_weight = partition_weights.dig(topic, partition)
80
+ if last_index == topic_partitions.size || acceptable_partition_weight - partition_weight < 0
81
+ member_id_to_acceptable_partition_weight[member_id] = acceptable_partition_weight
82
+ break
83
+ end
69
84
 
70
- last_index += partition_count
85
+ group_assignment[member_id].assign(topic, [partition])
86
+ last_index += 1
87
+ acceptable_partition_weight -= partition_weight
88
+ end
89
+ end
71
90
  end
72
91
 
73
92
  if last_index < topic_partitions.size
74
- member_ids = instance_id_to_member_ids.values.flatten
75
- topic_partitions[last_index, topic_partitions.size].each_with_index do |(topic, partition), index|
76
- member_id = member_ids[index % member_ids.size]
93
+ member_id_to_acceptable_partition_weight.sort_by { |_, remaining| -remaining }.each do |member_id, _|
94
+ topic, partition = topic_partitions[last_index]
77
95
  group_assignment[member_id].assign(topic, [partition])
96
+ last_index += 1
97
+ break if last_index == topic_partitions.size
78
98
  end
79
99
  end
80
100
 
@@ -86,9 +106,22 @@ module Kafka
86
106
 
87
107
  private
88
108
 
89
- def calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity)
109
+ def calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity, weights)
90
110
  instance_family, _ = instance_type.split(".")
91
- instance_family_to_capacity.fetch(instance_family, 1) * az_to_capacity.fetch(az, 1)
111
+
112
+ capacity = weights.dig(az, instance_family) || weights.dig(instance_family, az)
113
+ (capacity || instance_family_to_capacity.fetch(instance_family, 1) * az_to_capacity.fetch(az, 1)).to_f
114
+ end
115
+
116
+ def build_partition_weights(topics)
117
+ # Duplicate the weights to not destruct @partition_weights or the return value of @partition_weights
118
+ weights = (@partition_weights.is_a?(Proc) ? @partition_weights.call() : @partition_weights).dup
119
+ topics.each do |t|
120
+ weights[t] = weights[t].dup || {}
121
+ weights[t].default = 1
122
+ end
123
+
124
+ weights
92
125
  end
93
126
  end
94
127
  end
@@ -7,9 +7,13 @@ module Kafka
7
7
  class MixedInstanceAssignmentStrategyFactory
8
8
  # @param instance_family_weights [Hash, Proc]
9
9
  # @param availability_zone_weights [Hash, Proc]
10
- def initialize(instance_family_weights: {}, availability_zone_weights: {})
10
+ # @param weights [Hash, Proc]
11
+ # @see Kafka::EC2::MixedInstanceAssignmentStrategy#initialize
12
+ def initialize(instance_family_weights: {}, availability_zone_weights: {}, weights: {}, partition_weights: {})
11
13
  @instance_family_weights = instance_family_weights
12
14
  @availability_zone_weights = availability_zone_weights
15
+ @weights = weights
16
+ @partition_weights = partition_weights
13
17
  end
14
18
 
15
19
  def create(cluster:)
@@ -17,6 +21,8 @@ module Kafka
17
21
  cluster: cluster,
18
22
  instance_family_weights: @instance_family_weights,
19
23
  availability_zone_weights: @availability_zone_weights,
24
+ weights: @weights,
25
+ partition_weights: @partition_weights,
20
26
  )
21
27
  end
22
28
  end
@@ -1,5 +1,5 @@
1
1
  module Kafka
2
2
  class EC2
3
- VERSION = "0.1.0"
3
+ VERSION = "0.1.5"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-kafka-ec2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - abicky
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-06-21 00:00:00.000000000 Z
11
+ date: 2020-10-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-kafka
@@ -116,7 +116,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
116
116
  - !ruby/object:Gem::Version
117
117
  version: '0'
118
118
  requirements: []
119
- rubygems_version: 3.1.2
119
+ rubygems_version: 3.0.3
120
120
  signing_key:
121
121
  specification_version: 4
122
122
  summary: An extension of ruby-kafka for EC2