RubyGems - ruby-kafka-ec2 - Versions diffs - 0.1.0 → 0.1.5 - Mend

ruby-kafka-ec2 0.1.0 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

checksums.yaml +4 -4
data/README.md +53 -0
data/lib/kafka/ec2/ext/protocol/join_group_request.rb +1 -1
data/lib/kafka/ec2/mixed_instance_assignment_strategy.rb +48 -15
data/lib/kafka/ec2/mixed_instance_assignment_strategy_factory.rb +7 -1
data/lib/kafka/ec2/version.rb +1 -1
metadata +3 -3

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: a216c0064e93662929aa08a27aee3caad6eb7d4a7eb02a658b6edef1ced9fa33
-  data.tar.gz: 5982cfb402ea097fbc45580b3bf17c88d1ef6fef4c9f3fc3c06b71c0ba9fd8f7
+  metadata.gz: 8dde731c3652090bf18202d68b916cbdcff9ed09673bd84d5f20470a37c63373
+  data.tar.gz: d1a95de4724b3b5f85230c55a70469cc5e6c1e6008423b83f74c415bf2c9d289
 SHA512:
-  metadata.gz: 47ef0c231763ba3b9c8ee95417eba748a205e8791bd973bdebdb7e4b30aa103f32c965fbbc47cc8aa157f6ae80bd8e4f141ac153264eb3514accb569a375f8d5
-  data.tar.gz: '009ad55259d086a25252b8fd0aa6f9f3eecdbf0c25b79ac00c07a920b28fe7d4d1758043e7ef804364e56c869bd57e51b51690b44c16745fba45dc12ec2bdb40'
+  metadata.gz: f37b8fa41b773933aac85f170884adb75fc0e446faf9fb92c109aa039f5a869874194dbdf3a9099899e273ef8543f75c5f7aca0fd99cff1845bc43ac081bde50
+  data.tar.gz: 78bc5df7157441563d73e19f35804069ddbb2c1863bccfe2711594c27a6caed78a5209fdd2ca7f55a3cf2302bed326ec47302720bf4253a9f8b4df950e5a0d0f

data/README.md CHANGED

@@ -77,6 +77,59 @@ consumer = Kafka::EC2.with_assignment_strategy_factory(assignment_strategy_facto
 end
 ```
+You can also specify weights for each combination of availability zones and instance families:
+```ruby
+assignment_strategy_factory = Kafka::EC2::MixedInstanceAssignmentStrategyFactory.new(
+  weights: ->() {
+    db_cluster = rds.describe_db_clusters(filters: [
+      { name: "db-cluster-id", values: [ENV["RDS_CLUSTER"]] },
+    ]).db_clusters.first
+    db_instance_id = db_cluster.db_cluster_members.find { |m| m.is_cluster_writer }.db_instance_identifier
+    db_instance = rds.describe_db_instances(filters: [
+      { name: "db-cluster-id", values: [ENV["RDS_CLUSTER"]] },
+      { name: "db-instance-id", values: [db_instance_id] },
+    ]).db_instances.first
+    weights_for_writer_az = {
+      "r4" => 1.00,
+      "r5" => 1.20,
+      "m5" => 1.35,
+      "c5" => 1.50,
+    }
+    weights_for_other_az = {
+      "r4" => 0.40,
+      "r5" => 0.70,
+      "m5" => 0.80,
+      "c5" => 1.00,
+    }
+    if db_instance.availability_zone == "ap-northeast-1a"
+      {
+        "ap-northeast-1a" => weights_for_writer_az,
+        "ap-northeast-1c" => weights_for_other_az,
+      }
+    else
+      {
+        "ap-northeast-1a" => weights_for_other_az,
+        "ap-northeast-1c" => weights_for_writer_az,
+      }
+    end
+  },
+)
+```
+The strategy also has the option `partition_weights`. This is useful when the topic has some skewed partitions. Suppose the partition with ID 0 of the topic "foo" receives twice as many records as other partitions. To reduce the number of partitions assigned to the consumer that consumes the partition with ID 0, specify `partition_weights` like below:
+```ruby
+assignment_strategy_factory = Kafka::EC2::MixedInstanceAssignmentStrategyFactory.new(
+  partition_weights: {
+    "foo" => {
+      0 => 2,
+    },
+  }
+)
+```
 ## Development
 After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.

data/lib/kafka/ec2/ext/protocol/join_group_request.rb CHANGED

@@ -10,7 +10,7 @@ module Kafka
     module Ext
       module Protocol
         module JoinGroupRequest
-          def initialize(*args, topics:, **kwargs)
+          def initialize(*args, topics: [], **kwargs)
             super
             if Kafka::EC2.assignment_strategy_classes[@group_id] == Kafka::EC2::MixedInstanceAssignmentStrategy
               user_data = Net::HTTP.start("169.254.169.254", 80) do |http|

data/lib/kafka/ec2/mixed_instance_assignment_strategy.rb CHANGED

@@ -18,10 +18,17 @@ module Kafka
       #   is the availability zone and whose value is the weight. If the object is a proc,
       #   it must return such a hash and the proc is called every time the method "assign"
       #   is called.
-      def initialize(cluster:, instance_family_weights:, availability_zone_weights:)
+      # @param weights [Hash{String => Hash{String => Numeric}}, Proc] a hash whose key
+      #   is the availability zone or the instance family and whose value is a hash like
+      #   instance_family_weights or availability_zone_weights. If the object is a proc,
+      #   it must return such a hash and the proc is called every time the method "assign"
+      #   is called.
+      def initialize(cluster:, instance_family_weights: {}, availability_zone_weights: {}, weights: {}, partition_weights: {})
         @cluster = cluster
         @instance_family_weights = instance_family_weights
         @availability_zone_weights = availability_zone_weights
+        @weights = weights
+        @partition_weights = partition_weights
       end
       # Assign the topic partitions to the group members.
@@ -38,12 +45,13 @@ module Kafka
         instance_family_to_capacity = @instance_family_weights.is_a?(Proc) ? @instance_family_weights.call() : @instance_family_weights
         az_to_capacity = @availability_zone_weights.is_a?(Proc) ? @availability_zone_weights.call() : @availability_zone_weights
+        weights = @weights.is_a?(Proc) ? @weights.call() : @weights
         members.each do |member_id|
           group_assignment[member_id] = Protocol::MemberAssignment.new
           instance_id, instance_type, az = member_id_to_metadata[member_id].split(",")
           instance_id_to_member_ids[instance_id] << member_id
-          capacity = calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity)
+          capacity = calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity, weights)
           instance_id_to_capacity[instance_id] += capacity
           total_capacity += capacity
         end
@@ -57,24 +65,36 @@ module Kafka
           Array.new(partitions.count) { topic }.zip(partitions)
         end
-        partition_count_per_capacity = topic_partitions.size / total_capacity
+        partition_weights = build_partition_weights(topics)
+        partition_weight_per_capacity = topic_partitions.sum { |topic, partition| partition_weights.dig(topic, partition) } / total_capacity
         last_index = 0
-        instance_id_to_capacity.sort_by { |_, capacity| -capacity }.each do |instance_id, capacity|
-          partition_count = (capacity * partition_count_per_capacity).round
+        member_id_to_acceptable_partition_weight = {}
+        instance_id_to_capacity.each do |instance_id, capacity|
           member_ids = instance_id_to_member_ids[instance_id]
-          topic_partitions[last_index, partition_count]&.each_with_index do |(topic, partition), index|
-            member_id = member_ids[index % member_ids.size]
-            group_assignment[member_id].assign(topic, [partition])
-          end
+          member_ids.each do |member_id|
+            acceptable_partition_weight = capacity * partition_weight_per_capacity / member_ids.size
+            loop do
+              topic, partition = topic_partitions[last_index]
+              partition_weight = partition_weights.dig(topic, partition)
+              if last_index == topic_partitions.size || acceptable_partition_weight - partition_weight < 0
+                member_id_to_acceptable_partition_weight[member_id] = acceptable_partition_weight
+                break
+              end
-          last_index += partition_count
+              group_assignment[member_id].assign(topic, [partition])
+              last_index += 1
+              acceptable_partition_weight -= partition_weight
+            end
+          end
         end
         if last_index < topic_partitions.size
-          member_ids = instance_id_to_member_ids.values.flatten
-          topic_partitions[last_index, topic_partitions.size].each_with_index do |(topic, partition), index|
-            member_id = member_ids[index % member_ids.size]
+          member_id_to_acceptable_partition_weight.sort_by { |_, remaining| -remaining }.each do |member_id, _|
+            topic, partition = topic_partitions[last_index]
             group_assignment[member_id].assign(topic, [partition])
+            last_index += 1
+            break if last_index == topic_partitions.size
           end
         end
@@ -86,9 +106,22 @@ module Kafka
       private
-      def calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity)
+      def calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity, weights)
         instance_family, _ = instance_type.split(".")
-        instance_family_to_capacity.fetch(instance_family, 1) * az_to_capacity.fetch(az, 1)
+        capacity = weights.dig(az, instance_family) || weights.dig(instance_family, az)
+        (capacity || instance_family_to_capacity.fetch(instance_family, 1) * az_to_capacity.fetch(az, 1)).to_f
+      end
+      def build_partition_weights(topics)
+        # Duplicate the weights to not destruct @partition_weights or the return value of @partition_weights
+        weights = (@partition_weights.is_a?(Proc) ? @partition_weights.call() : @partition_weights).dup
+        topics.each do |t|
+          weights[t] = weights[t].dup || {}
+          weights[t].default = 1
+        end
+        weights
       end
     end
   end

data/lib/kafka/ec2/mixed_instance_assignment_strategy_factory.rb CHANGED

@@ -7,9 +7,13 @@ module Kafka
     class MixedInstanceAssignmentStrategyFactory
       # @param instance_family_weights [Hash, Proc]
       # @param availability_zone_weights [Hash, Proc]
-      def initialize(instance_family_weights: {}, availability_zone_weights: {})
+      # @param weights [Hash, Proc]
+      # @see Kafka::EC2::MixedInstanceAssignmentStrategy#initialize
+      def initialize(instance_family_weights: {}, availability_zone_weights: {}, weights: {}, partition_weights: {})
         @instance_family_weights = instance_family_weights
         @availability_zone_weights = availability_zone_weights
+        @weights = weights
+        @partition_weights = partition_weights
       end
       def create(cluster:)
@@ -17,6 +21,8 @@ module Kafka
           cluster: cluster,
           instance_family_weights: @instance_family_weights,
           availability_zone_weights: @availability_zone_weights,
+          weights: @weights,
+          partition_weights: @partition_weights,
         )
       end
     end

data/lib/kafka/ec2/version.rb CHANGED

@@ -1,5 +1,5 @@
 module Kafka
   class EC2
-    VERSION = "0.1.0"
+    VERSION = "0.1.5"
   end
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka-ec2
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.1.5
 platform: ruby
 authors:
 - abicky
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2020-06-21 00:00:00.000000000 Z
+date: 2020-10-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ruby-kafka
@@ -116,7 +116,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.1.2
+rubygems_version: 3.0.3
 signing_key:
 specification_version: 4
 summary: An extension of ruby-kafka for EC2