karafka 2.0.6 → 2.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f7c672af31943cc38566d944bc4cb0466145e98a2130295f3ca24c275dccdc64
4
- data.tar.gz: 7eeaea273e18f31a29c79591a2627c2f79f0915663157cc9e31e1dd354c9deb0
3
+ metadata.gz: 0abed3f97a58be6b48f640468f7d7e6d48bc0960596b21d022b4616dd047be28
4
+ data.tar.gz: 48143253beee640e25e47a81474767c179e715e855d6173b59566483a57af5a8
5
5
  SHA512:
6
- metadata.gz: '032765549abe7a8afb1866d6b8ac89ba711a8a04f1b7370018663fc140aa0d30a3966151d1524f49d12b1dfe7a4fafb72a008d0af2fe39ffc959e34c385f5469'
7
- data.tar.gz: 10194fcd2ada3015b1bcd549ce286dafd5f94051b98a1752e2b5d9767d7c99ceaff47ce84429e345a99fb920f51cfabd4b5779d29da99b1e11fa6981e33b437f
6
+ metadata.gz: 9c9f8c170ac82fc0f1eb6ea41698dcd82cc525006931a59443d004c94eb18b56ffcb67eb1eb45fcc1fd557fee22e6e63ceb7a8a001245469e3e574d87c88c8e8
7
+ data.tar.gz: 47bc7e7dfe5ca3d503a3cb18da4e4b95c076197dc26b5633195e169d3f4d94da4effaf27bd4360ddff1481031b1ee20f61e465e24f6984570f6067ca4fbd51ea
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
1
  # Karafka framework changelog
2
2
 
3
+ ## 2.0.7 (Unreleased)
4
+ - [Breaking change] Redefine the Virtual Partitions routing DSL to accept concurrency
5
+ - Allow for `concurrency` setting in Virtual Partitions to extend or limit the number of jobs per regular partition. This makes it possible to ensure that we do not use all the threads on virtual partitions jobs
6
+ - Allow for creation of as many Virtual Partitions as needed, without taking global `concurrency` into consideration
7
+
3
8
  ## 2.0.6 (2022-09-02)
4
9
  - Improve client closing.
5
10
  - Fix for: Multiple LRJ topics fetched concurrently block ability for LRJ to kick in (#1002)
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- karafka (2.0.6)
4
+ karafka (2.0.7)
5
5
  karafka-core (>= 2.0.2, < 3.0.0)
6
6
  rdkafka (>= 0.12)
7
7
  thor (>= 0.20)
data/config/errors.yml CHANGED
@@ -55,3 +55,5 @@ en:
55
55
 
56
56
  pro_consumer_group_topic:
57
57
  consumer_format: needs to inherit from Karafka::Pro::BaseConsumer and not Karafka::Consumer
58
+ virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
59
+ virtual_partitions.concurrency_format: needs to be equal to or more than 1
@@ -35,7 +35,7 @@ module Karafka
35
35
 
36
36
  # We cannot mark jobs as done after each if there are virtual partitions. Otherwise
37
37
  # this could create random markings
38
- next if topic.virtual_partitioner?
38
+ next if topic.virtual_partitions?
39
39
 
40
40
  mark_as_consumed(message)
41
41
  end
@@ -22,11 +22,31 @@ module Karafka
22
22
  ).fetch('en').fetch('validations').fetch('pro_consumer_group_topic')
23
23
  end
24
24
 
25
- virtual do |data|
25
+ nested(:virtual_partitions) do
26
+ required(:active) { |val| [true, false].include?(val) }
27
+ required(:partitioner) { |val| val.nil? || val.respond_to?(:call) }
28
+ required(:concurrency) { |val| val.is_a?(Integer) && val >= 1 }
29
+ end
30
+
31
+ virtual do |data, errors|
32
+ next unless errors.empty?
26
33
  next if data[:consumer] < Karafka::Pro::BaseConsumer
27
34
 
28
35
  [[%i[consumer], :consumer_format]]
29
36
  end
37
+
38
+ # When virtual partitions are defined, partitioner needs to respond to `#call` and it
39
+ # cannot be nil
40
+ virtual do |data, errors|
41
+ next unless errors.empty?
42
+
43
+ virtual_partitions = data[:virtual_partitions]
44
+
45
+ next unless virtual_partitions[:active]
46
+ next if virtual_partitions[:partitioner].respond_to?(:call)
47
+
48
+ [[%i[virtual_partitions partitioner], :respond_to_call]]
49
+ end
30
50
  end
31
51
  end
32
52
  end
@@ -67,7 +67,7 @@ module Karafka
67
67
 
68
68
  # Loads routing extensions
69
69
  def load_routing_extensions
70
- ::Karafka::Routing::Topic.include(Routing::TopicExtensions)
70
+ ::Karafka::Routing::Topic.prepend(Routing::TopicExtensions)
71
71
  ::Karafka::Routing::Builder.prepend(Routing::BuilderExtensions)
72
72
  end
73
73
  end
@@ -21,17 +21,15 @@ module Karafka
21
21
  def call(topic, messages)
22
22
  ktopic = @subscription_group.topics.find(topic)
23
23
 
24
- @concurrency ||= ::Karafka::App.config.concurrency
25
-
26
24
  # We only partition work if we have a virtual partitioner and more than one thread to
27
25
  # process the data. With one thread it is not worth partitioning the work as the work
28
26
  # itself will be assigned to one thread (pointless work)
29
- if ktopic.virtual_partitioner? && @concurrency > 1
27
+ if ktopic.virtual_partitions? && ktopic.virtual_partitions.concurrency > 1
30
28
  # We need to reduce it to number of threads, so the group_id is not a direct effect
31
29
  # of the end user action. Otherwise the persistence layer for consumers would cache
32
30
  # it forever and it would cause memory leaks
33
31
  groupings = messages
34
- .group_by { |msg| ktopic.virtual_partitioner.call(msg) }
32
+ .group_by { |msg| ktopic.virtual_partitions.partitioner.call(msg) }
35
33
  .values
36
34
 
37
35
  # Reduce the max concurrency to a size that matches the concurrency
@@ -41,7 +39,7 @@ module Karafka
41
39
  # The algorithm here is simple, we assume that the most costly in terms of processing,
42
40
  # will be processing of the biggest group and we reduce the smallest ones to have
43
41
  # max of groups equal to concurrency
44
- while groupings.size > @concurrency
42
+ while groupings.size > ktopic.virtual_partitions.concurrency
45
43
  groupings.sort_by! { |grouping| -grouping.size }
46
44
 
47
45
  # Offset order needs to be maintained for virtual partitions
@@ -15,23 +15,59 @@ module Karafka
15
15
  module Routing
16
16
  # Routing extensions that allow to configure some extra PRO routing options
17
17
  module TopicExtensions
18
+ # Internal representation of the virtual partitions settings and configuration
19
+ # This allows us to abstract away things in a nice manner
20
+ #
21
+ # For features with more options than just on/off we use this approach as it simplifies
22
+ # the code. We do not use it for all features, so as not to create unneeded complexity
23
+ VirtualPartitions = Struct.new(
24
+ :active,
25
+ :partitioner,
26
+ :concurrency,
27
+ keyword_init: true
28
+ ) { alias_method :active?, :active }
29
+
18
30
  class << self
19
31
  # @param base [Class] class we extend
20
- def included(base)
32
+ def prepended(base)
21
33
  base.attr_accessor :long_running_job
22
- base.attr_accessor :virtual_partitioner
23
34
  end
24
35
  end
25
36
 
26
- # @return [Boolean] true if virtual partitioner is defined, false otherwise
27
- def virtual_partitioner?
28
- virtual_partitioner != nil
37
+ # @param concurrency [Integer] max number of virtual partitions that can come out of the
38
+ # single distribution flow. When set to more than the Karafka threading, will create
39
+ # more work than workers. When less, can ensure we have spare resources to process other
40
+ # things in parallel.
41
+ # @param partitioner [nil, #call] nil or callable partitioner
42
+ # @return [VirtualPartitions] method that allows to set the virtual partitions details
43
+ # during the routing configuration and then allows to retrieve it
44
+ def virtual_partitions(
45
+ concurrency: Karafka::App.config.concurrency,
46
+ partitioner: nil
47
+ )
48
+ @virtual_partitions ||= VirtualPartitions.new(
49
+ active: !partitioner.nil?,
50
+ concurrency: concurrency,
51
+ partitioner: partitioner
52
+ )
53
+ end
54
+
55
+ # @return [Boolean] are virtual partitions enabled for given topic
56
+ def virtual_partitions?
57
+ virtual_partitions.active?
29
58
  end
30
59
 
31
60
  # @return [Boolean] is a given job on a topic a long-running one
32
61
  def long_running_job?
33
62
  @long_running_job || false
34
63
  end
64
+
65
+ # @return [Hash] hash with topic details and the extensions details
66
+ def to_h
67
+ super.merge(
68
+ virtual_partitions: virtual_partitions.to_h
69
+ )
70
+ end
35
71
  end
36
72
  end
37
73
  end
@@ -7,15 +7,6 @@ module Karafka
7
7
  class Proxy
8
8
  attr_reader :target
9
9
 
10
- # We should proxy only non ? and = methods as we want to have a regular dsl
11
- IGNORED_POSTFIXES = %w[
12
- ?
13
- =
14
- !
15
- ].freeze
16
-
17
- private_constant :IGNORED_POSTFIXES
18
-
19
10
  # @param target [Object] target object to which we proxy any DSL call
20
11
  # @param block [Proc] block that we want to evaluate in the proxy context
21
12
  def initialize(target, &block)
@@ -25,21 +16,23 @@ module Karafka
25
16
 
26
17
  # Translates the no "=" DSL of routing into elements assignments on target
27
18
  # @param method_name [Symbol] name of the missing method
28
- # @param arguments [Array] array with it's arguments
29
- # @param block [Proc] block provided to the method
30
- def method_missing(method_name, *arguments, &block)
19
+ def method_missing(method_name, ...)
31
20
  return super unless respond_to_missing?(method_name)
32
21
 
33
- @target.public_send(:"#{method_name}=", *arguments, &block)
22
+ if @target.respond_to?(:"#{method_name}=")
23
+ @target.public_send(:"#{method_name}=", ...)
24
+ else
25
+ @target.public_send(method_name, ...)
26
+ end
34
27
  end
35
28
 
36
29
  # Tells whether or not a given element exists on the target
37
30
  # @param method_name [Symbol] name of the missing method
38
31
  # @param include_private [Boolean] should we include private in the check as well
39
32
  def respond_to_missing?(method_name, include_private = false)
40
- return false if IGNORED_POSTFIXES.any? { |postfix| method_name.to_s.end_with?(postfix) }
41
-
42
- @target.respond_to?(:"#{method_name}=", include_private) || super
33
+ @target.respond_to?(:"#{method_name}=", include_private) ||
34
+ @target.respond_to?(method_name, include_private) ||
35
+ super
43
36
  end
44
37
  end
45
38
  end
@@ -3,5 +3,5 @@
3
3
  # Main module namespace
4
4
  module Karafka
5
5
  # Current Karafka version
6
- VERSION = '2.0.6'
6
+ VERSION = '2.0.7'
7
7
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: karafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.6
4
+ version: 2.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
35
35
  Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
36
36
  MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
37
37
  -----END CERTIFICATE-----
38
- date: 2022-09-02 00:00:00.000000000 Z
38
+ date: 2022-09-05 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: karafka-core
metadata.gz.sig CHANGED
Binary file