karafka 2.0.6 → 2.0.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f7c672af31943cc38566d944bc4cb0466145e98a2130295f3ca24c275dccdc64
4
- data.tar.gz: 7eeaea273e18f31a29c79591a2627c2f79f0915663157cc9e31e1dd354c9deb0
3
+ metadata.gz: 1a646d63d674e6ecb80625097426f48294fe8e8b3f3e9e020cf92645a5728251
4
+ data.tar.gz: 461c58c355f84b81c3396cb4164a426c3f1e87747b3101de1e7c6fbf2a876778
5
5
  SHA512:
6
- metadata.gz: '032765549abe7a8afb1866d6b8ac89ba711a8a04f1b7370018663fc140aa0d30a3966151d1524f49d12b1dfe7a4fafb72a008d0af2fe39ffc959e34c385f5469'
7
- data.tar.gz: 10194fcd2ada3015b1bcd549ce286dafd5f94051b98a1752e2b5d9767d7c99ceaff47ce84429e345a99fb920f51cfabd4b5779d29da99b1e11fa6981e33b437f
6
+ metadata.gz: 623832478b11b1fa61c7906f423417500838e30ef97256a59290d73dcfe17670cf7eb11adcf33d422ae590c6ce5f19215340c89d369f8ec5b4e609af07d0befb
7
+ data.tar.gz: 97e50ed131c939d09de884d3ddba826381e7d314d3bc4660ecf947fa6582d9e6f16e023dad121612eadffcd441a73874c96e287e5ec1885e008d843357e94b93
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
1
1
  # Karafka framework changelog
2
2
 
3
+ ## 2.0.8 (2022-09-19)
4
+ - [Breaking change] Rename Virtual Partitions `concurrency` to `max_partitions` to avoid confusion (#1023).
5
+ - Allow for block based subscription groups management (#1030).
6
+
7
+ ## 2.0.7 (2022-09-05)
8
+ - [Breaking change] Redefine the Virtual Partitions routing DSL to accept concurrency
9
+ - Allow for `concurrency` setting in Virtual Partitions to extend or limit number of jobs per regular partition. This allows us to make sure we do not use all the threads on virtual partitions jobs
10
+ - Allow for creation of as many Virtual Partitions as needed, without taking global `concurrency` into consideration
11
+
3
12
  ## 2.0.6 (2022-09-02)
4
13
  - Improve client closing.
5
14
  - Fix for: Multiple LRJ topics fetched concurrently block ability for LRJ to kick in (#1002)
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- karafka (2.0.6)
4
+ karafka (2.0.8)
5
5
  karafka-core (>= 2.0.2, < 3.0.0)
6
6
  rdkafka (>= 0.12)
7
7
  thor (>= 0.20)
data/config/errors.yml CHANGED
@@ -55,3 +55,5 @@ en:
55
55
 
56
56
  pro_consumer_group_topic:
57
57
  consumer_format: needs to inherit from Karafka::Pro::BaseConsumer and not Karafka::Consumer
58
+ virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
59
+ virtual_partitions.max_partitions_format: needs to be equal or more than 1
@@ -124,7 +124,7 @@ module Karafka
124
124
 
125
125
  # Method that will perform business logic and on data received from Kafka (it will consume
126
126
  # the data)
127
- # @note This method needs bo be implemented in a subclass. We stub it here as a failover if
127
+ # @note This method needs to be implemented in a subclass. We stub it here as a failover if
128
128
  # someone forgets about it or makes one with a typo
129
129
  def consume
130
130
  raise NotImplementedError, 'Implement this in a subclass'
@@ -35,7 +35,7 @@ module Karafka
35
35
 
36
36
  # We cannot mark jobs as done after each if there are virtual partitions. Otherwise
37
37
  # this could create random markings
38
- next if topic.virtual_partitioner?
38
+ next if topic.virtual_partitions?
39
39
 
40
40
  mark_as_consumed(message)
41
41
  end
@@ -102,7 +102,7 @@ module Karafka
102
102
  else
103
103
  # If processing failed, we need to pause
104
104
  # For long running job this will overwrite the default never-ending pause and will cause
105
- # the processing th keep going after the error backoff
105
+ # the processing to keep going after the error backoff
106
106
  pause(@seek_offset || first_message.offset)
107
107
  end
108
108
  end
@@ -22,11 +22,31 @@ module Karafka
22
22
  ).fetch('en').fetch('validations').fetch('pro_consumer_group_topic')
23
23
  end
24
24
 
25
- virtual do |data|
25
+ nested(:virtual_partitions) do
26
+ required(:active) { |val| [true, false].include?(val) }
27
+ required(:partitioner) { |val| val.nil? || val.respond_to?(:call) }
28
+ required(:max_partitions) { |val| val.is_a?(Integer) && val >= 1 }
29
+ end
30
+
31
+ virtual do |data, errors|
32
+ next unless errors.empty?
26
33
  next if data[:consumer] < Karafka::Pro::BaseConsumer
27
34
 
28
35
  [[%i[consumer], :consumer_format]]
29
36
  end
37
+
38
+ # When virtual partitions are defined, partitioner needs to respond to `#call` and it
39
+ # cannot be nil
40
+ virtual do |data, errors|
41
+ next unless errors.empty?
42
+
43
+ virtual_partitions = data[:virtual_partitions]
44
+
45
+ next unless virtual_partitions[:active]
46
+ next if virtual_partitions[:partitioner].respond_to?(:call)
47
+
48
+ [[%i[virtual_partitions partitioner], :respond_to_call]]
49
+ end
30
50
  end
31
51
  end
32
52
  end
@@ -67,7 +67,7 @@ module Karafka
67
67
 
68
68
  # Loads routing extensions
69
69
  def load_routing_extensions
70
- ::Karafka::Routing::Topic.include(Routing::TopicExtensions)
70
+ ::Karafka::Routing::Topic.prepend(Routing::TopicExtensions)
71
71
  ::Karafka::Routing::Builder.prepend(Routing::BuilderExtensions)
72
72
  end
73
73
  end
@@ -21,27 +21,25 @@ module Karafka
21
21
  def call(topic, messages)
22
22
  ktopic = @subscription_group.topics.find(topic)
23
23
 
24
- @concurrency ||= ::Karafka::App.config.concurrency
25
-
26
24
  # We only partition work if we have a virtual partitioner and more than one thread to
27
25
  # process the data. With one thread it is not worth partitioning the work as the work
28
26
  # itself will be assigned to one thread (pointless work)
29
- if ktopic.virtual_partitioner? && @concurrency > 1
27
+ if ktopic.virtual_partitions? && ktopic.virtual_partitions.max_partitions > 1
30
28
  # We need to reduce it to number of threads, so the group_id is not a direct effect
31
29
  # of the end user action. Otherwise the persistence layer for consumers would cache
32
30
  # it forever and it would cause memory leaks
33
31
  groupings = messages
34
- .group_by { |msg| ktopic.virtual_partitioner.call(msg) }
32
+ .group_by { |msg| ktopic.virtual_partitions.partitioner.call(msg) }
35
33
  .values
36
34
 
37
- # Reduce the max concurrency to a size that matches the concurrency
35
+ # Reduce the number of virtual partitions to a size that matches the max_partitions
38
36
  # As mentioned above we cannot use the partitioning keys directly as it could cause
39
37
  # memory leaks
40
38
  #
41
39
  # The algorithm here is simple, we assume that the most costly in terms of processing,
42
40
  # will be processing of the biggest group and we reduce the smallest ones to have
43
- # max of groups equal to concurrency
44
- while groupings.size > @concurrency
41
+ # max of groups equal to max_partitions
42
+ while groupings.size > ktopic.virtual_partitions.max_partitions
45
43
  groupings.sort_by! { |grouping| -grouping.size }
46
44
 
47
45
  # Offset order needs to be maintained for virtual partitions
@@ -15,23 +15,59 @@ module Karafka
15
15
  module Routing
16
16
  # Routing extensions that allow to configure some extra PRO routing options
17
17
  module TopicExtensions
18
+ # Internal representation of the virtual partitions settings and configuration
19
+ # This allows us to abstract away things in a nice manner
20
+ #
21
+ # For features with more options than just on/off we use this approach as it simplifies
22
+ # the code. We do not use it for all features so as not to create unneeded complexity
23
+ VirtualPartitions = Struct.new(
24
+ :active,
25
+ :partitioner,
26
+ :max_partitions,
27
+ keyword_init: true
28
+ ) { alias_method :active?, :active }
29
+
18
30
  class << self
19
31
  # @param base [Class] class we extend
20
- def included(base)
32
+ def prepended(base)
21
33
  base.attr_accessor :long_running_job
22
- base.attr_accessor :virtual_partitioner
23
34
  end
24
35
  end
25
36
 
26
- # @return [Boolean] true if virtual partitioner is defined, false otherwise
27
- def virtual_partitioner?
28
- virtual_partitioner != nil
37
+ # @param max_partitions [Integer] max number of virtual partitions that can come out of the
38
+ # single distribution flow. When set to more than the Karafka threading, will create
39
+ # more work than workers. When less, can ensure we have spare resources to process other
40
+ # things in parallel.
41
+ # @param partitioner [nil, #call] nil or callable partitioner
42
+ # @return [VirtualPartitions] method that allows to set the virtual partitions details
43
+ # during the routing configuration and then allows to retrieve it
44
+ def virtual_partitions(
45
+ max_partitions: Karafka::App.config.concurrency,
46
+ partitioner: nil
47
+ )
48
+ @virtual_partitions ||= VirtualPartitions.new(
49
+ active: !partitioner.nil?,
50
+ max_partitions: max_partitions,
51
+ partitioner: partitioner
52
+ )
53
+ end
54
+
55
+ # @return [Boolean] are virtual partitions enabled for given topic
56
+ def virtual_partitions?
57
+ virtual_partitions.active?
29
58
  end
30
59
 
31
60
  # @return [Boolean] is a given job on a topic a long-running one
32
61
  def long_running_job?
33
62
  @long_running_job || false
34
63
  end
64
+
65
+ # @return [Hash] hash with topic details and the extensions details
66
+ def to_h
67
+ super.merge(
68
+ virtual_partitions: virtual_partitions.to_h
69
+ )
70
+ end
35
71
  end
36
72
  end
37
73
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Karafka
4
4
  module Processing
5
- # Namespace for all the jobs that are suppose to run in workers.
5
+ # Namespace for all the jobs that are supposed to run in workers.
6
6
  module Jobs
7
7
  # Base class for all the job types that are supposed to run in worker threads.
8
8
  # Each job can have 3 main entry-points: `#before_call`, `#call` and `#after_call`
@@ -10,7 +10,7 @@ module Karafka
10
10
  class Base
11
11
  extend Forwardable
12
12
 
13
- # @note Since one job has always one executer, we use the jobs id and group id as reference
13
+ # @note Since one job has always one executor, we use the jobs id and group id as reference
14
14
  def_delegators :executor, :id, :group_id
15
15
 
16
16
  attr_reader :executor
@@ -13,7 +13,7 @@ module Karafka
13
13
  # @return [Karafka::Processing::JobsQueue]
14
14
  def initialize
15
15
  @queue = Queue.new
16
- # Those queues will act as a semaphores internally. Since we need an indicator for waiting
16
+ # Those queues will act as semaphores internally. Since we need an indicator for waiting
17
17
  # we could use Thread.pass but this is expensive. Instead we can just lock until any
18
18
  # of the workers finishes their work and we can re-check. This means that in the worse
19
19
  # scenario, we will context switch 10 times per poll instead of getting this thread
@@ -66,6 +66,16 @@ module Karafka
66
66
  end
67
67
  end
68
68
 
69
+ # Handles the simple routing case where we create one consumer group and allow for further
70
+ # subscription group customization
71
+ # @param subscription_group_name [String, Symbol] subscription group id
72
+ # @param block [Proc] further topics definitions
73
+ def subscription_group(subscription_group_name, &block)
74
+ consumer_group('app') do
75
+ target.public_send(:subscription_group=, subscription_group_name, &block)
76
+ end
77
+ end
78
+
69
79
  # In case we use simple style of routing, all topics will be assigned to the same consumer
70
80
  # group that will be based on the client_id
71
81
  #
@@ -10,6 +10,12 @@ module Karafka
10
10
  class ConsumerGroup
11
11
  attr_reader :id, :topics, :name
12
12
 
13
+ # This is a "virtual" attribute that does not build subscription groups.
14
+ # It allows us to store the "current" subscription group defined in the routing
15
+ # This subscription group id is then injected into topics, so we can compute the subscription
16
+ # groups
17
+ attr_accessor :current_subscription_group_name
18
+
13
19
  # @param name [String, Symbol] raw name of this consumer group. Raw means, that it does not
14
20
  # yet have an application client_id namespace, this will be added here by default.
15
21
  # We add it to make a multi-system development easier for people that don't use
@@ -32,7 +38,27 @@ module Karafka
32
38
  def topic=(name, &block)
33
39
  topic = Topic.new(name, self)
34
40
  @topics << Proxy.new(topic, &block).target
35
- @topics.last
41
+ built_topic = @topics.last
42
+ # We overwrite it conditionally in case it was not set by the user inline in the topic
43
+ # block definition
44
+ built_topic.subscription_group ||= current_subscription_group_name
45
+ built_topic
46
+ end
47
+
48
+ # Assigns the current subscription group id based on the defined one and allows for further
49
+ # topic definition
50
+ # @param name [String, Symbol]
51
+ # @param block [Proc] block that may include topics definitions
52
+ def subscription_group=(name, &block)
53
+ # We cast it here, so the routing supports symbol-based names, but that's anyhow later on
54
+ # validated as a string
55
+ self.current_subscription_group_name = name.to_s
56
+
57
+ Proxy.new(self, &block)
58
+
59
+ # We need to reset the current subscription group after it is used, so it won't leak
60
+ # outside to other topics that would be defined without a defined subscription group
61
+ self.current_subscription_group_name = nil
36
62
  end
37
63
 
38
64
  # @return [Array<Routing::SubscriptionGroup>] all the subscription groups build based on
@@ -7,15 +7,6 @@ module Karafka
7
7
  class Proxy
8
8
  attr_reader :target
9
9
 
10
- # We should proxy only non ? and = methods as we want to have a regular dsl
11
- IGNORED_POSTFIXES = %w[
12
- ?
13
- =
14
- !
15
- ].freeze
16
-
17
- private_constant :IGNORED_POSTFIXES
18
-
19
10
  # @param target [Object] target object to which we proxy any DSL call
20
11
  # @param block [Proc] block that we want to evaluate in the proxy context
21
12
  def initialize(target, &block)
@@ -25,21 +16,23 @@ module Karafka
25
16
 
26
17
  # Translates the no "=" DSL of routing into elements assignments on target
27
18
  # @param method_name [Symbol] name of the missing method
28
- # @param arguments [Array] array with it's arguments
29
- # @param block [Proc] block provided to the method
30
- def method_missing(method_name, *arguments, &block)
19
+ def method_missing(method_name, ...)
31
20
  return super unless respond_to_missing?(method_name)
32
21
 
33
- @target.public_send(:"#{method_name}=", *arguments, &block)
22
+ if @target.respond_to?(:"#{method_name}=")
23
+ @target.public_send(:"#{method_name}=", ...)
24
+ else
25
+ @target.public_send(method_name, ...)
26
+ end
34
27
  end
35
28
 
36
29
  # Tells whether or not a given element exists on the target
37
30
  # @param method_name [Symbol] name of the missing method
38
31
  # @param include_private [Boolean] should we include private in the check as well
39
32
  def respond_to_missing?(method_name, include_private = false)
40
- return false if IGNORED_POSTFIXES.any? { |postfix| method_name.to_s.end_with?(postfix) }
41
-
42
- @target.respond_to?(:"#{method_name}=", include_private) || super
33
+ @target.respond_to?(:"#{method_name}=", include_private) ||
34
+ @target.respond_to?(method_name, include_private) ||
35
+ super
43
36
  end
44
37
  end
45
38
  end
@@ -3,5 +3,5 @@
3
3
  # Main module namespace
4
4
  module Karafka
5
5
  # Current Karafka version
6
- VERSION = '2.0.6'
6
+ VERSION = '2.0.8'
7
7
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: karafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.6
4
+ version: 2.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
35
35
  Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
36
36
  MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
37
37
  -----END CERTIFICATE-----
38
- date: 2022-09-02 00:00:00.000000000 Z
38
+ date: 2022-09-19 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: karafka-core
metadata.gz.sig CHANGED
Binary file