karafka 2.0.6 → 2.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +9 -0
- data/Gemfile.lock +1 -1
- data/config/errors.yml +2 -0
- data/lib/karafka/base_consumer.rb +1 -1
- data/lib/karafka/pro/active_job/consumer.rb +1 -1
- data/lib/karafka/pro/base_consumer.rb +1 -1
- data/lib/karafka/pro/contracts/consumer_group_topic.rb +21 -1
- data/lib/karafka/pro/loader.rb +1 -1
- data/lib/karafka/pro/processing/partitioner.rb +5 -7
- data/lib/karafka/pro/routing/topic_extensions.rb +41 -5
- data/lib/karafka/processing/jobs/base.rb +2 -2
- data/lib/karafka/processing/jobs_queue.rb +1 -1
- data/lib/karafka/routing/builder.rb +10 -0
- data/lib/karafka/routing/consumer_group.rb +27 -1
- data/lib/karafka/routing/proxy.rb +9 -16
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +2 -2
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1a646d63d674e6ecb80625097426f48294fe8e8b3f3e9e020cf92645a5728251
|
4
|
+
data.tar.gz: 461c58c355f84b81c3396cb4164a426c3f1e87747b3101de1e7c6fbf2a876778
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 623832478b11b1fa61c7906f423417500838e30ef97256a59290d73dcfe17670cf7eb11adcf33d422ae590c6ce5f19215340c89d369f8ec5b4e609af07d0befb
|
7
|
+
data.tar.gz: 97e50ed131c939d09de884d3ddba826381e7d314d3bc4660ecf947fa6582d9e6f16e023dad121612eadffcd441a73874c96e287e5ec1885e008d843357e94b93
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,14 @@
|
|
1
1
|
# Karafka framework changelog
|
2
2
|
|
3
|
+
## 2.0.8 (2022-09-19)
|
4
|
+
- [Breaking change] Rename Virtual Partitions `concurrency` to `max_partitions` to avoid confusion (#1023).
|
5
|
+
- Allow for block based subscription groups management (#1030).
|
6
|
+
|
7
|
+
## 2.0.7 (2022-09-05)
|
8
|
+
- [Breaking change] Redefine the Virtual Partitions routing DSL to accept concurrency
|
9
|
+
- Allow for `concurrency` setting in Virtual Partitions to extend or limit the number of jobs per regular partition. This makes sure we do not use all the threads on virtual partitions jobs
|
10
|
+
- Allow for creation of as many Virtual Partitions as needed, without taking global `concurrency` into consideration
|
11
|
+
|
3
12
|
## 2.0.6 (2022-09-02)
|
4
13
|
- Improve client closing.
|
5
14
|
- Fix for: Multiple LRJ topics fetched concurrently block ability for LRJ to kick in (#1002)
|
data/Gemfile.lock
CHANGED
data/config/errors.yml
CHANGED
@@ -55,3 +55,5 @@ en:
|
|
55
55
|
|
56
56
|
pro_consumer_group_topic:
|
57
57
|
consumer_format: needs to inherit from Karafka::Pro::BaseConsumer and not Karafka::Consumer
|
58
|
+
virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
|
59
|
+
virtual_partitions.max_partitions_format: needs to be equal or more than 1
|
@@ -124,7 +124,7 @@ module Karafka
|
|
124
124
|
|
125
125
|
# Method that will perform business logic on data received from Kafka (it will consume
|
126
126
|
# the data)
|
127
|
-
# @note This method needs
|
127
|
+
# @note This method needs to be implemented in a subclass. We stub it here as a failover if
|
128
128
|
# someone forgets about it or makes one with a typo
|
129
129
|
def consume
|
130
130
|
raise NotImplementedError, 'Implement this in a subclass'
|
@@ -102,7 +102,7 @@ module Karafka
|
|
102
102
|
else
|
103
103
|
# If processing failed, we need to pause
|
104
104
|
# For long running job this will overwrite the default never-ending pause and will cause
|
105
|
-
# the processing
|
105
|
+
# the processing to keep going after the error backoff
|
106
106
|
pause(@seek_offset || first_message.offset)
|
107
107
|
end
|
108
108
|
end
|
@@ -22,11 +22,31 @@ module Karafka
|
|
22
22
|
).fetch('en').fetch('validations').fetch('pro_consumer_group_topic')
|
23
23
|
end
|
24
24
|
|
25
|
-
|
25
|
+
nested(:virtual_partitions) do
|
26
|
+
required(:active) { |val| [true, false].include?(val) }
|
27
|
+
required(:partitioner) { |val| val.nil? || val.respond_to?(:call) }
|
28
|
+
required(:max_partitions) { |val| val.is_a?(Integer) && val >= 1 }
|
29
|
+
end
|
30
|
+
|
31
|
+
virtual do |data, errors|
|
32
|
+
next unless errors.empty?
|
26
33
|
next if data[:consumer] < Karafka::Pro::BaseConsumer
|
27
34
|
|
28
35
|
[[%i[consumer], :consumer_format]]
|
29
36
|
end
|
37
|
+
|
38
|
+
# When virtual partitions are defined, partitioner needs to respond to `#call` and it
|
39
|
+
# cannot be nil
|
40
|
+
virtual do |data, errors|
|
41
|
+
next unless errors.empty?
|
42
|
+
|
43
|
+
virtual_partitions = data[:virtual_partitions]
|
44
|
+
|
45
|
+
next unless virtual_partitions[:active]
|
46
|
+
next if virtual_partitions[:partitioner].respond_to?(:call)
|
47
|
+
|
48
|
+
[[%i[virtual_partitions partitioner], :respond_to_call]]
|
49
|
+
end
|
30
50
|
end
|
31
51
|
end
|
32
52
|
end
|
data/lib/karafka/pro/loader.rb
CHANGED
@@ -67,7 +67,7 @@ module Karafka
|
|
67
67
|
|
68
68
|
# Loads routing extensions
|
69
69
|
def load_routing_extensions
|
70
|
-
::Karafka::Routing::Topic.
|
70
|
+
::Karafka::Routing::Topic.prepend(Routing::TopicExtensions)
|
71
71
|
::Karafka::Routing::Builder.prepend(Routing::BuilderExtensions)
|
72
72
|
end
|
73
73
|
end
|
@@ -21,27 +21,25 @@ module Karafka
|
|
21
21
|
def call(topic, messages)
|
22
22
|
ktopic = @subscription_group.topics.find(topic)
|
23
23
|
|
24
|
-
@concurrency ||= ::Karafka::App.config.concurrency
|
25
|
-
|
26
24
|
# We only partition work if we have a virtual partitioner and more than one thread to
|
27
25
|
# process the data. With one thread it is not worth partitioning the work as the work
|
28
26
|
# itself will be assigned to one thread (pointless work)
|
29
|
-
if ktopic.
|
27
|
+
if ktopic.virtual_partitions? && ktopic.virtual_partitions.max_partitions > 1
|
30
28
|
# We need to reduce it to number of threads, so the group_id is not a direct effect
|
31
29
|
# of the end user action. Otherwise the persistence layer for consumers would cache
|
32
30
|
# it forever and it would cause memory leaks
|
33
31
|
groupings = messages
|
34
|
-
.group_by { |msg| ktopic.
|
32
|
+
.group_by { |msg| ktopic.virtual_partitions.partitioner.call(msg) }
|
35
33
|
.values
|
36
34
|
|
37
|
-
# Reduce the
|
35
|
+
# Reduce the number of virtual partitions to a size that matches the max_partitions
|
38
36
|
# As mentioned above we cannot use the partitioning keys directly as it could cause
|
39
37
|
# memory leaks
|
40
38
|
#
|
41
39
|
# The algorithm here is simple, we assume that the most costly in terms of processing,
|
42
40
|
# will be processing of the biggest group and we reduce the smallest ones to have
|
43
|
-
# max of groups equal to
|
44
|
-
while groupings.size >
|
41
|
+
# max of groups equal to max_partitions
|
42
|
+
while groupings.size > ktopic.virtual_partitions.max_partitions
|
45
43
|
groupings.sort_by! { |grouping| -grouping.size }
|
46
44
|
|
47
45
|
# Offset order needs to be maintained for virtual partitions
|
@@ -15,23 +15,59 @@ module Karafka
|
|
15
15
|
module Routing
|
16
16
|
# Routing extensions that allow to configure some extra PRO routing options
|
17
17
|
module TopicExtensions
|
18
|
+
# Internal representation of the virtual partitions settings and configuration
|
19
|
+
# This allows us to abstract away things in a nice manner
|
20
|
+
#
|
21
|
+
# For features with more options than just on/off we use this approach as it simplifies
|
22
|
+
# the code. We do not use it for all not to create unneeded complexity
|
23
|
+
VirtualPartitions = Struct.new(
|
24
|
+
:active,
|
25
|
+
:partitioner,
|
26
|
+
:max_partitions,
|
27
|
+
keyword_init: true
|
28
|
+
) { alias_method :active?, :active }
|
29
|
+
|
18
30
|
class << self
|
19
31
|
# @param base [Class] class we extend
|
20
|
-
def
|
32
|
+
def prepended(base)
|
21
33
|
base.attr_accessor :long_running_job
|
22
|
-
base.attr_accessor :virtual_partitioner
|
23
34
|
end
|
24
35
|
end
|
25
36
|
|
26
|
-
# @
|
27
|
-
|
28
|
-
|
37
|
+
# @param max_partitions [Integer] max number of virtual partitions that can come out of the
|
38
|
+
# single distribution flow. When set to more than the Karafka threading, will create
|
39
|
+
# more work than workers. When less, can ensure we have spare resources to process other
|
40
|
+
# things in parallel.
|
41
|
+
# @param partitioner [nil, #call] nil or callable partitioner
|
42
|
+
# @return [VirtualPartitions] method that allows to set the virtual partitions details
|
43
|
+
# during the routing configuration and then allows to retrieve it
|
44
|
+
def virtual_partitions(
|
45
|
+
max_partitions: Karafka::App.config.concurrency,
|
46
|
+
partitioner: nil
|
47
|
+
)
|
48
|
+
@virtual_partitions ||= VirtualPartitions.new(
|
49
|
+
active: !partitioner.nil?,
|
50
|
+
max_partitions: max_partitions,
|
51
|
+
partitioner: partitioner
|
52
|
+
)
|
53
|
+
end
|
54
|
+
|
55
|
+
# @return [Boolean] are virtual partitions enabled for given topic
|
56
|
+
def virtual_partitions?
|
57
|
+
virtual_partitions.active?
|
29
58
|
end
|
30
59
|
|
31
60
|
# @return [Boolean] is a given job on a topic a long-running one
|
32
61
|
def long_running_job?
|
33
62
|
@long_running_job || false
|
34
63
|
end
|
64
|
+
|
65
|
+
# @return [Hash] hash with topic details and the extensions details
|
66
|
+
def to_h
|
67
|
+
super.merge(
|
68
|
+
virtual_partitions: virtual_partitions.to_h
|
69
|
+
)
|
70
|
+
end
|
35
71
|
end
|
36
72
|
end
|
37
73
|
end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
module Karafka
|
4
4
|
module Processing
|
5
|
-
# Namespace for all the jobs that are
|
5
|
+
# Namespace for all the jobs that are supposed to run in workers.
|
6
6
|
module Jobs
|
7
7
|
# Base class for all the job types that are supposed to run in worker threads.
|
8
8
|
# Each job can have 3 main entry-points: `#before_call`, `#call` and `#after_call`
|
@@ -10,7 +10,7 @@ module Karafka
|
|
10
10
|
class Base
|
11
11
|
extend Forwardable
|
12
12
|
|
13
|
-
# @note Since one job has always one
|
13
|
+
# @note Since one job always has one executor, we use the job id and group id as reference
|
14
14
|
def_delegators :executor, :id, :group_id
|
15
15
|
|
16
16
|
attr_reader :executor
|
@@ -13,7 +13,7 @@ module Karafka
|
|
13
13
|
# @return [Karafka::Processing::JobsQueue]
|
14
14
|
def initialize
|
15
15
|
@queue = Queue.new
|
16
|
-
# Those queues will act as
|
16
|
+
# Those queues will act as semaphores internally. Since we need an indicator for waiting
|
17
17
|
# we could use Thread.pass but this is expensive. Instead we can just lock until any
|
18
18
|
# of the workers finishes their work and we can re-check. This means that in the worst
|
19
19
|
# scenario, we will context switch 10 times per poll instead of getting this thread
|
@@ -66,6 +66,16 @@ module Karafka
|
|
66
66
|
end
|
67
67
|
end
|
68
68
|
|
69
|
+
# Handles the simple routing case where we create one consumer group and allow for further
|
70
|
+
# subscription group customization
|
71
|
+
# @param subscription_group_name [String, Symbol] subscription group id
|
72
|
+
# @param block [Proc] further topics definitions
|
73
|
+
def subscription_group(subscription_group_name, &block)
|
74
|
+
consumer_group('app') do
|
75
|
+
target.public_send(:subscription_group=, subscription_group_name, &block)
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
69
79
|
# In case we use simple style of routing, all topics will be assigned to the same consumer
|
70
80
|
# group that will be based on the client_id
|
71
81
|
#
|
@@ -10,6 +10,12 @@ module Karafka
|
|
10
10
|
class ConsumerGroup
|
11
11
|
attr_reader :id, :topics, :name
|
12
12
|
|
13
|
+
# This is a "virtual" attribute that is not building subscription groups.
|
14
|
+
# It allows us to store the "current" subscription group defined in the routing
|
15
|
+
# This subscription group id is then injected into topics, so we can compute the subscription
|
16
|
+
# groups
|
17
|
+
attr_accessor :current_subscription_group_name
|
18
|
+
|
13
19
|
# @param name [String, Symbol] raw name of this consumer group. Raw means, that it does not
|
14
20
|
# yet have an application client_id namespace, this will be added here by default.
|
15
21
|
# We add it to make a multi-system development easier for people that don't use
|
@@ -32,7 +38,27 @@ module Karafka
|
|
32
38
|
def topic=(name, &block)
|
33
39
|
topic = Topic.new(name, self)
|
34
40
|
@topics << Proxy.new(topic, &block).target
|
35
|
-
@topics.last
|
41
|
+
built_topic = @topics.last
|
42
|
+
# We overwrite it conditionally in case it was not set by the user inline in the topic
|
43
|
+
# block definition
|
44
|
+
built_topic.subscription_group ||= current_subscription_group_name
|
45
|
+
built_topic
|
46
|
+
end
|
47
|
+
|
48
|
+
# Assigns the current subscription group id based on the defined one and allows for further
|
49
|
+
# topic definition
|
50
|
+
# @param name [String, Symbol]
|
51
|
+
# @param block [Proc] block that may include topics definitions
|
52
|
+
def subscription_group=(name, &block)
|
53
|
+
# We cast it here, so the routing supports symbol based but that's anyhow later on
|
54
|
+
# validated as a string
|
55
|
+
self.current_subscription_group_name = name.to_s
|
56
|
+
|
57
|
+
Proxy.new(self, &block)
|
58
|
+
|
59
|
+
# We need to reset the current subscription group after it is used, so it won't leak
|
60
|
+
# outside to other topics that would be defined without a defined subscription group
|
61
|
+
self.current_subscription_group_name = nil
|
36
62
|
end
|
37
63
|
|
38
64
|
# @return [Array<Routing::SubscriptionGroup>] all the subscription groups build based on
|
@@ -7,15 +7,6 @@ module Karafka
|
|
7
7
|
class Proxy
|
8
8
|
attr_reader :target
|
9
9
|
|
10
|
-
# We should proxy only non ? and = methods as we want to have a regular dsl
|
11
|
-
IGNORED_POSTFIXES = %w[
|
12
|
-
?
|
13
|
-
=
|
14
|
-
!
|
15
|
-
].freeze
|
16
|
-
|
17
|
-
private_constant :IGNORED_POSTFIXES
|
18
|
-
|
19
10
|
# @param target [Object] target object to which we proxy any DSL call
|
20
11
|
# @param block [Proc] block that we want to evaluate in the proxy context
|
21
12
|
def initialize(target, &block)
|
@@ -25,21 +16,23 @@ module Karafka
|
|
25
16
|
|
26
17
|
# Translates the no "=" DSL of routing into elements assignments on target
|
27
18
|
# @param method_name [Symbol] name of the missing method
|
28
|
-
|
29
|
-
# @param block [Proc] block provided to the method
|
30
|
-
def method_missing(method_name, *arguments, &block)
|
19
|
+
def method_missing(method_name, ...)
|
31
20
|
return super unless respond_to_missing?(method_name)
|
32
21
|
|
33
|
-
@target.
|
22
|
+
if @target.respond_to?(:"#{method_name}=")
|
23
|
+
@target.public_send(:"#{method_name}=", ...)
|
24
|
+
else
|
25
|
+
@target.public_send(method_name, ...)
|
26
|
+
end
|
34
27
|
end
|
35
28
|
|
36
29
|
# Tells whether or not a given element exists on the target
|
37
30
|
# @param method_name [Symbol] name of the missing method
|
38
31
|
# @param include_private [Boolean] should we include private in the check as well
|
39
32
|
def respond_to_missing?(method_name, include_private = false)
|
40
|
-
|
41
|
-
|
42
|
-
|
33
|
+
@target.respond_to?(:"#{method_name}=", include_private) ||
|
34
|
+
@target.respond_to?(method_name, include_private) ||
|
35
|
+
super
|
43
36
|
end
|
44
37
|
end
|
45
38
|
end
|
data/lib/karafka/version.rb
CHANGED
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: karafka
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.6
|
4
|
+
version: 2.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maciej Mensfeld
|
@@ -35,7 +35,7 @@ cert_chain:
|
|
35
35
|
Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
|
36
36
|
MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
|
37
37
|
-----END CERTIFICATE-----
|
38
|
-
date: 2022-09-
|
38
|
+
date: 2022-09-19 00:00:00.000000000 Z
|
39
39
|
dependencies:
|
40
40
|
- !ruby/object:Gem::Dependency
|
41
41
|
name: karafka-core
|
metadata.gz.sig
CHANGED
Binary file
|