karafka 2.0.7 → 2.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +5 -1
- data/Gemfile.lock +1 -1
- data/config/errors.yml +1 -1
- data/lib/karafka/base_consumer.rb +1 -1
- data/lib/karafka/pro/base_consumer.rb +1 -1
- data/lib/karafka/pro/contracts/consumer_group_topic.rb +1 -1
- data/lib/karafka/pro/processing/partitioner.rb +4 -4
- data/lib/karafka/pro/routing/topic_extensions.rb +4 -4
- data/lib/karafka/processing/jobs/base.rb +2 -2
- data/lib/karafka/processing/jobs_queue.rb +1 -1
- data/lib/karafka/routing/builder.rb +10 -0
- data/lib/karafka/routing/consumer_group.rb +27 -1
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +2 -2
- metadata.gz.sig +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1a646d63d674e6ecb80625097426f48294fe8e8b3f3e9e020cf92645a5728251
+  data.tar.gz: 461c58c355f84b81c3396cb4164a426c3f1e87747b3101de1e7c6fbf2a876778
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 623832478b11b1fa61c7906f423417500838e30ef97256a59290d73dcfe17670cf7eb11adcf33d422ae590c6ce5f19215340c89d369f8ec5b4e609af07d0befb
+  data.tar.gz: 97e50ed131c939d09de884d3ddba826381e7d314d3bc4660ecf947fa6582d9e6f16e023dad121612eadffcd441a73874c96e287e5ec1885e008d843357e94b93
checksums.yaml.gz.sig
CHANGED
Binary file
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,10 @@
 # Karafka framework changelog
 
-## 2.0.7 (2022-09-05)
+## 2.0.8 (2022-09-19)
+- [Breaking change] Rename Virtual Partitions `concurrency` to `max_partitions` to avoid confusion (#1023).
+- Allow for block based subscription groups management (#1030).
+
+## 2.0.7 (2022-09-05)
 - [Breaking change] Redefine the Virtual Partitions routing DSL to accept concurrency
 - Allow for `concurrency` setting in Virtual Partitions to extend or limit number of jobs per regular partition. This allows to make sure, we do not use all the threads on virtual partitions jobs
 - Allow for creation of as many Virtual Partitions as needed, without taking global `concurrency` into consideration
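For orientation, here is a minimal routing sketch of what the two 2.0.8 changes look like in use. The app class, topic names, consumer classes and the partitioner lambda are illustrative placeholders, not part of the gem.

```ruby
# Hypothetical karafka.rb excerpt illustrating the 2.0.8 routing changes.
class KarafkaApp < Karafka::App
  setup do |config|
    config.kafka = { 'bootstrap.servers': '127.0.0.1:9092' }
    config.concurrency = 5
  end

  routes.draw do
    # Block based subscription groups management (#1030)
    subscription_group :orders do
      topic :orders_events do
        consumer OrdersEventsConsumer
      end
    end

    topic :visits do
      consumer VisitsConsumer

      # Virtual Partitions (a karafka Pro feature): the option previously
      # called `concurrency` is now `max_partitions` (#1023)
      virtual_partitions(
        partitioner: ->(message) { message.key },
        max_partitions: 5
      )
    end
  end
end
```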
data/Gemfile.lock
CHANGED
data/config/errors.yml
CHANGED
@@ -56,4 +56,4 @@ en:
   pro_consumer_group_topic:
     consumer_format: needs to inherit from Karafka::Pro::BaseConsumer and not Karafka::Consumer
     virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
-    virtual_partitions.
+    virtual_partitions.max_partitions_format: needs to be equl or more than 1
data/lib/karafka/base_consumer.rb
CHANGED
@@ -124,7 +124,7 @@ module Karafka
 
     # Method that will perform business logic and on data received from Kafka (it will consume
     # the data)
-    # @note This method needs
+    # @note This method needs to be implemented in a subclass. We stub it here as a failover if
     #   someone forgets about it or makes on with typo
     def consume
       raise NotImplementedError, 'Implement this in a subclass'
data/lib/karafka/pro/base_consumer.rb
CHANGED
@@ -102,7 +102,7 @@ module Karafka
       else
         # If processing failed, we need to pause
         # For long running job this will overwrite the default never-ending pause and will cause
-        # the processing
+        # the processing to keep going after the error backoff
         pause(@seek_offset || first_message.offset)
       end
     end
data/lib/karafka/pro/contracts/consumer_group_topic.rb
CHANGED
@@ -25,7 +25,7 @@ module Karafka
       nested(:virtual_partitions) do
         required(:active) { |val| [true, false].include?(val) }
         required(:partitioner) { |val| val.nil? || val.respond_to?(:call) }
-        required(:
+        required(:max_partitions) { |val| val.is_a?(Integer) && val >= 1 }
       end
 
       virtual do |data, errors|
data/lib/karafka/pro/processing/partitioner.rb
CHANGED
@@ -24,7 +24,7 @@ module Karafka
         # We only partition work if we have a virtual partitioner and more than one thread to
         # process the data. With one thread it is not worth partitioning the work as the work
         # itself will be assigned to one thread (pointless work)
-        if ktopic.virtual_partitions? && ktopic.virtual_partitions.
+        if ktopic.virtual_partitions? && ktopic.virtual_partitions.max_partitions > 1
           # We need to reduce it to number of threads, so the group_id is not a direct effect
           # of the end user action. Otherwise the persistence layer for consumers would cache
           # it forever and it would cause memory leaks
@@ -32,14 +32,14 @@ module Karafka
             .group_by { |msg| ktopic.virtual_partitions.partitioner.call(msg) }
             .values
 
-          # Reduce the
+          # Reduce the number of virtual partitions to a size that matches the max_partitions
           # As mentioned above we cannot use the partitioning keys directly as it could cause
           # memory leaks
           #
           # The algorithm here is simple, we assume that the most costly in terms of processing,
           # will be processing of the biggest group and we reduce the smallest once to have
-          # max of groups equal to
-          while groupings.size > ktopic.virtual_partitions.
+          # max of groups equal to max_partitions
+          while groupings.size > ktopic.virtual_partitions.max_partitions
            groupings.sort_by! { |grouping| -grouping.size }
 
            # Offset order needs to be maintained for virtual partitions
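The loop above keeps merging key based groupings until their count fits within `max_partitions`. Below is a rough, standalone sketch of that idea in plain Ruby. It is not the gem's actual implementation; the message struct and the choice to merge the two smallest groups are simplifications for illustration.

```ruby
# Simplified illustration: reduce N key-based groupings down to at most
# `max_partitions` groups by repeatedly merging the two smallest ones,
# re-sorting each merged group by offset so per-partition order is kept.
Message = Struct.new(:offset, :key, keyword_init: true)

def reduce_groupings(groupings, max_partitions)
  # Work on copies so the caller's arrays are not mutated
  groupings = groupings.map(&:dup)

  while groupings.size > max_partitions
    # Biggest groups first, so the two smallest sit at the end
    groupings.sort_by! { |grouping| -grouping.size }

    smallest = groupings.pop
    second_smallest = groupings.pop

    # Offset order needs to be maintained inside each virtual partition
    groupings << (smallest + second_smallest).sort_by(&:offset)
  end

  groupings
end

messages = (0..9).map { |i| Message.new(offset: i, key: "user-#{i % 5}") }
groupings = messages.group_by(&:key).values
p reduce_groupings(groupings, 3).map { |group| group.map(&:offset) }
```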
data/lib/karafka/pro/routing/topic_extensions.rb
CHANGED
@@ -23,7 +23,7 @@ module Karafka
       VirtualPartitions = Struct.new(
         :active,
         :partitioner,
-        :
+        :max_partitions,
         keyword_init: true
       ) { alias_method :active?, :active }
 
@@ -34,7 +34,7 @@ module Karafka
         end
       end
 
-      # @param
+      # @param max_partitions [Integer] max number of virtual partitions that can come out of the
       #   single distribution flow. When set to more than the Karafka threading, will create
       #   more work than workers. When less, can ensure we have spare resources to process other
       #   things in parallel.
@@ -42,12 +42,12 @@ module Karafka
      # @return [VirtualPartitions] method that allows to set the virtual partitions details
      #   during the routing configuration and then allows to retrieve it
      def virtual_partitions(
-
+        max_partitions: Karafka::App.config.concurrency,
        partitioner: nil
      )
        @virtual_partitions ||= VirtualPartitions.new(
          active: !partitioner.nil?,
-
+          max_partitions: max_partitions,
          partitioner: partitioner
        )
      end
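The extension above follows a small, reusable pattern: a keyword-initialized Struct holding the routing options, memoized behind a getter that doubles as a setter. A standalone sketch of that pattern is below; it is not the gem's code, and `TopicSettings` plus the default of 4 are made up for the example.

```ruby
# Minimal sketch of the memoized, Struct-backed routing setting pattern.
VirtualPartitions = Struct.new(:active, :partitioner, :max_partitions, keyword_init: true) do
  alias_method :active?, :active
end

class TopicSettings
  # Getter and setter in one: the first call stores the options, later calls
  # return the memoized struct regardless of arguments.
  def virtual_partitions(max_partitions: 4, partitioner: nil)
    @virtual_partitions ||= VirtualPartitions.new(
      active: !partitioner.nil?,
      max_partitions: max_partitions,
      partitioner: partitioner
    )
  end
end

settings = TopicSettings.new
settings.virtual_partitions(partitioner: ->(msg) { msg }, max_partitions: 3)
p settings.virtual_partitions.active?        # => true
p settings.virtual_partitions.max_partitions # => 3
```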
data/lib/karafka/processing/jobs/base.rb
CHANGED
@@ -2,7 +2,7 @@
 
 module Karafka
   module Processing
-    # Namespace for all the jobs that are
+    # Namespace for all the jobs that are supposed to run in workers.
     module Jobs
       # Base class for all the jobs types that are suppose to run in workers threads.
       # Each job can have 3 main entry-points: `#before_call`, `#call` and `#after_call`
@@ -10,7 +10,7 @@ module Karafka
      class Base
        extend Forwardable
 
-        # @note Since one job has always one
+        # @note Since one job has always one executor, we use the jobs id and group id as reference
        def_delegators :executor, :id, :group_id
 
        attr_reader :executor
data/lib/karafka/processing/jobs_queue.rb
CHANGED
@@ -13,7 +13,7 @@ module Karafka
     # @return [Karafka::Processing::JobsQueue]
     def initialize
       @queue = Queue.new
-      # Those queues will act as
+      # Those queues will act as semaphores internally. Since we need an indicator for waiting
       # we could use Thread.pass but this is expensive. Instead we can just lock until any
       # of the workers finishes their work and we can re-check. This means that in the worse
       # scenario, we will context switch 10 times per poll instead of getting this thread
data/lib/karafka/routing/builder.rb
CHANGED
@@ -66,6 +66,16 @@ module Karafka
       end
     end
 
+    # Handles the simple routing case where we create one consumer group and allow for further
+    # subscription group customization
+    # @param subscription_group_name [String, Symbol] subscription group id
+    # @param block [Proc] further topics definitions
+    def subscription_group(subscription_group_name, &block)
+      consumer_group('app') do
+        target.public_send(:subscription_group=, subscription_group_name, &block)
+      end
+    end
+
     # In case we use simple style of routing, all topics will be assigned to the same consumer
     # group that will be based on the client_id
     #
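Assuming the new helper, simple routing could split topics across subscription groups roughly as sketched below. Consumer classes and topic names are placeholders; the general motivation for splitting is that topics in separate subscription groups are handled by separate underlying Kafka connections.

```ruby
# Hypothetical routing using the new Builder#subscription_group helper.
KarafkaApp.routes.draw do
  # Topics defined inside a block share that subscription group (and thus a connection)
  subscription_group :critical do
    topic :payments do
      consumer PaymentsConsumer
    end
  end

  subscription_group :background do
    topic :logs do
      consumer LogsConsumer
    end

    topic :metrics do
      consumer MetricsConsumer
    end
  end
end
```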
data/lib/karafka/routing/consumer_group.rb
CHANGED
@@ -10,6 +10,12 @@ module Karafka
     class ConsumerGroup
       attr_reader :id, :topics, :name
 
+      # This is a "virtual" attribute that is not building subscription groups.
+      # It allows us to store the "current" subscription group defined in the routing
+      # This subscription group id is then injected into topics, so we can compute the subscription
+      # groups
+      attr_accessor :current_subscription_group_name
+
       # @param name [String, Symbol] raw name of this consumer group. Raw means, that it does not
       #   yet have an application client_id namespace, this will be added here by default.
       #   We add it to make a multi-system development easier for people that don't use
@@ -32,7 +38,27 @@ module Karafka
      def topic=(name, &block)
        topic = Topic.new(name, self)
        @topics << Proxy.new(topic, &block).target
-        @topics.last
+        built_topic = @topics.last
+        # We overwrite it conditionally in case it was not set by the user inline in the topic
+        # block definition
+        built_topic.subscription_group ||= current_subscription_group_name
+        built_topic
+      end
+
+      # Assigns the current subscription group id based on the defined one and allows for further
+      # topic definition
+      # @param name [String, Symbol]
+      # @param block [Proc] block that may include topics definitions
+      def subscription_group=(name, &block)
+        # We cast it here, so the routing supports symbol based but that's anyhow later on
+        # validated as a string
+        self.current_subscription_group_name = name.to_s
+
+        Proxy.new(self, &block)
+
+        # We need to reset the current subscription group after it is used, so it won't leak
+        # outside to other topics that would be defined without a defined subscription group
+        self.current_subscription_group_name = nil
      end
 
      # @return [Array<Routing::SubscriptionGroup>] all the subscription groups build based on
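Putting the pieces together, the block level name acts as a default that a per topic value can still override. The sketch below is hedged: the group, topic and consumer names are placeholders, and the inline `subscription_group` topic setting is assumed from the conditional assignment in the diff above rather than quoted from the gem's documentation.

```ruby
KarafkaApp.routes.draw do
  consumer_group :events do
    subscription_group :primary do
      topic :user_events do
        consumer UserEventsConsumer
        # No inline value, so this topic inherits the :primary group name
      end

      topic :audit_events do
        consumer AuditEventsConsumer
        # An inline value set inside the topic block wins over the block level default
        subscription_group 'audits'
      end
    end
  end
end
```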
data/lib/karafka/version.rb
CHANGED
data.tar.gz.sig
CHANGED
Binary file
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: karafka
 version: !ruby/object:Gem::Version
-  version: 2.0.7
+  version: 2.0.8
 platform: ruby
 authors:
 - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
   Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
   MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
   -----END CERTIFICATE-----
-date: 2022-09-
+date: 2022-09-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: karafka-core
metadata.gz.sig
CHANGED
Binary file