karafka 2.0.7 → 2.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +5 -1
- data/Gemfile.lock +1 -1
- data/config/errors.yml +1 -1
- data/lib/karafka/base_consumer.rb +1 -1
- data/lib/karafka/pro/base_consumer.rb +1 -1
- data/lib/karafka/pro/contracts/consumer_group_topic.rb +1 -1
- data/lib/karafka/pro/processing/partitioner.rb +4 -4
- data/lib/karafka/pro/routing/topic_extensions.rb +4 -4
- data/lib/karafka/processing/jobs/base.rb +2 -2
- data/lib/karafka/processing/jobs_queue.rb +1 -1
- data/lib/karafka/routing/builder.rb +10 -0
- data/lib/karafka/routing/consumer_group.rb +27 -1
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +2 -2
- metadata.gz.sig +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1a646d63d674e6ecb80625097426f48294fe8e8b3f3e9e020cf92645a5728251
|
4
|
+
data.tar.gz: 461c58c355f84b81c3396cb4164a426c3f1e87747b3101de1e7c6fbf2a876778
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 623832478b11b1fa61c7906f423417500838e30ef97256a59290d73dcfe17670cf7eb11adcf33d422ae590c6ce5f19215340c89d369f8ec5b4e609af07d0befb
|
7
|
+
data.tar.gz: 97e50ed131c939d09de884d3ddba826381e7d314d3bc4660ecf947fa6582d9e6f16e023dad121612eadffcd441a73874c96e287e5ec1885e008d843357e94b93
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,10 @@
|
|
1
1
|
# Karafka framework changelog
|
2
2
|
|
3
|
-
## 2.0.
|
3
|
+
## 2.0.8 (2022-09-19)
|
4
|
+
- [Breaking change] Rename Virtual Partitions `concurrency` to `max_partitions` to avoid confusion (#1023).
|
5
|
+
- Allow for block based subscription groups management (#1030).
|
6
|
+
|
7
|
+
## 2.0.7 (2022-09-05)
|
4
8
|
- [Breaking change] Redefine the Virtual Partitions routing DSL to accept concurrency
|
5
9
|
- Allow for `concurrency` setting in Virtual Partitions to extend or limit number of jobs per regular partition. This allows us to make sure we do not use all the threads on virtual partitions jobs
|
6
10
|
- Allow for creation of as many Virtual Partitions as needed, without taking global `concurrency` into consideration
|
data/Gemfile.lock
CHANGED
data/config/errors.yml
CHANGED
@@ -56,4 +56,4 @@ en:
|
|
56
56
|
pro_consumer_group_topic:
|
57
57
|
consumer_format: needs to inherit from Karafka::Pro::BaseConsumer and not Karafka::Consumer
|
58
58
|
virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
|
59
|
-
virtual_partitions.
|
59
|
+
virtual_partitions.max_partitions_format: needs to be equal or more than 1
|
@@ -124,7 +124,7 @@ module Karafka
|
|
124
124
|
|
125
125
|
# Method that will perform business logic on data received from Kafka (it will consume
|
126
126
|
# the data)
|
127
|
-
# @note This method needs
|
127
|
+
# @note This method needs to be implemented in a subclass. We stub it here as a failover if
|
128
128
|
# someone forgets about it or makes one with a typo
|
129
129
|
def consume
|
130
130
|
raise NotImplementedError, 'Implement this in a subclass'
|
@@ -102,7 +102,7 @@ module Karafka
|
|
102
102
|
else
|
103
103
|
# If processing failed, we need to pause
|
104
104
|
# For long running job this will overwrite the default never-ending pause and will cause
|
105
|
-
# the processing
|
105
|
+
# the processing to keep going after the error backoff
|
106
106
|
pause(@seek_offset || first_message.offset)
|
107
107
|
end
|
108
108
|
end
|
@@ -25,7 +25,7 @@ module Karafka
|
|
25
25
|
nested(:virtual_partitions) do
|
26
26
|
required(:active) { |val| [true, false].include?(val) }
|
27
27
|
required(:partitioner) { |val| val.nil? || val.respond_to?(:call) }
|
28
|
-
required(:
|
28
|
+
required(:max_partitions) { |val| val.is_a?(Integer) && val >= 1 }
|
29
29
|
end
|
30
30
|
|
31
31
|
virtual do |data, errors|
|
@@ -24,7 +24,7 @@ module Karafka
|
|
24
24
|
# We only partition work if we have a virtual partitioner and more than one thread to
|
25
25
|
# process the data. With one thread it is not worth partitioning the work as the work
|
26
26
|
# itself will be assigned to one thread (pointless work)
|
27
|
-
if ktopic.virtual_partitions? && ktopic.virtual_partitions.
|
27
|
+
if ktopic.virtual_partitions? && ktopic.virtual_partitions.max_partitions > 1
|
28
28
|
# We need to reduce it to number of threads, so the group_id is not a direct effect
|
29
29
|
# of the end user action. Otherwise the persistence layer for consumers would cache
|
30
30
|
# it forever and it would cause memory leaks
|
@@ -32,14 +32,14 @@ module Karafka
|
|
32
32
|
.group_by { |msg| ktopic.virtual_partitions.partitioner.call(msg) }
|
33
33
|
.values
|
34
34
|
|
35
|
-
# Reduce the
|
35
|
+
# Reduce the number of virtual partitions to a size that matches the max_partitions
|
36
36
|
# As mentioned above we cannot use the partitioning keys directly as it could cause
|
37
37
|
# memory leaks
|
38
38
|
#
|
39
39
|
# The algorithm here is simple, we assume that the most costly in terms of processing,
|
40
40
|
# will be processing of the biggest group and we reduce the smallest ones to have
|
41
|
-
# max of groups equal to
|
42
|
-
while groupings.size > ktopic.virtual_partitions.
|
41
|
+
# max of groups equal to max_partitions
|
42
|
+
while groupings.size > ktopic.virtual_partitions.max_partitions
|
43
43
|
groupings.sort_by! { |grouping| -grouping.size }
|
44
44
|
|
45
45
|
# Offset order needs to be maintained for virtual partitions
|
@@ -23,7 +23,7 @@ module Karafka
|
|
23
23
|
VirtualPartitions = Struct.new(
|
24
24
|
:active,
|
25
25
|
:partitioner,
|
26
|
-
:
|
26
|
+
:max_partitions,
|
27
27
|
keyword_init: true
|
28
28
|
) { alias_method :active?, :active }
|
29
29
|
|
@@ -34,7 +34,7 @@ module Karafka
|
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
|
-
# @param
|
37
|
+
# @param max_partitions [Integer] max number of virtual partitions that can come out of the
|
38
38
|
# single distribution flow. When set to more than the Karafka threading, will create
|
39
39
|
# more work than workers. When less, can ensure we have spare resources to process other
|
40
40
|
# things in parallel.
|
@@ -42,12 +42,12 @@ module Karafka
|
|
42
42
|
# @return [VirtualPartitions] method that allows to set the virtual partitions details
|
43
43
|
# during the routing configuration and then allows to retrieve it
|
44
44
|
def virtual_partitions(
|
45
|
-
|
45
|
+
max_partitions: Karafka::App.config.concurrency,
|
46
46
|
partitioner: nil
|
47
47
|
)
|
48
48
|
@virtual_partitions ||= VirtualPartitions.new(
|
49
49
|
active: !partitioner.nil?,
|
50
|
-
|
50
|
+
max_partitions: max_partitions,
|
51
51
|
partitioner: partitioner
|
52
52
|
)
|
53
53
|
end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
module Karafka
|
4
4
|
module Processing
|
5
|
-
# Namespace for all the jobs that are
|
5
|
+
# Namespace for all the jobs that are supposed to run in workers.
|
6
6
|
module Jobs
|
7
7
|
# Base class for all the jobs types that are supposed to run in worker threads.
|
8
8
|
# Each job can have 3 main entry-points: `#before_call`, `#call` and `#after_call`
|
@@ -10,7 +10,7 @@ module Karafka
|
|
10
10
|
class Base
|
11
11
|
extend Forwardable
|
12
12
|
|
13
|
-
# @note Since one job has always one
|
13
|
+
# @note Since one job has always one executor, we use the jobs id and group id as reference
|
14
14
|
def_delegators :executor, :id, :group_id
|
15
15
|
|
16
16
|
attr_reader :executor
|
@@ -13,7 +13,7 @@ module Karafka
|
|
13
13
|
# @return [Karafka::Processing::JobsQueue]
|
14
14
|
def initialize
|
15
15
|
@queue = Queue.new
|
16
|
-
# Those queues will act as
|
16
|
+
# Those queues will act as semaphores internally. Since we need an indicator for waiting
|
17
17
|
# we could use Thread.pass but this is expensive. Instead we can just lock until any
|
18
18
|
# of the workers finishes their work and we can re-check. This means that in the worst
|
19
19
|
# scenario, we will context switch 10 times per poll instead of getting this thread
|
@@ -66,6 +66,16 @@ module Karafka
|
|
66
66
|
end
|
67
67
|
end
|
68
68
|
|
69
|
+
# Handles the simple routing case where we create one consumer group and allow for further
|
70
|
+
# subscription group customization
|
71
|
+
# @param subscription_group_name [String, Symbol] subscription group id
|
72
|
+
# @param block [Proc] further topics definitions
|
73
|
+
def subscription_group(subscription_group_name, &block)
|
74
|
+
consumer_group('app') do
|
75
|
+
target.public_send(:subscription_group=, subscription_group_name, &block)
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
69
79
|
# In case we use simple style of routing, all topics will be assigned to the same consumer
|
70
80
|
# group that will be based on the client_id
|
71
81
|
#
|
@@ -10,6 +10,12 @@ module Karafka
|
|
10
10
|
class ConsumerGroup
|
11
11
|
attr_reader :id, :topics, :name
|
12
12
|
|
13
|
+
# This is a "virtual" attribute that is not building subscription groups.
|
14
|
+
# It allows us to store the "current" subscription group defined in the routing
|
15
|
+
# This subscription group id is then injected into topics, so we can compute the subscription
|
16
|
+
# groups
|
17
|
+
attr_accessor :current_subscription_group_name
|
18
|
+
|
13
19
|
# @param name [String, Symbol] raw name of this consumer group. Raw means, that it does not
|
14
20
|
# yet have an application client_id namespace, this will be added here by default.
|
15
21
|
# We add it to make a multi-system development easier for people that don't use
|
@@ -32,7 +38,27 @@ module Karafka
|
|
32
38
|
def topic=(name, &block)
|
33
39
|
topic = Topic.new(name, self)
|
34
40
|
@topics << Proxy.new(topic, &block).target
|
35
|
-
@topics.last
|
41
|
+
built_topic = @topics.last
|
42
|
+
# We overwrite it conditionally in case it was not set by the user inline in the topic
|
43
|
+
# block definition
|
44
|
+
built_topic.subscription_group ||= current_subscription_group_name
|
45
|
+
built_topic
|
46
|
+
end
|
47
|
+
|
48
|
+
# Assigns the current subscription group id based on the defined one and allows for further
|
49
|
+
# topic definition
|
50
|
+
# @param name [String, Symbol]
|
51
|
+
# @param block [Proc] block that may include topics definitions
|
52
|
+
def subscription_group=(name, &block)
|
53
|
+
# We cast it here, so the routing supports symbol based but that's anyhow later on
|
54
|
+
# validated as a string
|
55
|
+
self.current_subscription_group_name = name.to_s
|
56
|
+
|
57
|
+
Proxy.new(self, &block)
|
58
|
+
|
59
|
+
# We need to reset the current subscription group after it is used, so it won't leak
|
60
|
+
# outside to other topics that would be defined without a defined subscription group
|
61
|
+
self.current_subscription_group_name = nil
|
36
62
|
end
|
37
63
|
|
38
64
|
# @return [Array<Routing::SubscriptionGroup>] all the subscription groups built based on
|
data/lib/karafka/version.rb
CHANGED
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: karafka
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.
|
4
|
+
version: 2.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maciej Mensfeld
|
@@ -35,7 +35,7 @@ cert_chain:
|
|
35
35
|
Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
|
36
36
|
MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
|
37
37
|
-----END CERTIFICATE-----
|
38
|
-
date: 2022-09-
|
38
|
+
date: 2022-09-19 00:00:00.000000000 Z
|
39
39
|
dependencies:
|
40
40
|
- !ruby/object:Gem::Dependency
|
41
41
|
name: karafka-core
|
metadata.gz.sig
CHANGED
@@ -1,2 +1,3 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
<�=�g>�g��M`��Fѐzv���VU.o����
|
2
|
+
�!�J����6���g�o�j]�f�{�����xOm�)e�������������Q��]���l"��Yj�R-�s��6FPb���W씕M]�kd�A��#��j�^����!�4������áe��߷���fMo!��3�B�{{�&�B�R�E�Fj}�m9���
|
3
|
+
8ԏ��u�G�a�O�P'��~�@�ƌI�2��S}ňi� F��W:�=Ѐ�t_��y���P�� ���k��(+3n����t����%��,�|�ZF�Z�q����m�h��])M�T��#g=���H�_X�O�#Bf'>��2���+
|