karafka 2.0.0.alpha3 → 2.0.0.alpha6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +19 -0
- data/Gemfile.lock +10 -10
- data/bin/integrations +56 -44
- data/bin/stress +1 -1
- data/docker-compose.yml +3 -1
- data/karafka.gemspec +1 -1
- data/lib/active_job/karafka.rb +16 -15
- data/lib/karafka/base_consumer.rb +8 -8
- data/lib/karafka/connection/client.rb +35 -5
- data/lib/karafka/connection/messages_buffer.rb +40 -0
- data/lib/karafka/connection/rebalance_manager.rb +35 -20
- data/lib/karafka/contracts/consumer_group_topic.rb +1 -0
- data/lib/karafka/instrumentation/{stdout_listener.rb → logger_listener.rb} +1 -1
- data/lib/karafka/instrumentation/monitor.rb +1 -1
- data/lib/karafka/railtie.rb +12 -0
- data/lib/karafka/routing/subscription_group.rb +1 -1
- data/lib/karafka/routing/subscription_groups_builder.rb +1 -0
- data/lib/karafka/routing/topic.rb +1 -0
- data/lib/karafka/setup/config.rb +4 -1
- data/lib/karafka/templates/karafka.rb.erb +1 -1
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +6 -6
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 862df94b26c24809f82e07f71c39433b90ef08f68f053a004a87aa466b872dac
|
4
|
+
data.tar.gz: 2c533cbd6c271fe282f59c2030d9cc885555242bb8bc9316d0264a5ccfd694a0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8ce3720e535d65f121bcbfd957286cbc41404f2aff2751622183367862db55f8231c286d5bbede4a18649eafcd085952a7fc8e7569a74dbd3633c9db906114e5
|
7
|
+
data.tar.gz: e77819ccd2be263b02958fcee71ce9228c93c7dee62fabbea61711fbbdbffa997bb9a85a7f668cf8bb596b7a98a773ace560ad5075c46e70f20cd302b354b0fe
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,24 @@
|
|
1
1
|
# Karafka framework changelog
|
2
2
|
|
3
|
+
## 2.0.0-alpha6 (2022-04-17)
|
4
|
+
- Fix a bug, where upon missing boot file and Rails, railtie would fail with a generic exception (#818)
|
5
|
+
- Fix an issue with parallel pristine specs colliding with each other during `bundle install` (#820)
|
6
|
+
- Replace `consumer.consume` with `consumer.consumed` event to match the behaviour
|
7
|
+
- Make sure that offset committing happens before the `consumer.consumed` event is propagated
|
8
|
+
- Fix a failure when Karafka is not installed in the application (present only as a dependency) (#817)
|
9
|
+
- Evict messages from partitions that were lost upon rebalancing (#825)
|
10
|
+
- Do **not** run `#revoked` on partitions that were lost and assigned back upon rebalancing (#825)
|
11
|
+
- Remove potential duplicates that could occur upon rebalance with re-assigned partitions (#825)
|
12
|
+
- Optimize integration test suite additional consumers shutdown process (#828)
|
13
|
+
- Optimize messages eviction and duplicates removal on poll stopped due to lack of messages
|
14
|
+
- Add static group membership integration spec
|
15
|
+
|
16
|
+
## 2.0.0-alpha5 (2022-04-03)
|
17
|
+
- Rename StdoutListener to LoggerListener (#811)
|
18
|
+
|
19
|
+
## 2.0.0-alpha4 (2022-03-20)
|
20
|
+
- Rails support without ActiveJob queue adapter usage (#805)
|
21
|
+
|
3
22
|
## 2.0.0-alpha3 (2022-03-16)
|
4
23
|
- Restore 'app.initialized' state and add notification on it
|
5
24
|
- Fix the installation flow for Rails and add integration tests for this scenario
|
data/Gemfile.lock
CHANGED
@@ -1,28 +1,28 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
karafka (2.0.0.
|
4
|
+
karafka (2.0.0.alpha6)
|
5
5
|
dry-configurable (~> 0.13)
|
6
6
|
dry-monitor (~> 0.5)
|
7
7
|
dry-validation (~> 1.7)
|
8
8
|
rdkafka (>= 0.10)
|
9
9
|
thor (>= 0.20)
|
10
|
-
waterdrop (>= 2.
|
10
|
+
waterdrop (>= 2.3.0, < 3.0.0)
|
11
11
|
zeitwerk (~> 2.3)
|
12
12
|
|
13
13
|
GEM
|
14
14
|
remote: https://rubygems.org/
|
15
15
|
specs:
|
16
|
-
activejob (7.0.2.
|
17
|
-
activesupport (= 7.0.2.
|
16
|
+
activejob (7.0.2.3)
|
17
|
+
activesupport (= 7.0.2.3)
|
18
18
|
globalid (>= 0.3.6)
|
19
|
-
activesupport (7.0.2.
|
19
|
+
activesupport (7.0.2.3)
|
20
20
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
21
21
|
i18n (>= 1.6, < 2)
|
22
22
|
minitest (>= 5.1)
|
23
23
|
tzinfo (~> 2.0)
|
24
24
|
byebug (11.1.3)
|
25
|
-
concurrent-ruby (1.1.
|
25
|
+
concurrent-ruby (1.1.10)
|
26
26
|
diff-lcs (1.5.0)
|
27
27
|
docile (1.4.0)
|
28
28
|
dry-configurable (0.14.0)
|
@@ -64,7 +64,7 @@ GEM
|
|
64
64
|
dry-core (~> 0.5, >= 0.5)
|
65
65
|
dry-initializer (~> 3.0)
|
66
66
|
dry-schema (~> 1.9, >= 1.9.1)
|
67
|
-
factory_bot (6.2.
|
67
|
+
factory_bot (6.2.1)
|
68
68
|
activesupport (>= 5.0.0)
|
69
69
|
ffi (1.15.5)
|
70
70
|
globalid (1.0.0)
|
@@ -87,7 +87,7 @@ GEM
|
|
87
87
|
rspec-expectations (3.11.0)
|
88
88
|
diff-lcs (>= 1.2.0, < 2.0)
|
89
89
|
rspec-support (~> 3.11.0)
|
90
|
-
rspec-mocks (3.11.
|
90
|
+
rspec-mocks (3.11.1)
|
91
91
|
diff-lcs (>= 1.2.0, < 2.0)
|
92
92
|
rspec-support (~> 3.11.0)
|
93
93
|
rspec-support (3.11.0)
|
@@ -100,7 +100,7 @@ GEM
|
|
100
100
|
thor (1.2.1)
|
101
101
|
tzinfo (2.0.4)
|
102
102
|
concurrent-ruby (~> 1.0)
|
103
|
-
waterdrop (2.
|
103
|
+
waterdrop (2.3.0)
|
104
104
|
concurrent-ruby (>= 1.1)
|
105
105
|
dry-configurable (~> 0.13)
|
106
106
|
dry-monitor (~> 0.5)
|
@@ -121,4 +121,4 @@ DEPENDENCIES
|
|
121
121
|
simplecov
|
122
122
|
|
123
123
|
BUNDLED WITH
|
124
|
-
2.3.
|
124
|
+
2.3.10
|
data/bin/integrations
CHANGED
@@ -25,7 +25,7 @@ CONCURRENCY = 4
|
|
25
25
|
class Scenario
|
26
26
|
# How long a scenario can run before we kill it
|
27
27
|
# This is a fail-safe just in case something would hang
|
28
|
-
MAX_RUN_TIME = 60 *
|
28
|
+
MAX_RUN_TIME = 60 * 2
|
29
29
|
|
30
30
|
# There are rare cases where Karafka may force shutdown for some of the integration cases
|
31
31
|
# This includes exactly those
|
@@ -44,17 +44,30 @@ class Scenario
|
|
44
44
|
# @param path [String] path to the scenarios file
|
45
45
|
def initialize(path)
|
46
46
|
@path = path
|
47
|
-
@stdin, @stdout, @stderr, @wait_thr = Open3.popen3(init_and_build_cmd)
|
48
|
-
@started_at = current_time
|
49
47
|
# Last 1024 characters from stdout
|
50
48
|
@stdout_tail = ''
|
51
49
|
end
|
52
50
|
|
51
|
+
# Starts running given scenario in a separate process
|
52
|
+
def start
|
53
|
+
@stdin, @stdout, @stderr, @wait_thr = Open3.popen3(init_and_build_cmd)
|
54
|
+
@started_at = current_time
|
55
|
+
end
|
56
|
+
|
53
57
|
# @return [String] integration spec name
|
54
58
|
def name
|
55
59
|
@path.gsub("#{ROOT_PATH}/spec/integrations/", '')
|
56
60
|
end
|
57
61
|
|
62
|
+
# @return [Boolean] true if spec is pristine
|
63
|
+
def pristine?
|
64
|
+
scenario_dir = File.dirname(@path)
|
65
|
+
|
66
|
+
# If there is a Gemfile in a scenario directory, it means it is a pristine spec and we need
|
67
|
+
# to run bundle install, etc in order to run it
|
68
|
+
File.exist?(File.join(scenario_dir, 'Gemfile'))
|
69
|
+
end
|
70
|
+
|
58
71
|
# @return [Boolean] did this scenario finish or is it still running
|
59
72
|
def finished?
|
60
73
|
# If the thread is running too long, kill it
|
@@ -73,6 +86,13 @@ class Scenario
|
|
73
86
|
!@wait_thr.alive?
|
74
87
|
end
|
75
88
|
|
89
|
+
# @return [Boolean] did this scenario finish successfully or not
|
90
|
+
def success?
|
91
|
+
expected_exit_codes = EXIT_CODES[name] || EXIT_CODES[:default]
|
92
|
+
|
93
|
+
expected_exit_codes.include?(exit_code)
|
94
|
+
end
|
95
|
+
|
76
96
|
# @return [Integer] pid of the process of this scenario
|
77
97
|
def pid
|
78
98
|
@wait_thr.pid
|
@@ -84,13 +104,6 @@ class Scenario
|
|
84
104
|
@wait_thr.value&.exitstatus || 123
|
85
105
|
end
|
86
106
|
|
87
|
-
# @return [Boolean] did this scenario finish successfully or not
|
88
|
-
def success?
|
89
|
-
expected_exit_codes = EXIT_CODES[name] || EXIT_CODES[:default]
|
90
|
-
|
91
|
-
expected_exit_codes.include?(exit_code)
|
92
|
-
end
|
93
|
-
|
94
107
|
# Prints a status report when scenario is finished and stdout if it failed
|
95
108
|
def report
|
96
109
|
result = success? ? "\e[#{32}m#{'OK'}\e[0m" : "\e[#{31}m#{'FAILED'}\e[0m"
|
@@ -109,11 +122,10 @@ class Scenario
|
|
109
122
|
# Sets up a proper environment for a given spec to run and returns the run command
|
110
123
|
# @return [String] run command
|
111
124
|
def init_and_build_cmd
|
112
|
-
scenario_dir = File.dirname(@path)
|
113
|
-
|
114
125
|
# If there is a Gemfile in a scenario directory, it means it is a pristine spec and we need
|
115
126
|
# to run bundle install, etc in order to run it
|
116
|
-
if
|
127
|
+
if pristine?
|
128
|
+
scenario_dir = File.dirname(@path)
|
117
129
|
# We copy the spec into a temp dir, not to pollute the spec location with logs, etc
|
118
130
|
temp_dir = Dir.mktmpdir
|
119
131
|
file_name = File.basename(@path)
|
@@ -141,31 +153,6 @@ class Scenario
|
|
141
153
|
end
|
142
154
|
end
|
143
155
|
|
144
|
-
# Simple array to keep track of active integration processes thread running with info on which
|
145
|
-
# test scenario is running
|
146
|
-
active_scenarios = []
|
147
|
-
|
148
|
-
# Finished runners
|
149
|
-
finished_scenarios = []
|
150
|
-
|
151
|
-
# Waits for any of the processes to be finished and tracks exit codes
|
152
|
-
#
|
153
|
-
# @param active_scenarios [Array] active runners
|
154
|
-
# @param finished_scenarios [Hash] finished forks exit codes
|
155
|
-
def wait_and_track(active_scenarios, finished_scenarios)
|
156
|
-
exited = active_scenarios.find(&:finished?)
|
157
|
-
|
158
|
-
if exited
|
159
|
-
scenario = active_scenarios.delete(exited)
|
160
|
-
|
161
|
-
scenario.report
|
162
|
-
|
163
|
-
finished_scenarios << scenario
|
164
|
-
else
|
165
|
-
Thread.pass
|
166
|
-
end
|
167
|
-
end
|
168
|
-
|
169
156
|
# Load all the specs
|
170
157
|
specs = Dir[ROOT_PATH.join('spec/integrations/**/*.rb')]
|
171
158
|
|
@@ -182,15 +169,40 @@ seed = (ENV['SEED'] || rand(0..10_000)).to_i
|
|
182
169
|
|
183
170
|
puts "Random seed: #{seed}"
|
184
171
|
|
185
|
-
|
186
|
-
|
172
|
+
scenarios = specs
|
173
|
+
.shuffle(random: Random.new(seed))
|
174
|
+
.map { |integration_test| Scenario.new(integration_test) }
|
187
175
|
|
188
|
-
|
176
|
+
regulars = scenarios.reject(&:pristine?)
|
177
|
+
pristine = scenarios.select(&:pristine?)
|
189
178
|
|
190
|
-
|
191
|
-
|
179
|
+
active_scenarios = []
|
180
|
+
finished_scenarios = []
|
181
|
+
|
182
|
+
while finished_scenarios.size < scenarios.size
|
183
|
+
# If we have space to run another scenario, we add it
|
184
|
+
if active_scenarios.size < CONCURRENCY
|
185
|
+
scenario = nil
|
186
|
+
# We can run only one pristine at the same time due to concurrency issues within bundler
|
187
|
+
# Since they usually take longer than others, we try to run them as fast as possible when there
|
188
|
+
# is a slot
|
189
|
+
scenario = pristine.pop unless active_scenarios.any?(&:pristine?)
|
190
|
+
scenario ||= regulars.pop
|
191
|
+
|
192
|
+
if scenario
|
193
|
+
scenario.start
|
194
|
+
active_scenarios << scenario
|
195
|
+
end
|
196
|
+
end
|
192
197
|
|
193
|
-
|
198
|
+
active_scenarios.select(&:finished?).each do |exited|
|
199
|
+
scenario = active_scenarios.delete(exited)
|
200
|
+
scenario.report
|
201
|
+
finished_scenarios << scenario
|
202
|
+
end
|
203
|
+
|
204
|
+
sleep(0.1)
|
205
|
+
end
|
194
206
|
|
195
207
|
# Fail all if any of the tests does not have expected exit code
|
196
208
|
raise IntegrationTestError unless finished_scenarios.all?(&:success?)
|
data/bin/stress
CHANGED
data/docker-compose.yml
CHANGED
@@ -14,7 +14,9 @@ services:
|
|
14
14
|
KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
|
15
15
|
KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
|
16
16
|
KAFKA_CREATE_TOPICS:
|
17
|
-
"
|
17
|
+
"integrations_0_02:2:1,\
|
18
|
+
integrations_1_02:2:1,\
|
19
|
+
integrations_0_03:3:1,\
|
18
20
|
integrations_1_03:3:1,\
|
19
21
|
integrations_2_03:3:1,\
|
20
22
|
integrations_0_10:10:1,\
|
data/karafka.gemspec
CHANGED
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.add_dependency 'dry-validation', '~> 1.7'
|
22
22
|
spec.add_dependency 'rdkafka', '>= 0.10'
|
23
23
|
spec.add_dependency 'thor', '>= 0.20'
|
24
|
-
spec.add_dependency 'waterdrop', '>= 2.
|
24
|
+
spec.add_dependency 'waterdrop', '>= 2.3.0', '< 3.0.0'
|
25
25
|
spec.add_dependency 'zeitwerk', '~> 2.3'
|
26
26
|
|
27
27
|
spec.required_ruby_version = '>= 2.6.0'
|
data/lib/active_job/karafka.rb
CHANGED
@@ -1,20 +1,21 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
require 'active_job
|
5
|
-
|
3
|
+
begin
|
4
|
+
require 'active_job'
|
5
|
+
require_relative 'queue_adapters/karafka_adapter'
|
6
6
|
|
7
|
-
module ActiveJob
|
8
|
-
|
9
|
-
|
10
|
-
|
7
|
+
module ActiveJob
|
8
|
+
# Namespace for usage simplification outside of Rails where Railtie will not kick in.
|
9
|
+
# That way a require 'active_job/karafka' should be enough to use it
|
10
|
+
module Karafka
|
11
|
+
end
|
11
12
|
end
|
12
|
-
end
|
13
|
-
|
14
|
-
# We extend routing builder by adding a simple wrapper for easier jobs topics defining
|
15
|
-
# This needs to be extended here as it is going to be used in karafka routes, hence doing that in
|
16
|
-
# the railtie initializer would be too late
|
17
|
-
::Karafka::Routing::Builder.include ::Karafka::ActiveJob::RoutingExtensions
|
18
|
-
::Karafka::Routing::Proxy.include ::Karafka::ActiveJob::RoutingExtensions
|
19
13
|
|
20
|
-
# We extend
|
14
|
+
# We extend routing builder by adding a simple wrapper for easier jobs topics defining
|
15
|
+
# This needs to be extended here as it is going to be used in karafka routes, hence doing that in
|
16
|
+
# the railtie initializer would be too late
|
17
|
+
::Karafka::Routing::Builder.include ::Karafka::ActiveJob::RoutingExtensions
|
18
|
+
::Karafka::Routing::Proxy.include ::Karafka::ActiveJob::RoutingExtensions
|
19
|
+
rescue LoadError
|
20
|
+
# We extend ActiveJob stuff in the railtie
|
21
|
+
end
|
@@ -21,18 +21,18 @@ module Karafka
|
|
21
21
|
# that may not yet kick in when error occurs. That way we pause always on the last processed
|
22
22
|
# message.
|
23
23
|
def on_consume
|
24
|
-
Karafka.monitor.instrument('consumer.
|
24
|
+
Karafka.monitor.instrument('consumer.consumed', caller: self) do
|
25
25
|
consume
|
26
|
-
end
|
27
26
|
|
28
|
-
|
27
|
+
pause.reset
|
29
28
|
|
30
|
-
|
31
|
-
|
29
|
+
# Mark as consumed only if manual offset management is not on
|
30
|
+
return if topic.manual_offset_management
|
32
31
|
|
33
|
-
|
34
|
-
|
35
|
-
|
32
|
+
# We use the non-blocking one here. If someone needs the blocking one, can implement it
|
33
|
+
# with manual offset management
|
34
|
+
mark_as_consumed(messages.last)
|
35
|
+
end
|
36
36
|
rescue StandardError => e
|
37
37
|
Karafka.monitor.instrument(
|
38
38
|
'error.occurred',
|
@@ -48,6 +48,7 @@ module Karafka
|
|
48
48
|
time_poll.start
|
49
49
|
|
50
50
|
@buffer.clear
|
51
|
+
@rebalance_manager.clear
|
51
52
|
|
52
53
|
loop do
|
53
54
|
# Don't fetch more messages if we do not have any time left
|
@@ -58,13 +59,23 @@ module Karafka
|
|
58
59
|
# Fetch message within our time boundaries
|
59
60
|
message = poll(time_poll.remaining)
|
60
61
|
|
61
|
-
#
|
62
|
-
|
63
|
-
|
64
|
-
@buffer << message
|
62
|
+
# Put a message to the buffer if there is one
|
63
|
+
@buffer << message if message
|
65
64
|
|
66
65
|
# Track time spent on all of the processing and polling
|
67
66
|
time_poll.checkpoint
|
67
|
+
|
68
|
+
# Upon polling rebalance manager might have been updated.
|
69
|
+
# If partition revocation happens, we need to remove messages from revoked partitions
|
70
|
+
# as well as ensure we do not have duplicates due to the offset reset for partitions
|
71
|
+
# that we got assigned
|
72
|
+
remove_revoked_and_duplicated_messages if @rebalance_manager.revoked_partitions?
|
73
|
+
|
74
|
+
# Finally once we've (potentially) removed revoked, etc, if no messages were returned
|
75
|
+
# we can break.
|
76
|
+
# Worth keeping in mind, that the rebalance manager might have been updated despite no
|
77
|
+
# messages being returned during a poll
|
78
|
+
break unless message
|
68
79
|
end
|
69
80
|
|
70
81
|
@buffer
|
@@ -84,6 +95,9 @@ module Karafka
|
|
84
95
|
# Ignoring a case where there would not be an offset (for example when rebalance occurs).
|
85
96
|
#
|
86
97
|
# @param async [Boolean] should the commit happen async or sync (async by default)
|
98
|
+
# @return [Boolean] was committing successful. It may not be, when we no longer own
|
99
|
+
# given partition.
|
100
|
+
#
|
87
101
|
# @note This will commit all the offsets for the whole consumer. In order to achieve
|
88
102
|
# granular control over where the offset should be for particular topic partitions, the
|
89
103
|
store_offset should be used to only store new offset when we want it to be flushed
|
@@ -212,6 +226,8 @@ module Karafka
|
|
212
226
|
::Karafka::Instrumentation.error_callbacks.delete(@subscription_group.id)
|
213
227
|
|
214
228
|
@kafka.close
|
229
|
+
@buffer.clear
|
230
|
+
@rebalance_manager.clear
|
215
231
|
end
|
216
232
|
end
|
217
233
|
|
@@ -232,7 +248,7 @@ module Karafka
|
|
232
248
|
# Performs a single poll operation.
|
233
249
|
#
|
234
250
|
# @param timeout [Integer] timeout for a single poll
|
235
|
-
# @return [
|
251
|
+
# @return [Rdkafka::Consumer::Message, nil] fetched message or nil if nothing polled
|
236
252
|
def poll(timeout)
|
237
253
|
time_poll ||= TimeTrackers::Poll.new(timeout)
|
238
254
|
|
@@ -301,6 +317,20 @@ module Karafka
|
|
301
317
|
|
302
318
|
consumer
|
303
319
|
end
|
320
|
+
|
321
|
+
# We may have a case where in the middle of data polling, we've lost a partition.
|
322
|
+
# In a case like this we should remove all the pre-buffered messages from lost partitions as
|
323
|
+
# we are no longer responsible in a given process for processing those messages and they
|
324
|
+
# should have been picked up by a different process.
|
325
|
+
def remove_revoked_and_duplicated_messages
|
326
|
+
@rebalance_manager.revoked_partitions.each do |topic, partitions|
|
327
|
+
partitions.each do |partition|
|
328
|
+
@buffer.delete(topic, partition)
|
329
|
+
end
|
330
|
+
end
|
331
|
+
|
332
|
+
@buffer.uniq!
|
333
|
+
end
|
304
334
|
end
|
305
335
|
end
|
306
336
|
end
|
@@ -42,6 +42,37 @@ module Karafka
|
|
42
42
|
@groups[message.topic][message.partition] << message
|
43
43
|
end
|
44
44
|
|
45
|
+
# Removes given topic and partition data out of the buffer
|
46
|
+
# This is used when there's a partition revocation
|
47
|
+
# @param topic [String] topic we're interested in
|
48
|
+
# @param partition [Integer] partition of which data we want to remove
|
49
|
+
def delete(topic, partition)
|
50
|
+
return unless @groups.key?(topic)
|
51
|
+
return unless @groups.fetch(topic).key?(partition)
|
52
|
+
|
53
|
+
topic_data = @groups.fetch(topic)
|
54
|
+
topic_data.delete(partition)
|
55
|
+
|
56
|
+
recount!
|
57
|
+
|
58
|
+
# If there are no more partitions to handle in a given topic, remove it completely
|
59
|
+
@groups.delete(topic) if topic_data.empty?
|
60
|
+
end
|
61
|
+
|
62
|
+
# Removes duplicated messages from the same partitions
|
63
|
+
# This should be used only when rebalance occurs, as we may get data again we already have
|
64
|
+
# due to the processing from the last offset. In cases like this, we may get same data
|
65
|
+
# again and we do want to ensure as few duplications as possible
|
66
|
+
def uniq!
|
67
|
+
@groups.each_value do |partitions|
|
68
|
+
partitions.each_value do |messages|
|
69
|
+
messages.uniq!(&:offset)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
recount!
|
74
|
+
end
|
75
|
+
|
45
76
|
# Removes all the data from the buffer.
|
46
77
|
#
|
47
78
|
# @note We do not clear the whole groups hash but rather we clear the partition hashes, so
|
@@ -52,6 +83,15 @@ module Karafka
|
|
52
83
|
@size = 0
|
53
84
|
@groups.each_value(&:clear)
|
54
85
|
end
|
86
|
+
|
87
|
+
private
|
88
|
+
|
89
|
+
# Updates the messages count if we performed any operations that could change the state
|
90
|
+
def recount!
|
91
|
+
@size = @groups.each_value.sum do |partitions|
|
92
|
+
partitions.each_value.map(&:count).sum
|
93
|
+
end
|
94
|
+
end
|
55
95
|
end
|
56
96
|
end
|
57
97
|
end
|
@@ -9,35 +9,50 @@ module Karafka
|
|
9
9
|
#
|
10
10
|
# @note Since this does not happen really often, we try to stick with same objects for the
|
11
11
|
# empty states most of the time, so we don't create many objects during the manager life
|
12
|
+
#
|
13
|
+
# @note Internally in the rebalance manager we have a notion of lost partitions. Partitions
|
14
|
+
# that are lost, are those that got revoked but did not get re-assigned back. We do not
|
15
|
+
# expose this concept outside and we normalize to have them revoked, as it is irrelevant
|
16
|
+
# from the rest of the code perspective as only those that are lost are truly revoked.
|
12
17
|
class RebalanceManager
|
18
|
+
# Empty array for internal usage not to create new objects
|
19
|
+
EMPTY_ARRAY = [].freeze
|
20
|
+
|
21
|
+
private_constant :EMPTY_ARRAY
|
22
|
+
|
13
23
|
# @return [RebalanceManager]
|
14
24
|
def initialize
|
15
|
-
@
|
16
|
-
@
|
25
|
+
@assigned_partitions = {}
|
26
|
+
@revoked_partitions = {}
|
27
|
+
@lost_partitions = {}
|
17
28
|
end
|
18
29
|
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
result = @assigned.dup
|
27
|
-
@assigned.clear
|
28
|
-
result
|
30
|
+
# Resets the rebalance manager state
|
31
|
+
# This needs to be done before each polling loop as during the polling, the state may be
|
32
|
+
# changed
|
33
|
+
def clear
|
34
|
+
@assigned_partitions.clear
|
35
|
+
@revoked_partitions.clear
|
36
|
+
@lost_partitions.clear
|
29
37
|
end
|
30
38
|
|
31
39
|
# @return [Hash<String, Array<Integer>>] hash where the keys are the names of topics for
|
32
40
|
# which we've lost partitions and array with ids of the partitions as the value
|
33
|
-
# @note
|
34
|
-
# for new revoked partitions are set only during a state change
|
41
|
+
# @note We do not consider as lost the topics and partitions that got revoked and assigned back
|
35
42
|
def revoked_partitions
|
36
|
-
return @
|
43
|
+
return @revoked_partitions if @revoked_partitions.empty?
|
44
|
+
return @lost_partitions unless @lost_partitions.empty?
|
45
|
+
|
46
|
+
@revoked_partitions.each do |topic, partitions|
|
47
|
+
@lost_partitions[topic] = partitions - @assigned_partitions.fetch(topic, EMPTY_ARRAY)
|
48
|
+
end
|
49
|
+
|
50
|
+
@lost_partitions
|
51
|
+
end
|
37
52
|
|
38
|
-
|
39
|
-
|
40
|
-
|
53
|
+
# @return [Boolean] true if any partitions were revoked
|
54
|
+
def revoked_partitions?
|
55
|
+
!revoked_partitions.empty?
|
41
56
|
end
|
42
57
|
|
43
58
|
# Callback that kicks in inside of rdkafka, when new partitions are assigned.
|
@@ -46,7 +61,7 @@ module Karafka
|
|
46
61
|
# @param _ [Rdkafka::Consumer]
|
47
62
|
# @param partitions [Rdkafka::Consumer::TopicPartitionList]
|
48
63
|
def on_partitions_assigned(_, partitions)
|
49
|
-
@
|
64
|
+
@assigned_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
|
50
65
|
end
|
51
66
|
|
52
67
|
# Callback that kicks in inside of rdkafka, when partitions are revoked.
|
@@ -55,7 +70,7 @@ module Karafka
|
|
55
70
|
# @param _ [Rdkafka::Consumer]
|
56
71
|
# @param partitions [Rdkafka::Consumer::TopicPartitionList]
|
57
72
|
def on_partitions_revoked(_, partitions)
|
58
|
-
@
|
73
|
+
@revoked_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
|
59
74
|
end
|
60
75
|
end
|
61
76
|
end
|
@@ -10,6 +10,7 @@ module Karafka
|
|
10
10
|
required(:id).filled(:str?, format?: Karafka::Contracts::TOPIC_REGEXP)
|
11
11
|
required(:kafka).filled
|
12
12
|
required(:max_messages) { int? & gteq?(1) }
|
13
|
+
required(:initial_offset).filled(included_in?: %w[earliest latest])
|
13
14
|
required(:max_wait_time).filled { int? & gteq?(10) }
|
14
15
|
required(:manual_offset_management).filled(:bool?)
|
15
16
|
required(:name).filled(:str?, format?: Karafka::Contracts::TOPIC_REGEXP)
|
@@ -4,7 +4,7 @@ module Karafka
|
|
4
4
|
module Instrumentation
|
5
5
|
# Default listener that hooks up to our instrumentation and uses its events for logging
|
6
6
|
# It can be removed/replaced or anything without any harm to the Karafka app flow.
|
7
|
-
class
|
7
|
+
class LoggerListener
|
8
8
|
# Log levels that we use in this particular listener
|
9
9
|
USED_LOG_LEVELS = %i[
|
10
10
|
debug
|
data/lib/karafka/railtie.rb
CHANGED
@@ -82,8 +82,20 @@ if rails
|
|
82
82
|
initializer 'karafka.require_karafka_boot_file' do |app|
|
83
83
|
rails6plus = Rails.gem_version >= Gem::Version.new('6.0.0')
|
84
84
|
|
85
|
+
# If the boot file location is set to "false", we should not raise an exception and we
|
86
|
+
# should just not load karafka stuff. Setting this explicitly to false indicates, that
|
87
|
+
# karafka is part of the supply chain but it is not a first class citizen of a given
|
88
|
+
# system (may be just a dependency of a dependency), thus railtie should not kick in to
|
89
|
+
# load the non-existing boot file
|
90
|
+
next if Karafka.boot_file.to_s == 'false'
|
91
|
+
|
85
92
|
karafka_boot_file = Rails.root.join(Karafka.boot_file.to_s).to_s
|
86
93
|
|
94
|
+
# Provide more comprehensive error for when no boot file
|
95
|
+
unless File.exist?(karafka_boot_file)
|
96
|
+
raise(Karafka::Errors::MissingBootFileError, karafka_boot_file)
|
97
|
+
end
|
98
|
+
|
87
99
|
if rails6plus
|
88
100
|
app.reloader.to_prepare do
|
89
101
|
# Load Karafka boot file, so it can be used in Rails server context
|
@@ -41,7 +41,7 @@ module Karafka
|
|
41
41
|
|
42
42
|
kafka[:'client.id'] ||= Karafka::App.config.client_id
|
43
43
|
kafka[:'group.id'] ||= @topics.first.consumer_group.id
|
44
|
-
kafka[:'auto.offset.reset'] ||=
|
44
|
+
kafka[:'auto.offset.reset'] ||= @topics.first.initial_offset
|
45
45
|
# Karafka manages the offsets based on the processing state, thus we do not rely on the
|
46
46
|
# rdkafka offset auto-storing
|
47
47
|
kafka[:'enable.auto.offset.store'] = 'false'
|
data/lib/karafka/setup/config.rb
CHANGED
@@ -54,10 +54,13 @@ module Karafka
|
|
54
54
|
setting :consumer_persistence, default: true
|
55
55
|
# Default deserializer for converting incoming data into ruby objects
|
56
56
|
setting :deserializer, default: Karafka::Serialization::Json::Deserializer.new
|
57
|
+
# option [String] should we start with the earliest possible offset or latest
|
58
|
+
# This will set the `auto.offset.reset` value unless present in the kafka scope
|
59
|
+
setting :initial_offset, default: 'earliest'
|
57
60
|
# option [Boolean] should we leave offset management to the user
|
58
61
|
setting :manual_offset_management, default: false
|
59
62
|
# options max_messages [Integer] how many messages do we want to fetch from Kafka in one go
|
60
|
-
setting :max_messages, default:
|
63
|
+
setting :max_messages, default: 1_000
|
61
64
|
# option [Integer] number of milliseconds we can wait while fetching data
|
62
65
|
setting :max_wait_time, default: 10_000
|
63
66
|
# option shutdown_timeout [Integer] the number of milliseconds after which Karafka no
|
@@ -40,7 +40,7 @@ class KarafkaApp < Karafka::App
|
|
40
40
|
# interested in logging events for certain environments. Since instrumentation
|
41
41
|
# notifications add extra boilerplate, if you want to achieve max performance,
|
42
42
|
# listen to only what you really need for given environment.
|
43
|
-
Karafka.monitor.subscribe(Karafka::Instrumentation::
|
43
|
+
Karafka.monitor.subscribe(Karafka::Instrumentation::LoggerListener.new)
|
44
44
|
# Karafka.monitor.subscribe(Karafka::Instrumentation::ProctitleListener.new)
|
45
45
|
|
46
46
|
routes.draw do
|
data/lib/karafka/version.rb
CHANGED
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: karafka
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.0.
|
4
|
+
version: 2.0.0.alpha6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maciej Mensfeld
|
@@ -34,7 +34,7 @@ cert_chain:
|
|
34
34
|
R2P11bWoCtr70BsccVrN8jEhzwXngMyI2gVt750Y+dbTu1KgRqZKp/ECe7ZzPzXj
|
35
35
|
pIy9vHxTANKYVyI4qj8OrFdEM5BQNu8oQpL0iQ==
|
36
36
|
-----END CERTIFICATE-----
|
37
|
-
date: 2022-
|
37
|
+
date: 2022-04-17 00:00:00.000000000 Z
|
38
38
|
dependencies:
|
39
39
|
- !ruby/object:Gem::Dependency
|
40
40
|
name: dry-configurable
|
@@ -112,7 +112,7 @@ dependencies:
|
|
112
112
|
requirements:
|
113
113
|
- - ">="
|
114
114
|
- !ruby/object:Gem::Version
|
115
|
-
version: 2.
|
115
|
+
version: 2.3.0
|
116
116
|
- - "<"
|
117
117
|
- !ruby/object:Gem::Version
|
118
118
|
version: 3.0.0
|
@@ -122,7 +122,7 @@ dependencies:
|
|
122
122
|
requirements:
|
123
123
|
- - ">="
|
124
124
|
- !ruby/object:Gem::Version
|
125
|
-
version: 2.
|
125
|
+
version: 2.3.0
|
126
126
|
- - "<"
|
127
127
|
- !ruby/object:Gem::Version
|
128
128
|
version: 3.0.0
|
@@ -211,9 +211,9 @@ files:
|
|
211
211
|
- lib/karafka/instrumentation/callbacks/error.rb
|
212
212
|
- lib/karafka/instrumentation/callbacks/statistics.rb
|
213
213
|
- lib/karafka/instrumentation/logger.rb
|
214
|
+
- lib/karafka/instrumentation/logger_listener.rb
|
214
215
|
- lib/karafka/instrumentation/monitor.rb
|
215
216
|
- lib/karafka/instrumentation/proctitle_listener.rb
|
216
|
-
- lib/karafka/instrumentation/stdout_listener.rb
|
217
217
|
- lib/karafka/licenser.rb
|
218
218
|
- lib/karafka/messages/batch_metadata.rb
|
219
219
|
- lib/karafka/messages/builders/batch_metadata.rb
|
@@ -282,7 +282,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
282
282
|
- !ruby/object:Gem::Version
|
283
283
|
version: 1.3.1
|
284
284
|
requirements: []
|
285
|
-
rubygems_version: 3.3.
|
285
|
+
rubygems_version: 3.3.3
|
286
286
|
signing_key:
|
287
287
|
specification_version: 4
|
288
288
|
summary: Ruby based framework for working with Apache Kafka
|
metadata.gz.sig
CHANGED
Binary file
|