karafka 2.0.0.alpha4 → 2.0.0.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +26 -3
- data/Gemfile.lock +11 -11
- data/bin/integrations +55 -43
- data/docker-compose.yml +4 -1
- data/karafka.gemspec +1 -1
- data/lib/karafka/base_consumer.rb +65 -12
- data/lib/karafka/connection/client.rb +35 -5
- data/lib/karafka/connection/listener.rb +11 -7
- data/lib/karafka/connection/messages_buffer.rb +44 -13
- data/lib/karafka/connection/pauses_manager.rb +2 -2
- data/lib/karafka/connection/rebalance_manager.rb +35 -20
- data/lib/karafka/contracts/config.rb +1 -0
- data/lib/karafka/instrumentation/{stdout_listener.rb → logger_listener.rb} +1 -1
- data/lib/karafka/instrumentation/monitor.rb +2 -1
- data/lib/karafka/pro/active_job/dispatcher.rb +9 -9
- data/lib/karafka/pro/active_job/job_options_contract.rb +9 -9
- data/lib/karafka/pro/loader.rb +13 -8
- data/lib/karafka/pro/performance_tracker.rb +80 -0
- data/lib/karafka/processing/executor.rb +15 -10
- data/lib/karafka/processing/jobs/base.rb +16 -0
- data/lib/karafka/processing/jobs/consume.rb +7 -2
- data/lib/karafka/processing/jobs_queue.rb +18 -9
- data/lib/karafka/processing/worker.rb +23 -0
- data/lib/karafka/railtie.rb +12 -0
- data/lib/karafka/scheduler.rb +21 -0
- data/lib/karafka/setup/config.rb +3 -1
- data/lib/karafka/templates/karafka.rb.erb +1 -1
- data/lib/karafka/time_trackers/pause.rb +10 -2
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +8 -6
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f108cb4288d0ed0510381f51c77d49e052b947f6180c9b9c0b06e0ac2b599894
+  data.tar.gz: 3d79066d0107c08f450ca9f4c3b5c4a39aae497836c80bf8380c65f1406b82c0
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4aae257010c992c59ce4b01ead54ff2cfd4e8ccd8cbe6b52214b3cedf8f879690e0d577f2b41f44b1ab6888d7e27bbc92f3ba4a69e8b127687fb4c43bff51fbc
+  data.tar.gz: f65e425cb84152d20a055bdb9a94fd98280597cdf5e431337cb8604040534cacbfdd03efd6dc23b86c9ecf25721c860bd55ca75ad3f98e4c66136a88c1efc4e7
checksums.yaml.gz.sig
CHANGED
Binary file
data/.ruby-version
CHANGED
@@ -1 +1 @@
-3.1.
+3.1.2
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,29 @@
 # Karafka framework changelog

-## 2.0.0-
+## 2.0.0-beta1 (2022-05-22)
+- Update the jobs queue blocking engine and allow for non-blocking jobs execution
+- Provide `#prepared` hook that always runs before the fetching loop is unblocked
+- [Pro] Introduce performance tracker for scheduling optimizer
+- Provide ability to pause (`#pause`) and resume (`#resume`) given partitions from the consumers
+- Small integration specs refactoring + specs for pausing scenarios
+
+## 2.0.0-alpha6 (2022-04-17)
+- Fix a bug, where upon missing boot file and Rails, railtie would fail with a generic exception (#818)
+- Fix an issue with parallel pristine specs colliding with each other during `bundle install` (#820)
+- Replace `consumer.consume` with `consumer.consumed` event to match the behaviour
+- Make sure, that offset committing happens before the `consumer.consumed` event is propagated
+- Fix for failing when not installed (just a dependency) (#817)
+- Evict messages from partitions that were lost upon rebalancing (#825)
+- Do **not** run `#revoked` on partitions that were lost and assigned back upon rebalancing (#825)
+- Remove potential duplicated that could occur upon rebalance with re-assigned partitions (#825)
+- Optimize integration test suite additional consumers shutdown process (#828)
+- Optimize messages eviction and duplicates removal on poll stopped due to lack of messages
+- Add static group membership integration spec
+
+## 2.0.0-alpha5 (2022-04-03)
+- Rename StdoutListener to LoggerListener (#811)
+
+## 2.0.0-alpha4 (2022-03-20)
 - Rails support without ActiveJob queue adapter usage (#805)

 ## 2.0.0-alpha3 (2022-03-16)
@@ -10,12 +33,12 @@

 ## 2.0.0-alpha2 (2022-02-19)
 - Require `kafka` keys to be symbols
-- Added ActiveJob Pro adapter
+- [Pro] Added ActiveJob Pro adapter
 - Small updates to the license and docs

 ## 2.0.0-alpha1 (2022-01-30)
 - Change license to `LGPL-3.0`
-- Introduce a Pro subscription
+- [Pro] Introduce a Pro subscription
 - Switch from `ruby-kafka` to `librdkafka` as an underlying driver
 - Introduce fully automatic integration tests that go through the whole server lifecycle
 - Integrate WaterDrop tightly with autoconfiguration inheritance and an option to redefine it
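Editor's note (not part of the package diff): the beta1 entries above describe consumer-level APIs that also appear in the data/lib/karafka/base_consumer.rb changes later in this diff, namely a `#prepared` hook and `#pause`/`#resume` helpers. A minimal usage sketch follows; the consumer class, the payload printing and the decision to pause on any error are illustrative assumptions, not code from this release:

class ExampleConsumer < Karafka::BaseConsumer
  # Runs in the blocking part of the flow, before the fetching loop is unblocked
  def prepared
    @batch_received_at = Time.now.utc
  end

  def consume
    messages.each { |message| puts message.payload }
  rescue StandardError
    # Pause this topic partition starting from the first offset of the current batch;
    # with no explicit timeout, the configured exponential backoff decides the pause length
    pause(messages.first.offset)
  end
end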
data/Gemfile.lock
CHANGED
@@ -1,31 +1,31 @@
 PATH
   remote: .
   specs:
-    karafka (2.0.0.
+    karafka (2.0.0.beta1)
       dry-configurable (~> 0.13)
       dry-monitor (~> 0.5)
       dry-validation (~> 1.7)
       rdkafka (>= 0.10)
       thor (>= 0.20)
-      waterdrop (>= 2.
+      waterdrop (>= 2.3.0, < 3.0.0)
       zeitwerk (~> 2.3)

 GEM
   remote: https://rubygems.org/
   specs:
-    activejob (7.0.
-      activesupport (= 7.0.
+    activejob (7.0.3)
+      activesupport (= 7.0.3)
       globalid (>= 0.3.6)
-    activesupport (7.0.
+    activesupport (7.0.3)
       concurrent-ruby (~> 1.0, >= 1.0.2)
       i18n (>= 1.6, < 2)
       minitest (>= 5.1)
       tzinfo (~> 2.0)
     byebug (11.1.3)
-    concurrent-ruby (1.1.
+    concurrent-ruby (1.1.10)
     diff-lcs (1.5.0)
     docile (1.4.0)
-    dry-configurable (0.
+    dry-configurable (0.15.0)
       concurrent-ruby (~> 1.0)
       dry-core (~> 0.6)
     dry-container (0.9.0)
@@ -64,7 +64,7 @@ GEM
       dry-core (~> 0.5, >= 0.5)
       dry-initializer (~> 3.0)
       dry-schema (~> 1.9, >= 1.9.1)
-    factory_bot (6.2.
+    factory_bot (6.2.1)
       activesupport (>= 5.0.0)
     ffi (1.15.5)
     globalid (1.0.0)
@@ -87,7 +87,7 @@ GEM
     rspec-expectations (3.11.0)
       diff-lcs (>= 1.2.0, < 2.0)
       rspec-support (~> 3.11.0)
-    rspec-mocks (3.11.
+    rspec-mocks (3.11.1)
       diff-lcs (>= 1.2.0, < 2.0)
       rspec-support (~> 3.11.0)
     rspec-support (3.11.0)
@@ -100,7 +100,7 @@ GEM
     thor (1.2.1)
     tzinfo (2.0.4)
       concurrent-ruby (~> 1.0)
-    waterdrop (2.
+    waterdrop (2.3.0)
       concurrent-ruby (>= 1.1)
       dry-configurable (~> 0.13)
       dry-monitor (~> 0.5)
@@ -121,4 +121,4 @@ DEPENDENCIES
   simplecov

 BUNDLED WITH
-   2.3.
+   2.3.11
data/bin/integrations
CHANGED
@@ -44,17 +44,30 @@ class Scenario
   # @param path [String] path to the scenarios file
   def initialize(path)
     @path = path
-    @stdin, @stdout, @stderr, @wait_thr = Open3.popen3(init_and_build_cmd)
-    @started_at = current_time
     # Last 1024 characters from stdout
     @stdout_tail = ''
   end

+  # Starts running given scenario in a separate process
+  def start
+    @stdin, @stdout, @stderr, @wait_thr = Open3.popen3(init_and_build_cmd)
+    @started_at = current_time
+  end
+
   # @return [String] integration spec name
   def name
     @path.gsub("#{ROOT_PATH}/spec/integrations/", '')
   end

+  # @return [Boolean] true if spec is pristine
+  def pristine?
+    scenario_dir = File.dirname(@path)
+
+    # If there is a Gemfile in a scenario directory, it means it is a pristine spec and we need
+    # to run bundle install, etc in order to run it
+    File.exist?(File.join(scenario_dir, 'Gemfile'))
+  end
+
   # @return [Boolean] did this scenario finished or is it still running
   def finished?
     # If the thread is running too long, kill it
@@ -73,6 +86,13 @@ class Scenario
     !@wait_thr.alive?
   end

+  # @return [Boolean] did this scenario finish successfully or not
+  def success?
+    expected_exit_codes = EXIT_CODES[name] || EXIT_CODES[:default]
+
+    expected_exit_codes.include?(exit_code)
+  end
+
   # @return [Integer] pid of the process of this scenario
   def pid
     @wait_thr.pid
@@ -84,13 +104,6 @@ class Scenario
     @wait_thr.value&.exitstatus || 123
   end

-  # @return [Boolean] did this scenario finish successfully or not
-  def success?
-    expected_exit_codes = EXIT_CODES[name] || EXIT_CODES[:default]
-
-    expected_exit_codes.include?(exit_code)
-  end
-
   # Prints a status report when scenario is finished and stdout if it failed
   def report
     result = success? ? "\e[#{32}m#{'OK'}\e[0m" : "\e[#{31}m#{'FAILED'}\e[0m"
@@ -109,11 +122,10 @@ class Scenario
   # Sets up a proper environment for a given spec to run and returns the run command
   # @return [String] run command
   def init_and_build_cmd
-    scenario_dir = File.dirname(@path)
-
     # If there is a Gemfile in a scenario directory, it means it is a pristine spec and we need
     # to run bundle install, etc in order to run it
-    if
+    if pristine?
+      scenario_dir = File.dirname(@path)
       # We copy the spec into a temp dir, not to pollute the spec location with logs, etc
       temp_dir = Dir.mktmpdir
       file_name = File.basename(@path)
@@ -141,31 +153,6 @@ class Scenario
   end
 end

-# Simple array to keep track of active integration processes thread running with info on which
-# test scenario is running
-active_scenarios = []
-
-# Finished runners
-finished_scenarios = []
-
-# Waits for any of the processes to be finished and tracks exit codes
-#
-# @param active_scenarios [Array] active runners
-# @param finished_scenarios [Hash] finished forks exit codes
-def wait_and_track(active_scenarios, finished_scenarios)
-  exited = active_scenarios.find(&:finished?)
-
-  if exited
-    scenario = active_scenarios.delete(exited)
-
-    scenario.report
-
-    finished_scenarios << scenario
-  else
-    Thread.pass
-  end
-end
-
 # Load all the specs
 specs = Dir[ROOT_PATH.join('spec/integrations/**/*.rb')]

@@ -182,15 +169,40 @@ seed = (ENV['SEED'] || rand(0..10_000)).to_i

 puts "Random seed: #{seed}"

-
-
+scenarios = specs
+  .shuffle(random: Random.new(seed))
+  .map { |integration_test| Scenario.new(integration_test) }

-
+regulars = scenarios.reject(&:pristine?)
+pristine = scenarios.select(&:pristine?)

-
-
+active_scenarios = []
+finished_scenarios = []
+
+while finished_scenarios.size < scenarios.size
+  # If we have space to run another scenario, we add it
+  if active_scenarios.size < CONCURRENCY
+    scenario = nil
+    # We can run only one pristine at the same time due to concurrency issues within bundler
+    # Since they usually take longer than others, we try to run them as fast as possible when there
+    # is a slot
+    scenario = pristine.pop unless active_scenarios.any?(&:pristine?)
+    scenario ||= regulars.pop
+
+    if scenario
+      scenario.start
+      active_scenarios << scenario
+    end
+  end

-
+  active_scenarios.select(&:finished?).each do |exited|
+    scenario = active_scenarios.delete(exited)
+    scenario.report
+    finished_scenarios << scenario
+  end
+
+  sleep(0.1)
+end

 # Fail all if any of the tests does not have expected exit code
 raise IntegrationTestError unless finished_scenarios.all?(&:success?)
data/docker-compose.yml
CHANGED
@@ -14,7 +14,10 @@ services:
       KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
       KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
       KAFKA_CREATE_TOPICS:
-        "
+        "integrations_0_02:2:1,\
+        integrations_1_02:2:1,\
+        integrations_2_02:2:1,\
+        integrations_0_03:3:1,\
         integrations_1_03:3:1,\
         integrations_2_03:3:1,\
         integrations_0_10:10:1,\
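Editor's note (not part of the package diff): each KAFKA_CREATE_TOPICS entry above appears to follow the `name:partitions:replication_factor` convention used by Kafka docker images for topic auto-creation, so e.g. `integrations_0_02:2:1` pre-creates the topic `integrations_0_02` with 2 partitions and a replication factor of 1, presumably for the new pausing and rebalancing integration specs.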
data/karafka.gemspec
CHANGED
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
   spec.add_dependency 'dry-validation', '~> 1.7'
   spec.add_dependency 'rdkafka', '>= 0.10'
   spec.add_dependency 'thor', '>= 0.20'
-  spec.add_dependency 'waterdrop', '>= 2.
+  spec.add_dependency 'waterdrop', '>= 2.3.0', '< 3.0.0'
   spec.add_dependency 'zeitwerk', '~> 2.3'

   spec.required_ruby_version = '>= 2.6.0'
data/lib/karafka/base_consumer.rb
CHANGED
@@ -10,8 +10,8 @@ module Karafka
     attr_accessor :messages
     # @return [Karafka::Connection::Client] kafka connection client
     attr_accessor :client
-    # @return [Karafka::TimeTrackers::Pause] current topic partition pause
-    attr_accessor :
+    # @return [Karafka::TimeTrackers::Pause] current topic partition pause tracker
+    attr_accessor :pause_tracker
     # @return [Waterdrop::Producer] producer instance
     attr_accessor :producer

@@ -21,18 +21,18 @@ module Karafka
     # that may not yet kick in when error occurs. That way we pause always on the last processed
     # message.
     def on_consume
-      Karafka.monitor.instrument('consumer.
+      Karafka.monitor.instrument('consumer.consumed', caller: self) do
         consume
-      end

-
+        pause_tracker.reset

-
-
+        # Mark as consumed only if manual offset management is not on
+        return if topic.manual_offset_management

-
-
-
+        # We use the non-blocking one here. If someone needs the blocking one, can implement it
+        # with manual offset management
+        mark_as_consumed(messages.last)
+      end
     rescue StandardError => e
       Karafka.monitor.instrument(
         'error.occurred',
@@ -40,8 +40,8 @@ module Karafka
         caller: self,
         type: 'consumer.consume.error'
       )
-
-      pause.
+
+      pause(@seek_offset || messages.first.offset)
     end

     # Trigger method for running on shutdown.
@@ -76,8 +76,31 @@ module Karafka
       )
     end

+    # Can be used to run preparation code
+    #
+    # @private
+    # @note This should not be used by the end users as it is part of the lifecycle of things but
+    #   not as part of the public api. This can act as a hook when creating non-blocking
+    #   consumers and doing other advanced stuff
+    def on_prepared
+      Karafka.monitor.instrument('consumer.prepared', caller: self) do
+        prepared
+      end
+    rescue StandardError => e
+      Karafka.monitor.instrument(
+        'error.occurred',
+        error: e,
+        caller: self,
+        type: 'consumer.prepared.error'
+      )
+    end
+
     private

+    # Method that gets called in the blocking flow allowing to setup any type of resources or to
+    # send additional commands to Kafka before the proper execution starts.
+    def prepared; end
+
     # Method that will perform business logic and on data received from Kafka (it will consume
     # the data)
     # @note This method needs bo be implemented in a subclass. We stub it here as a failover if
@@ -97,6 +120,10 @@ module Karafka
     # Marks message as consumed in an async way.
     #
     # @param message [Messages::Message] last successfully processed message.
+    # @note We keep track of this offset in case we would mark as consumed and got error when
+    #   processing another message. In case like this we do not pause on the message we've already
+    #   processed but rather at the next one. This applies to both sync and async versions of this
+    #   method.
     def mark_as_consumed(message)
       client.mark_as_consumed(message)
       @seek_offset = message.offset + 1
@@ -110,6 +137,32 @@ module Karafka
       @seek_offset = message.offset + 1
     end

+    # Pauses processing on a given offset for the current topic partition
+    #
+    # After given partition is resumed, it will continue processing from the given offset
+    # @param offset [Integer] offset from which we want to restart the processing
+    # @param timeout [Integer, nil] how long in milliseconds do we want to pause or nil to use the
+    #   default exponential pausing strategy defined for retries
+    def pause(offset, timeout = nil)
+      client.pause(
+        messages.metadata.topic,
+        messages.metadata.partition,
+        offset
+      )
+
+      timeout ? pause_tracker.pause(timeout) : pause_tracker.pause
+    end
+
+    # Resumes processing of the current topic partition
+    def resume
+      client.resume(
+        messages.metadata.topic,
+        messages.metadata.partition
+      )
+
+      pause_tracker.expire
+    end
+
     # Seeks in the context of current topic and partition
     #
     # @param offset [Integer] offset where we want to seek
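Editor's note (not part of the package diff): the hunks above replace the `consumer.consume` instrumentation event with `consumer.consumed` and add `consumer.prepared`. A sketch of hooking into the new event, assuming the standard `Karafka.monitor.subscribe` API; the log message itself is illustrative:

Karafka.monitor.subscribe('consumer.consumed') do |event|
  # The consumer instance is passed as the :caller in the instrumentation payload above
  consumer = event[:caller]

  Karafka.logger.info(
    "Consumed #{consumer.messages.count} messages from " \
    "#{consumer.messages.metadata.topic}/#{consumer.messages.metadata.partition}"
  )
end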
data/lib/karafka/connection/client.rb
CHANGED
@@ -48,6 +48,7 @@ module Karafka
       time_poll.start

       @buffer.clear
+      @rebalance_manager.clear

       loop do
         # Don't fetch more messages if we do not have any time left
@@ -58,13 +59,23 @@ module Karafka
         # Fetch message within our time boundaries
         message = poll(time_poll.remaining)

-        #
-
-
-        @buffer << message
+        # Put a message to the buffer if there is one
+        @buffer << message if message

         # Track time spent on all of the processing and polling
         time_poll.checkpoint
+
+        # Upon polling rebalance manager might have been updated.
+        # If partition revocation happens, we need to remove messages from revoked partitions
+        # as well as ensure we do not have duplicated due to the offset reset for partitions
+        # that we got assigned
+        remove_revoked_and_duplicated_messages if @rebalance_manager.revoked_partitions?
+
+        # Finally once we've (potentially) removed revoked, etc, if no messages were returned
+        # we can break.
+        # Worth keeping in mind, that the rebalance manager might have been updated despite no
+        # messages being returned during a poll
+        break unless message
       end

       @buffer
@@ -84,6 +95,9 @@ module Karafka
     # Ignoring a case where there would not be an offset (for example when rebalance occurs).
     #
     # @param async [Boolean] should the commit happen async or sync (async by default)
+    # @return [Boolean] did committing was successful. It may be not, when we no longer own
+    #   given partition.
+    #
     # @note This will commit all the offsets for the whole consumer. In order to achieve
     #   granular control over where the offset should be for particular topic partitions, the
     #   store_offset should be used to only store new offset when we want to to be flushed
@@ -212,6 +226,8 @@ module Karafka
       ::Karafka::Instrumentation.error_callbacks.delete(@subscription_group.id)

       @kafka.close
+      @buffer.clear
+      @rebalance_manager.clear
     end
   end

@@ -232,7 +248,7 @@ module Karafka
     # Performs a single poll operation.
     #
     # @param timeout [Integer] timeout for a single poll
-    # @return [
+    # @return [Rdkafka::Consumer::Message, nil] fetched message or nil if nothing polled
     def poll(timeout)
       time_poll ||= TimeTrackers::Poll.new(timeout)

@@ -301,6 +317,20 @@ module Karafka

       consumer
     end
+
+    # We may have a case where in the middle of data polling, we've lost a partition.
+    # In a case like this we should remove all the pre-buffered messages from list partitions as
+    # we are no longer responsible in a given process for processing those messages and they
+    # should have been picked up by a different process.
+    def remove_revoked_and_duplicated_messages
+      @rebalance_manager.revoked_partitions.each do |topic, partitions|
+        partitions.each do |partition|
+          @buffer.delete(topic, partition)
+        end
+      end
+
+      @buffer.uniq!
+    end
   end
  end
 end
data/lib/karafka/connection/listener.rb
CHANGED
@@ -15,6 +15,8 @@ module Karafka
       @pauses_manager = PausesManager.new
       @client = Client.new(@subscription_group)
       @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
+      # We reference scheduler here as it is much faster than fetching this each time
+      @scheduler = ::Karafka::App.config.internal.scheduler
     end

     # Runs the main listener fetch loop.
@@ -66,9 +68,9 @@ module Karafka
         # distributing consuming jobs as upon revoking, we might get assigned to the same
         # partitions, thus getting their jobs. The revoking jobs need to finish before
         # appropriate consumers are taken down and re-created
-        wait(@subscription_group) if
+        wait(@subscription_group) if schedule_revoke_lost_partitions_jobs

-
+        schedule_partitions_jobs(messages_buffer)

         # We wait only on jobs from our subscription group. Other groups are independent.
         wait(@subscription_group)
@@ -103,15 +105,17 @@ module Karafka

     # Enqueues revoking jobs for partitions that were taken away from the running process.
     # @return [Boolean] was there anything to revoke
-
+    # @note We do not use scheduler here as those jobs are not meant to be order optimized in
+    #   any way. Since they operate occasionally it is irrelevant.
+    def schedule_revoke_lost_partitions_jobs
       revoked_partitions = @client.rebalance_manager.revoked_partitions

       return false if revoked_partitions.empty?

       revoked_partitions.each do |topic, partitions|
         partitions.each do |partition|
-
-          executor = @executors.fetch(topic, partition,
+          pause_tracker = @pauses_manager.fetch(topic, partition)
+          executor = @executors.fetch(topic, partition, pause_tracker)
           @jobs_queue << Processing::Jobs::Revoked.new(executor)
         end
       end
@@ -122,8 +126,8 @@ module Karafka
     # Takes the messages per topic partition and enqueues processing jobs in threads.
     #
     # @param messages_buffer [Karafka::Connection::MessagesBuffer] buffer with messages
-    def
-      messages_buffer
+    def schedule_partitions_jobs(messages_buffer)
+      @scheduler.call(messages_buffer) do |topic, partition, messages|
        pause = @pauses_manager.fetch(topic, partition)

        next if pause.paused?
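Editor's note (not part of the package diff): the listener now delegates work distribution to `App.config.internal.scheduler` (the new data/lib/karafka/scheduler.rb is listed in the summary above but its contents are not shown here). Going only by how it is invoked, `@scheduler.call(messages_buffer)` yielding `topic, partition, messages`, a FIFO-style stand-in could look like the sketch below; it is not the actual Karafka::Scheduler from this release:

class FifoScheduler
  # @param messages_buffer [Karafka::Connection::MessagesBuffer] buffer with polled messages
  # Yields each topic partition data set in the order in which it was buffered
  def call(messages_buffer)
    # MessagesBuffer#each now delegates to the topic => { partition => messages } hash
    messages_buffer.each do |topic, partitions|
      partitions.each do |partition, messages|
        yield(topic, partition, messages)
      end
    end
  end
end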
data/lib/karafka/connection/messages_buffer.rb
CHANGED
@@ -10,6 +10,10 @@ module Karafka
     class MessagesBuffer
       attr_reader :size

+      extend Forwardable
+
+      def_delegators :@groups, :each
+
       # @return [Karafka::Connection::MessagesBuffer] buffer instance
       def initialize
         @size = 0
@@ -20,19 +24,6 @@ module Karafka
         end
       end

-      # Iterates over aggregated data providing messages per topic partition.
-      #
-      # @yieldparam [String] topic name
-      # @yieldparam [Integer] partition number
-      # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
-      def each
-        @groups.each do |topic, partitions|
-          partitions.each do |partition, messages|
-            yield(topic, partition, messages)
-          end
-        end
-      end
-
       # Adds a message to the buffer.
       #
       # @param message [Rdkafka::Consumer::Message] raw rdkafka message
@@ -42,6 +33,37 @@ module Karafka
         @groups[message.topic][message.partition] << message
       end

+      # Removes given topic and partition data out of the buffer
+      # This is used when there's a partition revocation
+      # @param topic [String] topic we're interested in
+      # @param partition [Integer] partition of which data we want to remove
+      def delete(topic, partition)
+        return unless @groups.key?(topic)
+        return unless @groups.fetch(topic).key?(partition)
+
+        topic_data = @groups.fetch(topic)
+        topic_data.delete(partition)
+
+        recount!
+
+        # If there are no more partitions to handle in a given topic, remove it completely
+        @groups.delete(topic) if topic_data.empty?
+      end
+
+      # Removes duplicated messages from the same partitions
+      # This should be used only when rebalance occurs, as we may get data again we already have
+      # due to the processing from the last offset. In cases like this, we may get same data
+      # again and we do want to ensure as few duplications as possible
+      def uniq!
+        @groups.each_value do |partitions|
+          partitions.each_value do |messages|
+            messages.uniq!(&:offset)
+          end
+        end
+
+        recount!
+      end
+
       # Removes all the data from the buffer.
       #
       # @note We do not clear the whole groups hash but rather we clear the partition hashes, so
@@ -52,6 +74,15 @@ module Karafka
         @size = 0
         @groups.each_value(&:clear)
       end
+
+      private
+
+      # Updates the messages count if we performed any operations that could change the state
+      def recount!
+        @size = @groups.each_value.sum do |partitions|
+          partitions.each_value.map(&:count).sum
+        end
+      end
     end
   end
 end
data/lib/karafka/connection/pauses_manager.rb
CHANGED
@@ -12,11 +12,11 @@ module Karafka
        end
      end

-     # Creates or fetches pause of a given topic partition.
+     # Creates or fetches pause tracker of a given topic partition.
      #
      # @param topic [String] topic name
      # @param partition [Integer] partition number
-     # @return [Karafka::TimeTrackers::Pause] pause instance
+     # @return [Karafka::TimeTrackers::Pause] pause tracker instance
      def fetch(topic, partition)
        @pauses[topic][partition] ||= TimeTrackers::Pause.new(
          timeout: Karafka::App.config.pause_timeout,