karafka 2.0.0.alpha4 → 2.0.0.beta1
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +26 -3
- data/Gemfile.lock +11 -11
- data/bin/integrations +55 -43
- data/docker-compose.yml +4 -1
- data/karafka.gemspec +1 -1
- data/lib/karafka/base_consumer.rb +65 -12
- data/lib/karafka/connection/client.rb +35 -5
- data/lib/karafka/connection/listener.rb +11 -7
- data/lib/karafka/connection/messages_buffer.rb +44 -13
- data/lib/karafka/connection/pauses_manager.rb +2 -2
- data/lib/karafka/connection/rebalance_manager.rb +35 -20
- data/lib/karafka/contracts/config.rb +1 -0
- data/lib/karafka/instrumentation/{stdout_listener.rb → logger_listener.rb} +1 -1
- data/lib/karafka/instrumentation/monitor.rb +2 -1
- data/lib/karafka/pro/active_job/dispatcher.rb +9 -9
- data/lib/karafka/pro/active_job/job_options_contract.rb +9 -9
- data/lib/karafka/pro/loader.rb +13 -8
- data/lib/karafka/pro/performance_tracker.rb +80 -0
- data/lib/karafka/processing/executor.rb +15 -10
- data/lib/karafka/processing/jobs/base.rb +16 -0
- data/lib/karafka/processing/jobs/consume.rb +7 -2
- data/lib/karafka/processing/jobs_queue.rb +18 -9
- data/lib/karafka/processing/worker.rb +23 -0
- data/lib/karafka/railtie.rb +12 -0
- data/lib/karafka/scheduler.rb +21 -0
- data/lib/karafka/setup/config.rb +3 -1
- data/lib/karafka/templates/karafka.rb.erb +1 -1
- data/lib/karafka/time_trackers/pause.rb +10 -2
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +8 -6
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f108cb4288d0ed0510381f51c77d49e052b947f6180c9b9c0b06e0ac2b599894
+  data.tar.gz: 3d79066d0107c08f450ca9f4c3b5c4a39aae497836c80bf8380c65f1406b82c0
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4aae257010c992c59ce4b01ead54ff2cfd4e8ccd8cbe6b52214b3cedf8f879690e0d577f2b41f44b1ab6888d7e27bbc92f3ba4a69e8b127687fb4c43bff51fbc
+  data.tar.gz: f65e425cb84152d20a055bdb9a94fd98280597cdf5e431337cb8604040534cacbfdd03efd6dc23b86c9ecf25721c860bd55ca75ad3f98e4c66136a88c1efc4e7
checksums.yaml.gz.sig
CHANGED
Binary file
data/.ruby-version
CHANGED
@@ -1 +1 @@
-3.1.
+3.1.2
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,29 @@
 # Karafka framework changelog
 
-## 2.0.0-
+## 2.0.0-beta1 (2022-05-22)
+- Update the jobs queue blocking engine and allow for non-blocking jobs execution
+- Provide `#prepared` hook that always runs before the fetching loop is unblocked
+- [Pro] Introduce performance tracker for scheduling optimizer
+- Provide the ability to pause (`#pause`) and resume (`#resume`) given partitions from the consumers
+- Small integration specs refactoring + specs for pausing scenarios
+
+## 2.0.0-alpha6 (2022-04-17)
+- Fix a bug where, upon a missing boot file and Rails, the railtie would fail with a generic exception (#818)
+- Fix an issue with parallel pristine specs colliding with each other during `bundle install` (#820)
+- Replace the `consumer.consume` event with `consumer.consumed` to match the behaviour
+- Make sure that offset committing happens before the `consumer.consumed` event is propagated
+- Fix for failing when not installed (just a dependency) (#817)
+- Evict messages from partitions that were lost upon rebalancing (#825)
+- Do **not** run `#revoked` on partitions that were lost and assigned back upon rebalancing (#825)
+- Remove potential duplicates that could occur upon rebalance with re-assigned partitions (#825)
+- Optimize the integration test suite additional consumers shutdown process (#828)
+- Optimize messages eviction and duplicates removal on polls stopped due to lack of messages
+- Add static group membership integration spec
+
+## 2.0.0-alpha5 (2022-04-03)
+- Rename StdoutListener to LoggerListener (#811)
+
+## 2.0.0-alpha4 (2022-03-20)
 - Rails support without ActiveJob queue adapter usage (#805)
 
 ## 2.0.0-alpha3 (2022-03-16)
@@ -10,12 +33,12 @@
 
 ## 2.0.0-alpha2 (2022-02-19)
 - Require `kafka` keys to be symbols
-- Added ActiveJob Pro adapter
+- [Pro] Added ActiveJob Pro adapter
 - Small updates to the license and docs
 
 ## 2.0.0-alpha1 (2022-01-30)
 - Change license to `LGPL-3.0`
-- Introduce a Pro subscription
+- [Pro] Introduce a Pro subscription
 - Switch from `ruby-kafka` to `librdkafka` as an underlying driver
 - Introduce fully automatic integration tests that go through the whole server lifecycle
 - Integrate WaterDrop tightly with autoconfiguration inheritance and an option to redefine it
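Since alpha6 the `consumer.consume` event is published as `consumer.consumed`. A minimal subscription sketch; the handler body is illustrative, and `event[:caller]` / `event[:time]` assume the payload that `instrument('consumer.consumed', caller: self)` publishes:

Karafka.monitor.subscribe('consumer.consumed') do |event|
  # The consumer instance is passed in the payload as `caller: self`
  consumer = event[:caller]
  Karafka.logger.info("#{consumer.class} consumed a batch in #{event[:time]}ms")
end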
data/Gemfile.lock
CHANGED
@@ -1,31 +1,31 @@
 PATH
   remote: .
   specs:
-    karafka (2.0.0.alpha4)
+    karafka (2.0.0.beta1)
       dry-configurable (~> 0.13)
       dry-monitor (~> 0.5)
       dry-validation (~> 1.7)
       rdkafka (>= 0.10)
       thor (>= 0.20)
-      waterdrop (>= 2.
+      waterdrop (>= 2.3.0, < 3.0.0)
       zeitwerk (~> 2.3)
 
 GEM
   remote: https://rubygems.org/
   specs:
-    activejob (7.0.
-      activesupport (= 7.0.
+    activejob (7.0.3)
+      activesupport (= 7.0.3)
       globalid (>= 0.3.6)
-    activesupport (7.0.
+    activesupport (7.0.3)
       concurrent-ruby (~> 1.0, >= 1.0.2)
       i18n (>= 1.6, < 2)
       minitest (>= 5.1)
       tzinfo (~> 2.0)
     byebug (11.1.3)
-    concurrent-ruby (1.1.
+    concurrent-ruby (1.1.10)
     diff-lcs (1.5.0)
     docile (1.4.0)
-    dry-configurable (0.
+    dry-configurable (0.15.0)
       concurrent-ruby (~> 1.0)
       dry-core (~> 0.6)
     dry-container (0.9.0)
@@ -64,7 +64,7 @@ GEM
       dry-core (~> 0.5, >= 0.5)
       dry-initializer (~> 3.0)
       dry-schema (~> 1.9, >= 1.9.1)
-    factory_bot (6.2.
+    factory_bot (6.2.1)
       activesupport (>= 5.0.0)
     ffi (1.15.5)
     globalid (1.0.0)
@@ -87,7 +87,7 @@ GEM
     rspec-expectations (3.11.0)
       diff-lcs (>= 1.2.0, < 2.0)
       rspec-support (~> 3.11.0)
-    rspec-mocks (3.11.
+    rspec-mocks (3.11.1)
       diff-lcs (>= 1.2.0, < 2.0)
       rspec-support (~> 3.11.0)
     rspec-support (3.11.0)
@@ -100,7 +100,7 @@ GEM
     thor (1.2.1)
     tzinfo (2.0.4)
       concurrent-ruby (~> 1.0)
-    waterdrop (2.
+    waterdrop (2.3.0)
       concurrent-ruby (>= 1.1)
       dry-configurable (~> 0.13)
       dry-monitor (~> 0.5)
@@ -121,4 +121,4 @@ DEPENDENCIES
   simplecov
 
 BUNDLED WITH
-   2.3.
+   2.3.11
data/bin/integrations
CHANGED
@@ -44,17 +44,30 @@ class Scenario
   # @param path [String] path to the scenarios file
   def initialize(path)
     @path = path
-    @stdin, @stdout, @stderr, @wait_thr = Open3.popen3(init_and_build_cmd)
-    @started_at = current_time
     # Last 1024 characters from stdout
     @stdout_tail = ''
   end
 
+  # Starts running given scenario in a separate process
+  def start
+    @stdin, @stdout, @stderr, @wait_thr = Open3.popen3(init_and_build_cmd)
+    @started_at = current_time
+  end
+
   # @return [String] integration spec name
   def name
     @path.gsub("#{ROOT_PATH}/spec/integrations/", '')
   end
 
+  # @return [Boolean] true if spec is pristine
+  def pristine?
+    scenario_dir = File.dirname(@path)
+
+    # If there is a Gemfile in a scenario directory, it means it is a pristine spec and we need
+    # to run bundle install, etc in order to run it
+    File.exist?(File.join(scenario_dir, 'Gemfile'))
+  end
+
   # @return [Boolean] did this scenario finish or is it still running
   def finished?
     # If the thread is running too long, kill it
@@ -73,6 +86,13 @@ class Scenario
     !@wait_thr.alive?
   end
 
+  # @return [Boolean] did this scenario finish successfully or not
+  def success?
+    expected_exit_codes = EXIT_CODES[name] || EXIT_CODES[:default]
+
+    expected_exit_codes.include?(exit_code)
+  end
+
   # @return [Integer] pid of the process of this scenario
   def pid
     @wait_thr.pid
@@ -84,13 +104,6 @@ class Scenario
     @wait_thr.value&.exitstatus || 123
   end
 
-  # @return [Boolean] did this scenario finish successfully or not
-  def success?
-    expected_exit_codes = EXIT_CODES[name] || EXIT_CODES[:default]
-
-    expected_exit_codes.include?(exit_code)
-  end
-
   # Prints a status report when scenario is finished and stdout if it failed
   def report
     result = success? ? "\e[#{32}m#{'OK'}\e[0m" : "\e[#{31}m#{'FAILED'}\e[0m"
@@ -109,11 +122,10 @@ class Scenario
   # Sets up a proper environment for a given spec to run and returns the run command
   # @return [String] run command
   def init_and_build_cmd
-    scenario_dir = File.dirname(@path)
-
     # If there is a Gemfile in a scenario directory, it means it is a pristine spec and we need
     # to run bundle install, etc in order to run it
-    if
+    if pristine?
+      scenario_dir = File.dirname(@path)
      # We copy the spec into a temp dir, not to pollute the spec location with logs, etc
      temp_dir = Dir.mktmpdir
      file_name = File.basename(@path)
@@ -141,31 +153,6 @@ class Scenario
   end
 end
 
-# Simple array to keep track of active integration processes threads running with info on which
-# test scenario is running
-active_scenarios = []
-
-# Finished runners
-finished_scenarios = []
-
-# Waits for any of the processes to be finished and tracks exit codes
-#
-# @param active_scenarios [Array] active runners
-# @param finished_scenarios [Hash] finished forks exit codes
-def wait_and_track(active_scenarios, finished_scenarios)
-  exited = active_scenarios.find(&:finished?)
-
-  if exited
-    scenario = active_scenarios.delete(exited)
-
-    scenario.report
-
-    finished_scenarios << scenario
-  else
-    Thread.pass
-  end
-end
-
 # Load all the specs
 specs = Dir[ROOT_PATH.join('spec/integrations/**/*.rb')]
 
@@ -182,15 +169,40 @@ seed = (ENV['SEED'] || rand(0..10_000)).to_i
 
 puts "Random seed: #{seed}"
 
-
-
+scenarios = specs
+  .shuffle(random: Random.new(seed))
+  .map { |integration_test| Scenario.new(integration_test) }
 
-
+regulars = scenarios.reject(&:pristine?)
+pristine = scenarios.select(&:pristine?)
 
-
-
+active_scenarios = []
+finished_scenarios = []
+
+while finished_scenarios.size < scenarios.size
+  # If we have space to run another scenario, we add it
+  if active_scenarios.size < CONCURRENCY
+    scenario = nil
+    # We can run only one pristine at the same time due to concurrency issues within bundler
+    # Since they usually take longer than others, we try to run them as fast as possible when
+    # there is a slot
+    scenario = pristine.pop unless active_scenarios.any?(&:pristine?)
+    scenario ||= regulars.pop
+
+    if scenario
+      scenario.start
+      active_scenarios << scenario
+    end
+  end
 
-
+  active_scenarios.select(&:finished?).each do |exited|
+    scenario = active_scenarios.delete(exited)
+    scenario.report
+    finished_scenarios << scenario
+  end
+
+  sleep(0.1)
+end
 
 # Fail all if any of the tests does not have expected exit code
 raise IntegrationTestError unless finished_scenarios.all?(&:success?)
data/docker-compose.yml
CHANGED
@@ -14,7 +14,10 @@ services:
       KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
       KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
       KAFKA_CREATE_TOPICS:
-        "
+        "integrations_0_02:2:1,\
+        integrations_1_02:2:1,\
+        integrations_2_02:2:1,\
+        integrations_0_03:3:1,\
         integrations_1_03:3:1,\
         integrations_2_03:3:1,\
         integrations_0_10:10:1,\
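Each entry in `KAFKA_CREATE_TOPICS` follows the `name:partitions:replicas` convention of the Kafka image's topic bootstrap, so the new `integrations_0_02:2:1` entries pre-create two-partition topics with a replication factor of 1, presumably for the new pausing integration specs.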
data/karafka.gemspec
CHANGED
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
   spec.add_dependency 'dry-validation', '~> 1.7'
   spec.add_dependency 'rdkafka', '>= 0.10'
   spec.add_dependency 'thor', '>= 0.20'
-  spec.add_dependency 'waterdrop', '>= 2.
+  spec.add_dependency 'waterdrop', '>= 2.3.0', '< 3.0.0'
   spec.add_dependency 'zeitwerk', '~> 2.3'
 
   spec.required_ruby_version = '>= 2.6.0'
data/lib/karafka/base_consumer.rb
CHANGED
@@ -10,8 +10,8 @@ module Karafka
     attr_accessor :messages
     # @return [Karafka::Connection::Client] kafka connection client
     attr_accessor :client
-    # @return [Karafka::TimeTrackers::Pause] current topic partition pause
-    attr_accessor :
+    # @return [Karafka::TimeTrackers::Pause] current topic partition pause tracker
+    attr_accessor :pause_tracker
     # @return [Waterdrop::Producer] producer instance
     attr_accessor :producer
@@ -21,18 +21,18 @@ module Karafka
     # that may not yet kick in when error occurs. That way we pause always on the last processed
     # message.
     def on_consume
-      Karafka.monitor.instrument('consumer.
+      Karafka.monitor.instrument('consumer.consumed', caller: self) do
         consume
-      end
 
-
+        pause_tracker.reset
 
-
-
+        # Mark as consumed only if manual offset management is not on
+        return if topic.manual_offset_management
 
-
-
-
+        # We use the non-blocking one here. If someone needs the blocking one, they can
+        # implement it with manual offset management
+        mark_as_consumed(messages.last)
+      end
     rescue StandardError => e
       Karafka.monitor.instrument(
         'error.occurred',
@@ -40,8 +40,8 @@ module Karafka
         caller: self,
         type: 'consumer.consume.error'
       )
-
-      pause.
+
+      pause(@seek_offset || messages.first.offset)
     end
 
     # Trigger method for running on shutdown.
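Net effect of the two hunks above: on error, the consumer now pauses the partition at the first offset that was not yet marked as consumed (`@seek_offset` when part of the batch already succeeded, otherwise the batch's first message), so a retry resumes exactly where processing stopped instead of re-consuming the whole batch.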
@@ -76,8 +76,31 @@ module Karafka
       )
     end
 
+    # Can be used to run preparation code
+    #
+    # @private
+    # @note This should not be used by the end users as it is part of the lifecycle of things
+    #   but not part of the public API. This can act as a hook when creating non-blocking
+    #   consumers and doing other advanced stuff
+    def on_prepared
+      Karafka.monitor.instrument('consumer.prepared', caller: self) do
+        prepared
+      end
+    rescue StandardError => e
+      Karafka.monitor.instrument(
+        'error.occurred',
+        error: e,
+        caller: self,
+        type: 'consumer.prepared.error'
+      )
+    end
+
     private
 
+    # Method that gets called in the blocking flow allowing to setup any type of resources or to
+    # send additional commands to Kafka before the proper execution starts.
+    def prepared; end
+
     # Method that will perform business logic on data received from Kafka (it will consume
     # the data)
     # @note This method needs to be implemented in a subclass. We stub it here as a failover if
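The hook pairs with an override in a consumer subclass. A minimal sketch; `EventsConsumer` and the log lines are illustrative, not part of the gem:

class EventsConsumer < Karafka::BaseConsumer
  def consume
    messages.each { |message| Karafka.logger.info(message.raw_payload) }
  end

  private

  # Runs in the blocking flow before the fetching loop is unblocked, so it can
  # set up per-batch resources ahead of the actual #consume job
  def prepared
    Karafka.logger.info("About to process #{messages.count} messages")
  end
end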
@@ -97,6 +120,10 @@ module Karafka
     # Marks message as consumed in an async way.
     #
     # @param message [Messages::Message] last successfully processed message.
+    # @note We keep track of this offset in case we would mark as consumed and get an error when
+    #   processing another message. In a case like this we do not pause on the message we've
+    #   already processed but rather on the next one. This applies to both sync and async
+    #   versions of this method.
     def mark_as_consumed(message)
       client.mark_as_consumed(message)
       @seek_offset = message.offset + 1
@@ -110,6 +137,32 @@ module Karafka
       @seek_offset = message.offset + 1
     end
 
+    # Pauses processing on a given offset for the current topic partition
+    #
+    # After given partition is resumed, it will continue processing from the given offset
+    # @param offset [Integer] offset from which we want to restart the processing
+    # @param timeout [Integer, nil] how long in milliseconds do we want to pause or nil to use
+    #   the default exponential pausing strategy defined for retries
+    def pause(offset, timeout = nil)
+      client.pause(
+        messages.metadata.topic,
+        messages.metadata.partition,
+        offset
+      )
+
+      timeout ? pause_tracker.pause(timeout) : pause_tracker.pause
+    end
+
+    # Resumes processing of the current topic partition
+    def resume
+      client.resume(
+        messages.metadata.topic,
+        messages.metadata.partition
+      )
+
+      pause_tracker.expire
+    end
+
     # Seeks in the context of current topic and partition
     #
     # @param offset [Integer] offset where we want to seek
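These methods back the changelog entry about pausing and resuming partitions from consumers. A throttling sketch, assuming the default behaviour where an expired pause gets resumed by the listener; the class name and the 1000 ms value are illustrative:

class ThrottledConsumer < Karafka::BaseConsumer
  def consume
    messages.each { |message| Karafka.logger.info(message.raw_payload) }

    # Back off for one second, then continue from the offset right after
    # the last message we processed
    pause(messages.last.offset + 1, 1_000)
  end
end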
data/lib/karafka/connection/client.rb
CHANGED
@@ -48,6 +48,7 @@ module Karafka
       time_poll.start
 
       @buffer.clear
+      @rebalance_manager.clear
 
       loop do
         # Don't fetch more messages if we do not have any time left
@@ -58,13 +59,23 @@ module Karafka
         # Fetch message within our time boundaries
         message = poll(time_poll.remaining)
 
-        #
-
-
-        @buffer << message
+        # Put a message to the buffer if there is one
+        @buffer << message if message
 
         # Track time spent on all of the processing and polling
         time_poll.checkpoint
+
+        # Upon polling, the rebalance manager might have been updated.
+        # If partition revocation happens, we need to remove messages from revoked partitions
+        # as well as ensure we do not have duplicates due to the offset reset for partitions
+        # that we got assigned
+        remove_revoked_and_duplicated_messages if @rebalance_manager.revoked_partitions?
+
+        # Finally, once we've (potentially) removed revoked partitions, etc, if no messages
+        # were returned we can break.
+        # Worth keeping in mind that the rebalance manager might have been updated despite no
+        # messages being returned during a poll
+        break unless message
       end
 
       @buffer
@@ -84,6 +95,9 @@ module Karafka
     # Ignoring a case where there would not be an offset (for example when rebalance occurs).
     #
     # @param async [Boolean] should the commit happen async or sync (async by default)
+    # @return [Boolean] whether committing was successful. It may not be, when we no longer own
+    #   the given partition.
+    #
     # @note This will commit all the offsets for the whole consumer. In order to achieve
     #   granular control over where the offset should be for particular topic partitions, the
     #   store_offset should be used to only store a new offset when we want it to be flushed
@@ -212,6 +226,8 @@ module Karafka
       ::Karafka::Instrumentation.error_callbacks.delete(@subscription_group.id)
 
       @kafka.close
+      @buffer.clear
+      @rebalance_manager.clear
     end
   end
 
@@ -232,7 +248,7 @@ module Karafka
     # Performs a single poll operation.
     #
     # @param timeout [Integer] timeout for a single poll
-    # @return [
+    # @return [Rdkafka::Consumer::Message, nil] fetched message or nil if nothing polled
     def poll(timeout)
       time_poll ||= TimeTrackers::Poll.new(timeout)
 
@@ -301,6 +317,20 @@ module Karafka
 
       consumer
     end
+
+    # We may have a case where in the middle of data polling, we've lost a partition.
+    # In a case like this we should remove all the pre-buffered messages from lost partitions
+    # as we are no longer responsible in a given process for processing those messages and
+    # they should have been picked up by a different process.
+    def remove_revoked_and_duplicated_messages
+      @rebalance_manager.revoked_partitions.each do |topic, partitions|
+        partitions.each do |partition|
+          @buffer.delete(topic, partition)
+        end
+      end
+
+      @buffer.uniq!
+    end
   end
 end
 end
data/lib/karafka/connection/listener.rb
CHANGED
@@ -15,6 +15,8 @@ module Karafka
       @pauses_manager = PausesManager.new
       @client = Client.new(@subscription_group)
       @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
+      # We reference scheduler here as it is much faster than fetching this each time
+      @scheduler = ::Karafka::App.config.internal.scheduler
     end
 
     # Runs the main listener fetch loop.
@@ -66,9 +68,9 @@ module Karafka
       # distributing consuming jobs as upon revoking, we might get assigned to the same
       # partitions, thus getting their jobs. The revoking jobs need to finish before
       # appropriate consumers are taken down and re-created
-      wait(@subscription_group) if
+      wait(@subscription_group) if schedule_revoke_lost_partitions_jobs
 
-
+      schedule_partitions_jobs(messages_buffer)
 
       # We wait only on jobs from our subscription group. Other groups are independent.
       wait(@subscription_group)
@@ -103,15 +105,17 @@ module Karafka
 
     # Enqueues revoking jobs for partitions that were taken away from the running process.
     # @return [Boolean] was there anything to revoke
-
+    # @note We do not use the scheduler here as those jobs are not meant to be order optimized
+    #   in any way. Since they operate occasionally, it is irrelevant.
+    def schedule_revoke_lost_partitions_jobs
       revoked_partitions = @client.rebalance_manager.revoked_partitions
 
       return false if revoked_partitions.empty?
 
       revoked_partitions.each do |topic, partitions|
         partitions.each do |partition|
-
-          executor = @executors.fetch(topic, partition,
+          pause_tracker = @pauses_manager.fetch(topic, partition)
+          executor = @executors.fetch(topic, partition, pause_tracker)
          @jobs_queue << Processing::Jobs::Revoked.new(executor)
        end
      end
@@ -122,8 +126,8 @@ module Karafka
     # Takes the messages per topic partition and enqueues processing jobs in threads.
     #
     # @param messages_buffer [Karafka::Connection::MessagesBuffer] buffer with messages
-    def
-      messages_buffer
+    def schedule_partitions_jobs(messages_buffer)
+      @scheduler.call(messages_buffer) do |topic, partition, messages|
        pause = @pauses_manager.fetch(topic, partition)
 
        next if pause.paused?
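`schedule_partitions_jobs` now routes ordering decisions through the configured scheduler (`internal.scheduler`; see the new `data/lib/karafka/scheduler.rb` in the file list above). A sketch of an object satisfying the `#call(messages_buffer)` contract assumed above; this is an illustration, not the gem's implementation:

class FifoScheduler
  # Enqueues jobs in plain FIFO order
  # @param messages_buffer [Karafka::Connection::MessagesBuffer]
  def call(messages_buffer)
    # MessagesBuffer#each now delegates to the underlying groups hash,
    # yielding topic => partitions pairs (see the buffer changes below)
    messages_buffer.each do |topic, partitions|
      partitions.each do |partition, messages|
        yield(topic, partition, messages)
      end
    end
  end
end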
data/lib/karafka/connection/messages_buffer.rb
CHANGED
@@ -10,6 +10,10 @@ module Karafka
     class MessagesBuffer
       attr_reader :size
 
+      extend Forwardable
+
+      def_delegators :@groups, :each
+
       # @return [Karafka::Connection::MessagesBuffer] buffer instance
       def initialize
         @size = 0
@@ -20,19 +24,6 @@ module Karafka
         end
       end
 
-      # Iterates over aggregated data providing messages per topic partition.
-      #
-      # @yieldparam [String] topic name
-      # @yieldparam [Integer] partition number
-      # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
-      def each
-        @groups.each do |topic, partitions|
-          partitions.each do |partition, messages|
-            yield(topic, partition, messages)
-          end
-        end
-      end
-
       # Adds a message to the buffer.
       #
       # @param message [Rdkafka::Consumer::Message] raw rdkafka message
@@ -42,6 +33,37 @@ module Karafka
         @groups[message.topic][message.partition] << message
       end
 
+      # Removes given topic and partition data out of the buffer
+      # This is used when there's a partition revocation
+      # @param topic [String] topic we're interested in
+      # @param partition [Integer] partition of which data we want to remove
+      def delete(topic, partition)
+        return unless @groups.key?(topic)
+        return unless @groups.fetch(topic).key?(partition)
+
+        topic_data = @groups.fetch(topic)
+        topic_data.delete(partition)
+
+        recount!
+
+        # If there are no more partitions to handle in a given topic, remove it completely
+        @groups.delete(topic) if topic_data.empty?
+      end
+
+      # Removes duplicated messages from the same partitions
+      # This should be used only when a rebalance occurs, as we may get data we already have
+      # due to reprocessing from the last committed offset. In cases like this we want to
+      # ensure as few duplications as possible
+      def uniq!
+        @groups.each_value do |partitions|
+          partitions.each_value do |messages|
+            messages.uniq!(&:offset)
+          end
+        end
+
+        recount!
+      end
+
       # Removes all the data from the buffer.
       #
       # @note We do not clear the whole groups hash but rather we clear the partition hashes, so
@@ -52,6 +74,15 @@ module Karafka
         @size = 0
         @groups.each_value(&:clear)
       end
+
+      private
+
+      # Updates the messages count if we performed any operations that could change the state
+      def recount!
+        @size = @groups.each_value.sum do |partitions|
+          partitions.each_value.map(&:count).sum
+        end
+      end
     end
   end
 end
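The offset-based dedup inside `#uniq!` can be shown in isolation; `Message` below is a stand-in struct, not the rdkafka message class:

Message = Struct.new(:offset, :payload)

batch = [Message.new(0, 'a'), Message.new(1, 'b'), Message.new(0, 'a')]

# Array#uniq! keeps the first occurrence per block value
batch.uniq!(&:offset)

batch.map(&:offset) # => [0, 1]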
data/lib/karafka/connection/pauses_manager.rb
CHANGED
@@ -12,11 +12,11 @@ module Karafka
       end
     end
 
-    # Creates or fetches pause of a given topic partition.
+    # Creates or fetches pause tracker of a given topic partition.
     #
     # @param topic [String] topic name
     # @param partition [Integer] partition number
-    # @return [Karafka::TimeTrackers::Pause] pause instance
+    # @return [Karafka::TimeTrackers::Pause] pause tracker instance
     def fetch(topic, partition)
       @pauses[topic][partition] ||= TimeTrackers::Pause.new(
         timeout: Karafka::App.config.pause_timeout,