karafka 2.0.17 → 2.0.19
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +0 -1
- data/CHANGELOG.md +16 -0
- data/Gemfile.lock +1 -1
- data/karafka.gemspec +5 -0
- data/lib/active_job/karafka.rb +3 -1
- data/lib/karafka/app.rb +3 -2
- data/lib/karafka/connection/client.rb +36 -19
- data/lib/karafka/connection/consumer_group_coordinator.rb +47 -0
- data/lib/karafka/connection/listener.rb +49 -9
- data/lib/karafka/connection/listeners_batch.rb +12 -2
- data/lib/karafka/embedded.rb +10 -0
- data/lib/karafka/instrumentation/logger_listener.rb +15 -10
- data/lib/karafka/instrumentation/notifications.rb +1 -0
- data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +3 -3
- data/lib/karafka/pro/performance_tracker.rb +7 -5
- data/lib/karafka/pro/processing/coordinator.rb +24 -21
- data/lib/karafka/pro/processing/strategies/aj_dlq_mom.rb +2 -2
- data/lib/karafka/pro/processing/strategies/dlq.rb +8 -2
- data/lib/karafka/pro/processing/strategies/dlq_lrj.rb +2 -3
- data/lib/karafka/pro/processing/strategies/dlq_lrj_mom.rb +5 -3
- data/lib/karafka/pro/processing/strategies/dlq_mom.rb +6 -2
- data/lib/karafka/process.rb +10 -7
- data/lib/karafka/processing/jobs_queue.rb +10 -4
- data/lib/karafka/railtie.rb +2 -4
- data/lib/karafka/routing/features/dead_letter_queue/contract.rb +2 -0
- data/lib/karafka/routing/features/dead_letter_queue/topic.rb +2 -1
- data/lib/karafka/server.rb +18 -7
- data/lib/karafka/setup/attributes_map.rb +2 -0
- data/lib/karafka/status.rb +1 -0
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +6 -0
- data.tar.gz.sig +0 -0
- metadata +8 -2
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: be91c3848b757c6af4c25f905df2b081629532bd29dbcea23ed2ef0af2e4e4a2
+  data.tar.gz: 6823d4335e4b395546642101d6754b97958c86810cbcd12819559acff74bd90d
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: fce0259ee987e37c01ea037f81ea91b4eb770ea8eabcb9f93c66aa1a1960c903030648b5441945ef28f43a88660d18240e6db61f6885a169d70eb46174543616
+  data.tar.gz: f93985c98daba5965f8f0597da4744d1dae0603f24a6a44f4b337462f66c8b0f08d0c22dd734205a0aebe7c3dbdb73237ff568f7f1ff842b38c05a7f4b5ce463
checksums.yaml.gz.sig
CHANGED
Binary file
data/.github/workflows/ci.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,21 @@
 # Karafka framework changelog

+## 2.0.19 (2022-11-20)
+- **[Feature]** Provide the ability to skip failing messages without dispatching them to an alternative topic (DLQ).
+- [Improvement] Improve the integration with Ruby on Rails by preventing double-require of components.
+- [Improvement] Improve stability of the shutdown process upon critical errors.
+- [Improvement] Improve stability of the integrations spec suite.
+- [Fix] Fix an issue where upon fast startup of multiple subscription groups from the same consumer group, a ghost queue would be created due to problems in `Concurrent::Hash`.
+
+## 2.0.18 (2022-11-18)
+- **[Feature]** Support quiet mode via the `TSTP` signal. When used, Karafka will finish processing current messages, run `shutdown` jobs, and switch to a quiet mode where no new work is accepted. At the same time, it will keep the consumer group quiet, so no rebalance will be triggered. This can be particularly useful during deployments.
+- [Improvement] Trigger `#revoked` for jobs in case revocation happens during shutdown while jobs are still running. This ensures we get a notion of revocation for Pro LRJ jobs even when revocation happens upon shutdown (#1150).
+- [Improvement] Stabilize the shutdown procedure for consumer groups with many subscription groups that have non-aligned processing cost per batch.
+- [Improvement] Remove double loading of Karafka via the Rails railtie.
+- [Fix] Fix invalid class references in YARD docs.
+- [Fix] Prevent parallel closing of many clients.
+- [Fix] Fix a case where information about revocation for a combination of LRJ + VP would not be dispatched until all VP work is done.
+
 ## 2.0.17 (2022-11-10)
 - [Fix] Few typos around DLQ and Pro DLQ Dispatch original metadata naming.
 - [Fix] Narrow the components lookup to the appropriate scope (#1114)
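A quick orientation on the headline 2.0.18 change above: quiet mode is driven entirely by the `TSTP` signal. A minimal sketch of triggering it from the outside; the pid-file path here is purely illustrative:

  # Karafka finishes in-flight work, runs `shutdown` jobs, accepts no new work,
  # and keeps its consumer group membership, so no rebalance fires
  karafka_pid = Integer(File.read('tmp/karafka.pid').strip)
  Process.kill('TSTP', karafka_pid)

  # A regular TERM later performs the actual shutdown
  Process.kill('TERM', karafka_pid)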
data/Gemfile.lock
CHANGED
data/karafka.gemspec
CHANGED
@@ -34,7 +34,12 @@ Gem::Specification.new do |spec|
   spec.require_paths = %w[lib]

   spec.metadata = {
+    'funding_uri' => 'https://karafka.io/#become-pro',
+    'homepage_uri' => 'https://karafka.io',
+    'changelog_uri' => 'https://github.com/karafka/karafka/blob/master/CHANGELOG.md',
+    'bug_tracker_uri' => 'https://github.com/karafka/karafka/issues',
     'source_code_uri' => 'https://github.com/karafka/karafka',
+    'documentation_uri' => 'https://karafka.io/docs',
     'rubygems_mfa_required' => 'true'
   }
 end
data/lib/active_job/karafka.rb
CHANGED
data/lib/karafka/app.rb
CHANGED
@@ -14,11 +14,12 @@ module Karafka
         .builder
     end

-    # @return [
+    # @return [Hash] active subscription groups grouped based on consumer group in a hash
    def subscription_groups
      consumer_groups
        .active
-        .flat_map(&:subscription_groups)
+        .map { |consumer_group| [consumer_group, consumer_group.subscription_groups] }
+        .to_h
    end

    # Just a nicer name for the consumer groups
data/lib/karafka/connection/client.rb
CHANGED
@@ -17,7 +17,11 @@ module Karafka
     # How many times should we retry polling in case of a failure
     MAX_POLL_RETRIES = 20

-    private_constant :MAX_POLL_RETRIES
+    # We want to make sure we never close several clients in the same moment to prevent
+    # potential race conditions and other issues
+    SHUTDOWN_MUTEX = Mutex.new
+
+    private_constant :MAX_POLL_RETRIES, :SHUTDOWN_MUTEX

     # Creates a new consumer instance.
     #
@@ -237,6 +241,17 @@ module Karafka
       end
     end

+    # Runs a single poll ignoring all the potential errors
+    # This is used as a keep-alive in the shutdown stage and any errors that happen here are
+    # irrelevant from the shutdown process perspective
+    #
+    # This is used only to trigger rebalance callbacks
+    def ping
+      poll(100)
+    rescue Rdkafka::RdkafkaError
+      nil
+    end
+
     private

     # When we cannot store an offset, it means we no longer own the partition
@@ -281,24 +296,26 @@ module Karafka

     # Commits the stored offsets in a sync way and closes the consumer.
     def close
-      (previous implementation elided in the source view)
+      # Allow only one client to be closed at the same time
+      SHUTDOWN_MUTEX.synchronize do
+        # Make sure that no other operations are happening on this client when we close it
+        @mutex.synchronize do
+          # Once client is closed, we should not close it again
+          # This could only happen in case of a race-condition when forceful shutdown happens
+          # and triggers this from a different thread
+          return if @closed
+
+          @closed = true
+
+          # Remove callbacks runners that were registered
+          ::Karafka::Instrumentation.statistics_callbacks.delete(@subscription_group.id)
+          ::Karafka::Instrumentation.error_callbacks.delete(@subscription_group.id)
+
+          @kafka.close
+          @buffer.clear
+          # @note We do not clear rebalance manager here as we may still have revocation info
+          # here that we want to consider valid prior to running another reconnection
+        end
       end
     end
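The `#close` rework above combines three ingredients: a class-level mutex that serializes closing across all clients, a per-instance mutex that excludes other operations on the client being closed, and a flag that makes closing idempotent. A generic, self-contained sketch of the same pattern (the class and `release_resources` are illustrative, not Karafka's API):

  class Connection
    # Only one connection in the whole process may be closing at any moment
    SHUTDOWN_MUTEX = Mutex.new

    def initialize
      @mutex = Mutex.new
      @closed = false
    end

    def close
      SHUTDOWN_MUTEX.synchronize do
        # No other operation may run on this instance while it closes
        @mutex.synchronize do
          # A second (e.g. forceful) close from another thread becomes a no-op
          return if @closed

          @closed = true
          release_resources
        end
      end
    end

    private

    def release_resources
      # Stand-in for the real teardown (callbacks, sockets, buffers)
    end
  end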
data/lib/karafka/connection/consumer_group_coordinator.rb
ADDED
@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # This object represents a collective status of execution of group of listeners running inside
+    # of one consumer group but in separate subscription groups.
+    #
+    # There are cases when we do not want to close a given client when others from the same
+    # consumer group are running because it can cause instabilities due to early shutdown of some
+    # of the clients out of same consumer group.
+    #
+    # We also want to make sure, we close one consumer at a time while others can continue polling.
+    #
+    # This prevents a scenario, where a rebalance is not acknowledged and we loose assignment
+    # without having a chance to commit changes.
+    class ConsumerGroupCoordinator
+      # @param group_size [Integer] number of separate subscription groups in a consumer group
+      def initialize(group_size)
+        # We need two locks here:
+        # - first one is to decrement the number of listeners doing work
+        # - second to ensure only one client is being closed the same time and that others can
+        #   wait actively (not locked)
+        @work_mutex = Mutex.new
+        @shutdown_lock = Mutex.new
+        @group_size = group_size
+        @finished = Set.new
+      end
+
+      # @return [Boolean] can we start shutdown on a given listener
+      # @note If true, will also obtain a lock so no-one else will be closing the same time we do
+      def shutdown?
+        @finished.size == @group_size && @shutdown_lock.try_lock
+      end
+
+      # Unlocks the shutdown lock
+      def unlock
+        @shutdown_lock.unlock if @shutdown_lock.owned?
+      end
+
+      # Marks given listener as finished
+      # @param listener_id [String]
+      def finish_work(listener_id)
+        @finished << listener_id
+      end
+    end
+  end
+end
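Read together with the listener changes below, the intended call sequence is: each listener reports via `#finish_work` once its jobs are done, polls `#shutdown?` until every listener in the group has finished and it wins the internal try_lock, and releases the lock in an ensure block. A condensed sketch of that sequence, assuming Karafka is loaded; `do_work` and `close_client` are hypothetical stand-ins:

  coordinator = Karafka::Connection::ConsumerGroupCoordinator.new(3)

  listeners = 3.times.map do |i|
    Thread.new do
      do_work(i)
      coordinator.finish_work(i.to_s)

      # Poll until all three have finished and we win the shutdown lock;
      # only one thread at a time passes this gate
      sleep(0.2) until coordinator.shutdown?

      close_client(i)
    ensure
      coordinator.unlock
    end
  end

  listeners.each(&:join)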
data/lib/karafka/connection/listener.rb
CHANGED
@@ -14,13 +14,15 @@ module Karafka
     # @return [String] id of this listener
     attr_reader :id

+    # @param consumer_group_coordinator [Karafka::Connection::ConsumerGroupCoordinator]
     # @param subscription_group [Karafka::Routing::SubscriptionGroup]
     # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
     # @return [Karafka::Connection::Listener] listener instance
-    def initialize(subscription_group, jobs_queue)
+    def initialize(consumer_group_coordinator, subscription_group, jobs_queue)
       proc_config = ::Karafka::App.config.internal.processing

       @id = SecureRandom.uuid
+      @consumer_group_coordinator = consumer_group_coordinator
       @subscription_group = subscription_group
       @jobs_queue = jobs_queue
       @coordinators = Processing::CoordinatorsBuffer.new
@@ -82,7 +84,8 @@ module Karafka
     # Kafka connections / Internet connection issues / Etc. Business logic problems should not
     # propagate this far.
     def fetch_loop
-      until Karafka::App.stopping?
+      # Run the main loop as long as we are not stopping or moving into quiet mode
+      until Karafka::App.stopping? || Karafka::App.quieting?
        Karafka.monitor.instrument(
          'connection.listener.fetch_loop',
          caller: self,
@@ -122,7 +125,7 @@ module Karafka
        wait
      end

-      # If we are stopping we will no longer schedule any jobs despite polling.
+      # If we are stopping we will no longer schedule any regular jobs despite polling.
      # We need to keep polling not to exceed the `max.poll.interval` for long-running
      # non-blocking jobs and we need to allow them to finish. We however do not want to
      # enqueue any new jobs. It's worth keeping in mind that it is the end user responsibility
@@ -131,7 +134,14 @@ module Karafka
      #
      # We do not care about resuming any partitions or lost jobs as we do not plan to do
      # anything with them as we're in the shutdown phase.
-
+      #
+      # What we do care however is the ability to still run revocation jobs in case anything
+      # would change in the cluster. We still want to notify the long-running jobs about changes
+      # that occurred in the cluster.
+      wait_pinging(
+        wait_until: -> { @jobs_queue.empty?(@subscription_group.id) },
+        after_ping: -> { build_and_schedule_revoke_lost_partitions_jobs }
+      )

      # We do not want to schedule the shutdown jobs prior to finishing all the jobs
      # (including non-blocking) as there might be a long-running job with a shutdown and then
@@ -139,7 +149,24 @@ module Karafka
      # as it could create a race-condition.
      build_and_schedule_shutdown_jobs

-
+      # Wait until all the shutdown jobs are done
+      wait_pinging(wait_until: -> { @jobs_queue.empty?(@subscription_group.id) })
+
+      # Once all the work is done, we need to decrement counter of active subscription groups
+      # within this consumer group
+      @consumer_group_coordinator.finish_work(id)
+
+      # Wait if we're in the quiet mode
+      wait_pinging(wait_until: -> { !Karafka::App.quieting? })
+
+      # We need to wait until all the work in the whole consumer group (local to the process)
+      # is done. Otherwise we may end up with locks and `Timed out LeaveGroupRequest in flight`
+      # warning notifications.
+      wait_pinging(wait_until: -> { @consumer_group_coordinator.shutdown? })
+
+      # This extra ping will make sure we've refreshed the rebalance state after other instances
+      # potentially shutdown. This will prevent us from closing with a dangling callback
+      @client.ping

      shutdown

@@ -157,6 +184,8 @@ module Karafka
      restart

      sleep(1) && retry
+    ensure
+      @consumer_group_coordinator.unlock
    end

    # Resumes processing of partitions that were paused due to an error.
@@ -256,10 +285,21 @@ module Karafka
    end

    # Waits without blocking the polling
-    #
-    #
-
-
+    #
+    # This should be used only when we no longer plan to use any incoming messages data and we
+    # can safely discard it. We can however use the rebalance information if needed.
+    #
+    # @param wait_until [Proc] until this evaluates to true, we will poll data
+    # @param after_ping [Proc] code that we want to run after each ping (if any)
+    #
+    # @note Performance of this is not relevant (in regards to blocks) because it is used only
+    #   on shutdown and quiet, hence not in the running mode
+    def wait_pinging(wait_until:, after_ping: -> {})
+      until wait_until.call
+        @client.ping
+        after_ping.call
+        sleep(0.2)
+      end
    end

    # We can stop client without a problem, as it will reinitialize itself when running the
data/lib/karafka/connection/listeners_batch.rb
CHANGED
@@ -9,8 +9,18 @@ module Karafka
     # @param jobs_queue [JobsQueue]
     # @return [ListenersBatch]
     def initialize(jobs_queue)
-      @batch = App.subscription_groups.map do |subscription_group|
-        Connection::Listener.new(subscription_group, jobs_queue)
+      @batch = App.subscription_groups.flat_map do |_consumer_group, subscription_groups|
+        consumer_group_coordinator = Connection::ConsumerGroupCoordinator.new(
+          subscription_groups.size
+        )
+
+        subscription_groups.map do |subscription_group|
+          Connection::Listener.new(
+            consumer_group_coordinator,
+            subscription_group,
+            jobs_queue
+          )
+        end
       end
     end

data/lib/karafka/embedded.rb
CHANGED
@@ -18,6 +18,16 @@ module Karafka
        # Stop needs to be blocking to wait for all the things to finalize
        Karafka::Server.stop
      end
+
+      # Quiets Karafka upon any event
+      #
+      # @note This method is not blocking and will not wait for Karafka to fully quiet.
+      #   It will trigger the quiet procedure but won't wait.
+      #
+      # @note Please keep in mind you need to `#stop` to actually stop the server anyhow.
+      def quiet
+        Karafka::Server.quiet
+      end
    end
  end
end
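For embedded setups (Karafka running inside another Ruby process), the new method slots into the existing start/stop lifecycle; a brief usage sketch:

  # Somewhere during boot
  Karafka::Embedded.start

  # During a deployment window: stop accepting new work while keeping the
  # consumer group membership, so no rebalance is triggered (non-blocking)
  Karafka::Embedded.quiet

  # Later: actually shut the embedded server down (blocking)
  Karafka::Embedded.stop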
data/lib/karafka/instrumentation/logger_listener.rb
CHANGED
@@ -18,7 +18,7 @@ module Karafka

    # Logs each messages fetching attempt
    #
-    # @param event [
+    # @param event [Karafka::Core::Monitoring::Event] event details including payload
    def on_connection_listener_fetch_loop(event)
      listener = event[:caller]
      debug "[#{listener.id}] Polling messages..."
@@ -26,7 +26,7 @@ module Karafka

    # Logs about messages that we've received from Kafka
    #
-    # @param event [
+    # @param event [Karafka::Core::Monitoring::Event] event details including payload
    def on_connection_listener_fetch_loop_received(event)
      listener = event[:caller]
      time = event[:time]
@@ -42,7 +42,7 @@ module Karafka

    # Prints info about the fact that a given job has started
    #
-    # @param event [
+    # @param event [Karafka::Core::Monitoring::Event] event details including payload
    def on_worker_process(event)
      job = event[:job]
      job_type = job.class.to_s.split('::').last
@@ -53,7 +53,7 @@ module Karafka

    # Prints info about the fact that a given job has finished
    #
-    # @param event [
+    # @param event [Karafka::Core::Monitoring::Event] event details including payload
    def on_worker_processed(event)
      job = event[:job]
      time = event[:time]
@@ -66,7 +66,7 @@ module Karafka
    # Logs info about system signals that Karafka received and prints backtrace for threads in
    # case of ttin
    #
-    # @param event [
+    # @param event [Karafka::Core::Monitoring::Event] event details including payload
    def on_process_notice_signal(event)
      info "Received #{event[:signal]} system signal"

@@ -89,7 +89,7 @@ module Karafka

    # Logs info that we're running Karafka app.
    #
-    # @param _event [
+    # @param _event [Karafka::Core::Monitoring::Event] event details including payload
    def on_app_running(_event)
      info "Running in #{RUBY_DESCRIPTION}"
      info "Running Karafka #{Karafka::VERSION} server"
@@ -99,23 +99,28 @@ module Karafka
      info 'See LICENSE and the LGPL-3.0 for licensing details.'
    end

+    # @param _event [Karafka::Core::Monitoring::Event] event details including payload
+    def on_app_quieting(_event)
+      info 'Switching to quiet mode. New messages will not be processed.'
+    end
+
    # Logs info that we're going to stop the Karafka server.
    #
-    # @param _event [
+    # @param _event [Karafka::Core::Monitoring::Event] event details including payload
    def on_app_stopping(_event)
      info 'Stopping Karafka server'
    end

    # Logs info that we stopped the Karafka server.
    #
-    # @param _event [
+    # @param _event [Karafka::Core::Monitoring::Event] event details including payload
    def on_app_stopped(_event)
      info 'Stopped Karafka server'
    end

    # Logs info when we have dispatched a message the the DLQ
    #
-    # @param event [
+    # @param event [Karafka::Core::Monitoring::Event] event details including payload
    def on_dead_letter_queue_dispatched(event)
      message = event[:message]
      offset = message.offset
@@ -128,7 +133,7 @@ module Karafka

    # There are many types of errors that can occur in many places, but we provide a single
    # handler for all of them to simplify error instrumentation.
-    # @param event [
+    # @param event [Karafka::Core::Monitoring::Event] event details including payload
    def on_error_occurred(event)
      type = event[:type]
      error = event[:error]
data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb
CHANGED
@@ -42,7 +42,7 @@ module Karafka

      # Prints info about the fact that a given job has started
      #
-      # @param event [
+      # @param event [Karafka::Core::Monitoring::Event] event details including payload
      def on_worker_process(event)
        current_span = client.trace('karafka.consumer')
        push_tags
@@ -60,7 +60,7 @@ module Karafka

      # Prints info about the fact that a given job has finished
      #
-      # @param event [
+      # @param event [Karafka::Core::Monitoring::Event] event details including payload
      def on_worker_processed(event)
        push_tags

@@ -80,7 +80,7 @@ module Karafka

      # There are many types of errors that can occur in many places, but we provide a single
      # handler for all of them to simplify error instrumentation.
-      # @param event [
+      # @param event [Karafka::Core::Monitoring::Event] event details including payload
      def on_error_occurred(event)
        push_tags
data/lib/karafka/pro/performance_tracker.rb
CHANGED
@@ -25,11 +25,13 @@ module Karafka

    # Builds up nested concurrent hash for data tracking
    def initialize
-      @processing_times = Concurrent::
-        topics_hash
-
-
-
+      @processing_times = Concurrent::Map.new do |topics_hash, topic|
+        topics_hash.compute_if_absent(topic) do
+          Concurrent::Map.new do |partitions_hash, partition|
+            # This array does not have to be concurrent because we always access single
+            # partition data via instrumentation that operates in a single thread via consumer
+            partitions_hash.compute_if_absent(partition) { [] }
+          end
        end
      end
    end
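The nested `Concurrent::Map` construction is worth a note: the block given to `Concurrent::Map.new` runs on a key miss, and calling `compute_if_absent` inside it makes the insertion atomic, so two threads racing on the same missing key observe one shared value. A standalone sketch of the same structure (requires the concurrent-ruby gem):

  require 'concurrent'

  # topic -> partition -> array of timing samples
  times = Concurrent::Map.new do |topics, topic|
    topics.compute_if_absent(topic) do
      Concurrent::Map.new do |partitions, partition|
        partitions.compute_if_absent(partition) { [] }
      end
    end
  end

  times['events'][0] << 12.5
  times['events'][0] << 7.3
  puts times['events'][0].sum # => 19.8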
data/lib/karafka/pro/processing/coordinator.rb
CHANGED
@@ -20,10 +20,8 @@ module Karafka
      # @param args [Object] anything the base coordinator accepts
      def initialize(*args)
        super
-
-        @on_enqueued_invoked = false
-        @on_finished_invoked = false
-        @on_revoked_invoked = false
+
+        @executed = []
        @flow_lock = Mutex.new
      end
@@ -34,9 +32,7 @@ module Karafka
        super

        @mutex.synchronize do
-          @on_enqueued_invoked = false
-          @on_started_invoked = false
-          @on_finished_invoked = false
+          @executed.clear
          @last_message = messages.last
        end
      end
@@ -50,9 +46,7 @@ module Karafka
      # enqueued
      def on_enqueued
        @flow_lock.synchronize do
-          return if @on_enqueued_invoked
-
-          @on_enqueued_invoked = true
+          return unless executable?(:on_enqueued)

          yield(@last_message)
        end
@@ -61,9 +55,7 @@ module Karafka
      # Runs given code only once per all the coordinated jobs upon starting first of them
      def on_started
        @flow_lock.synchronize do
-          return if @on_started_invoked
-
-          @on_started_invoked = true
+          return unless executable?(:on_started)

          yield(@last_message)
        end
@@ -75,25 +67,36 @@ module Karafka
      def on_finished
        @flow_lock.synchronize do
          return unless finished?
-          return if @on_finished_invoked
-
-          @on_finished_invoked = true
+          return unless executable?(:on_finished)

          yield(@last_message)
        end
      end

-      # Runs once
+      # Runs once after a partition is revoked
      def on_revoked
        @flow_lock.synchronize do
-          return unless
-          return if @on_revoked_invoked
-
-          @on_revoked_invoked = true
+          return unless executable?(:on_revoked)

          yield(@last_message)
        end
      end
+
+      private
+
+      # Checks if given action is executable once. If it is and true is returned, this method
+      # will return false next time it is used.
+      #
+      # @param action [Symbol] what action we want to perform
+      # @return [Boolean] true if we can
+      # @note This method needs to run behind a mutex.
+      def executable?(action)
+        return false if @executed.include?(action)
+
+        @executed << action
+
+        true
+      end
    end
  end
end
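The refactor above collapses a family of `@on_*_invoked` booleans into a single collection checked behind a lock, so any number of named actions can be made run-once without adding instance variables. Distilled into a standalone sketch:

  class OncePerAction
    def initialize
      @executed = []
      @lock = Mutex.new
    end

    # Yields only the first time it is called for a given action name
    def once(action)
      @lock.synchronize do
        next if @executed.include?(action)

        @executed << action
        yield
      end
    end
  end

  guard = OncePerAction.new
  3.times { guard.once(:on_started) { puts 'started' } } # prints once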
data/lib/karafka/pro/processing/strategies/aj_dlq_mom.rb
CHANGED
@@ -21,7 +21,7 @@ module Karafka
        #
        # AJ has manual offset management on by default and the offset management is delegated to
        # the AJ consumer. This means, we cannot mark as consumed always. We can only mark as
-        # consumed when we skip given job upon errors. In all the other
+        # consumed when we skip given job upon errors. In all the other scenarios marking as
        # consumed needs to happen in the AJ consumer on a per job basis.
        module AjDlqMom
          include DlqMom
@@ -46,7 +46,7 @@ module Karafka
          else
            coordinator.pause_tracker.reset
            skippable_message = find_skippable_message
-            dispatch_to_dlq(skippable_message)
+            dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
            # We can commit the offset here because we know that we skip it "forever" and
            # since AJ consumer commits the offset after each job, we also know that the
            # previous job was successful
data/lib/karafka/pro/processing/strategies/dlq.rb
CHANGED
@@ -42,7 +42,7 @@ module Karafka
          # We reset the pause to indicate we will now consider it as "ok".
          coordinator.pause_tracker.reset
          skippable_message = find_skippable_message
-          dispatch_to_dlq(skippable_message)
+          dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
          mark_as_consumed(skippable_message)
          pause(coordinator.seek_offset)
        end
@@ -59,7 +59,6 @@ module Karafka

        # Moves the broken message into a separate queue defined via the settings
        #
-        # @private
        # @param skippable_message [Array<Karafka::Messages::Message>] message we want to
        #   dispatch to DLQ
        def dispatch_to_dlq(skippable_message)
@@ -81,6 +80,13 @@ module Karafka
            message: skippable_message
          )
        end
+
+        # @return [Boolean] should we dispatch the message to DLQ or not. When the dispatch topic
+        #   is set to false, we will skip the dispatch, effectively ignoring the broken message
+        #   without taking any action.
+        def dispatch_to_dlq?
+          topic.dead_letter_queue.topic
+        end
      end
    end
  end
data/lib/karafka/pro/processing/strategies/dlq_lrj.rb
CHANGED
@@ -43,10 +43,9 @@ module Karafka
          else
            coordinator.pause_tracker.reset

-            skippable_message = find_skippable_message
-
            unless revoked?
-              dispatch_to_dlq(skippable_message)
+              skippable_message = find_skippable_message
+              dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
              mark_as_consumed(skippable_message)
            end

data/lib/karafka/pro/processing/strategies/dlq_lrj_mom.rb
CHANGED
@@ -42,10 +42,12 @@ module Karafka
          else
            coordinator.pause_tracker.reset

-            skippable_message = find_skippable_message
-
            unless revoked?
-              dispatch_to_dlq
+              if dispatch_to_dlq?
+                skippable_message = find_skippable_message
+                dispatch_to_dlq(skippable_message)
+              end
+
              seek(coordinator.seek_offset)
            end
data/lib/karafka/pro/processing/strategies/dlq_mom.rb
CHANGED
@@ -45,8 +45,12 @@ module Karafka
          else
            # We reset the pause to indicate we will now consider it as "ok".
            coordinator.pause_tracker.reset
-
-            dispatch_to_dlq
+
+            if dispatch_to_dlq?
+              skippable_message = find_skippable_message
+              dispatch_to_dlq(skippable_message)
+            end
+
            pause(coordinator.seek_offset)
          end
        end
data/lib/karafka/process.rb
CHANGED
@@ -10,6 +10,7 @@ module Karafka
      SIGQUIT
      SIGTERM
      SIGTTIN
+      SIGTSTP
    ].freeze

    HANDLED_SIGNALS.each do |signal|
@@ -48,21 +49,23 @@ module Karafka

    # Traps a single signal and performs callbacks (if any) or just ignores this signal
    # @param [Symbol] signal type that we want to catch
+    # @note Since we do a lot of threading and queuing, we don't want to handle signals from the
+    #   trap context as some things may not work there as expected, that is why we spawn a
+    #   separate thread to handle the signals process
    def trap_signal(signal)
      trap(signal) do
-
-
+        Thread.new do
+          notice_signal(signal)
+
+          (@callbacks[signal] || []).each(&:call)
+        end
      end
    end

    # Informs monitoring about trapped signal
    # @param [Symbol] signal type that we received
-    # @note We cannot perform logging from trap context, that's why
-    #   we have to spin up a new thread to do this
    def notice_signal(signal)
-      Thread.new do
-        Karafka.monitor.instrument('process.notice_signal', caller: self, signal: signal)
-      end
+      Karafka.monitor.instrument('process.notice_signal', caller: self, signal: signal)
    end
  end
end
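The essential change: callbacks now run in a freshly spawned thread instead of directly in the trap context, where Ruby forbids taking mutexes (and therefore most logging). A self-contained illustration of the technique:

  callbacks = Hash.new { |h, k| h[k] = [] }
  callbacks['TSTP'] << -> { puts 'switching to quiet mode' }

  trap('TSTP') do
    # The trap context cannot safely log or lock; hand off immediately
    Thread.new do
      callbacks['TSTP'].each(&:call)
    end
  end

  # From another terminal: kill -TSTP <pid>
  sleep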
data/lib/karafka/processing/jobs_queue.rb
CHANGED
@@ -20,8 +20,12 @@ module Karafka
      # scheduled by Ruby hundreds of thousands of times per group.
      # We cannot use a single semaphore as it could potentially block in listeners that should
      # process with their data and also could unlock when a given group needs to remain locked
-      @semaphores =
+      @semaphores = Concurrent::Map.new do |h, k|
+        h.compute_if_absent(k) { Queue.new }
+      end
+
      @in_processing = Hash.new { |h, k| h[k] = [] }
+
      @mutex = Mutex.new
    end
@@ -47,9 +51,9 @@ module Karafka
        raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.include?(job)

        group << job
-      end

-      @queue << job
+        @queue << job
+      end
    end

    # @return [Jobs::Base, nil] waits for a job from the main queue and returns it once available
@@ -105,7 +109,9 @@ module Karafka
    # @return [Boolean] tell us if we have anything in the processing (or for processing) from
    # a given group.
    def empty?(group_id)
-      @in_processing[group_id].empty?
+      @mutex.synchronize do
+        @in_processing[group_id].empty?
+      end
    end

    # Blocks when there are things in the queue in a given group and waits until all the blocking
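Two details are easy to miss here: `@semaphores` now hands out one `Queue` per group id atomically, which is what prevents the "ghost queue" race mentioned in the changelog, and `#empty?` reads `@in_processing` under the same mutex that guards its writes. A sketch of the atomic per-key handout (requires concurrent-ruby):

  require 'concurrent'

  semaphores = Concurrent::Map.new do |map, group_id|
    map.compute_if_absent(group_id) { Queue.new }
  end

  # Both threads observe the very same Queue object for 'group-1'
  q1 = q2 = nil
  t1 = Thread.new { q1 = semaphores['group-1'] }
  t2 = Thread.new { q2 = semaphores['group-1'] }
  [t1, t2].each(&:join)
  puts q1.equal?(q2) # => true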
data/lib/karafka/railtie.rb
CHANGED
@@ -5,7 +5,8 @@
 rails = false

 begin
-  require 'rails'
+  # Do not load Rails again if already loaded
+  Object.const_defined?('Rails::Railtie') || require('rails')

   rails = true
 rescue LoadError
@@ -17,9 +18,6 @@ rescue LoadError
 end

 if rails
-  # Load Karafka
-  require 'karafka'
-
   # Load ActiveJob adapter
   require 'active_job/karafka'

data/lib/karafka/routing/features/dead_letter_queue/contract.rb
CHANGED
@@ -29,6 +29,8 @@ module Karafka

          topic = dead_letter_queue[:topic]

+          # When topic is set to false, it means we just want to skip dispatch on DLQ
+          next if topic == false
          next if topic.is_a?(String) && Contracts::TOPIC_REGEXP.match?(topic)

          [[%i[dead_letter_queue topic], :format]]
data/lib/karafka/routing/features/dead_letter_queue/topic.rb
CHANGED
@@ -12,7 +12,8 @@ module Karafka
        private_constant :DEFAULT_MAX_RETRIES

        # @param max_retries [Integer] after how many retries should we move data to dlq
-        # @param topic [String] where the messages should be moved if failing
+        # @param topic [String, false] where the messages should be moved if failing or false
+        #   if we do not want to move it anywhere and just skip
        # @return [Config] defined config
        def dead_letter_queue(max_retries: DEFAULT_MAX_RETRIES, topic: nil)
          @dead_letter_queue ||= Config.new(
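Combining the contract change with the updated docs above, `topic` now has three meaningful values on the routing side; a short sketch (consumer classes and topic names are illustrative):

  class KarafkaApp < Karafka::App
    routes.draw do
      topic :payments do
        consumer PaymentsConsumer
        # A topic name: after max_retries failures the message is moved there
        dead_letter_queue(max_retries: 3, topic: 'payments_dlq')
      end

      topic :clickstream do
        consumer ClickstreamConsumer
        # false: after max_retries failures the message is skipped, no dispatch
        dead_letter_queue(max_retries: 3, topic: false)
      end

      # Omitting dead_letter_queue entirely (topic defaults to nil) leaves the
      # feature inactive for a topic
    end
  end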
data/lib/karafka/server.rb
CHANGED
@@ -25,12 +25,10 @@ module Karafka

      # Method which runs app
      def run
-
-
-
-        process.on_sigint { Thread.new { stop } }
-        process.on_sigquit { Thread.new { stop } }
-        process.on_sigterm { Thread.new { stop } }
+        process.on_sigint { stop }
+        process.on_sigquit { stop }
+        process.on_sigterm { stop }
+        process.on_sigtstp { quiet }
        process.supervise

        # Start is blocking until stop is called and when we stop, it will wait until
@@ -74,7 +72,8 @@ module Karafka
      # please start a separate thread to do so.
      def stop
        # Initialize the stopping process only if Karafka was running
-        return if Karafka::App.stopping?
+        return if Karafka::App.stopping?
+        return if Karafka::App.stopped?

        Karafka::App.stop!

@@ -125,6 +124,18 @@ module Karafka
        Karafka::App.stopped! if timeout
      end

+      # Quiets the Karafka server.
+      # Karafka will stop processing but won't quiet to consumer group, so no rebalance will be
+      # triggered until final shutdown.
+      def quiet
+        # If we are already quieting or in the stop procedures, we should not do it again.
+        return if Karafka::App.quieting?
+        return if Karafka::App.stopping?
+        return if Karafka::App.stopped?
+
+        Karafka::App.quiet!
+      end
+
      private

      # @return [Karafka::Process] process wrapper instance used to catch system signal calls
data/lib/karafka/setup/attributes_map.rb
CHANGED
@@ -84,6 +84,7 @@ module Karafka
      reconnect.backoff.jitter.ms
      reconnect.backoff.max.ms
      reconnect.backoff.ms
+      resolve_cb
      sasl.kerberos.keytab
      sasl.kerberos.kinit.cmd
      sasl.kerberos.min.time.before.relogin
@@ -215,6 +216,7 @@ module Karafka
      reconnect.backoff.ms
      request.required.acks
      request.timeout.ms
+      resolve_cb
      retries
      retry.backoff.ms
      sasl.kerberos.keytab
data/lib/karafka/status.rb
CHANGED
data/lib/karafka/version.rb
CHANGED
data/lib/karafka.rb
CHANGED
@@ -86,6 +86,9 @@ end
 loader = Zeitwerk::Loader.for_gem
 # Do not load Rails extensions by default, this will be handled by Railtie if they are needed
 loader.ignore(Karafka.gem_root.join('lib/active_job'))
+# Do not load Railtie. It will load if after everything is ready, so we don't have to load any
+# Karafka components when we require this railtie. Railtie needs to be loaded last.
+loader.ignore(Karafka.gem_root.join('lib/karafka/railtie'))
 # Do not load pro components as they will be loaded if needed and allowed
 loader.ignore(Karafka.core_root.join('pro/'))
 # Do not load vendors instrumentation components. Those need to be required manually if needed
@@ -96,3 +99,6 @@ loader.eager_load
 # This will load features but since Pro are not loaded automatically, they will not be visible
 # nor included here
 ::Karafka::Routing::Features::Base.load_all
+
+# Load railtie after everything else is ready so we know we can rely on it.
+require 'karafka/railtie'
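The load-order trick is plain Zeitwerk usage: paths passed to `loader.ignore` are excluded from autoloading and eager loading altogether, so they can be required manually at a moment of your choosing. A condensed sketch under illustrative paths:

  require 'zeitwerk'

  loader = Zeitwerk::Loader.for_gem
  # Keep the railtie out of autoload; it must only load once everything is ready
  loader.ignore("#{__dir__}/my_gem/railtie.rb")
  loader.setup
  loader.eager_load

  # Explicit, last-in-line load of the ignored file
  require_relative 'my_gem/railtie'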
data.tar.gz.sig
CHANGED
Binary file
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: karafka
 version: !ruby/object:Gem::Version
-  version: 2.0.
+  version: 2.0.19
 platform: ruby
 authors:
 - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
   Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
   MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
 -----END CERTIFICATE-----
-date: 2022-11-
+date: 2022-11-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: karafka-core
@@ -176,6 +176,7 @@ files:
 - lib/karafka/cli/install.rb
 - lib/karafka/cli/server.rb
 - lib/karafka/connection/client.rb
+- lib/karafka/connection/consumer_group_coordinator.rb
 - lib/karafka/connection/listener.rb
 - lib/karafka/connection/listeners_batch.rb
 - lib/karafka/connection/messages_buffer.rb
@@ -326,7 +327,12 @@ licenses:
 - LGPL-3.0
 - Commercial
 metadata:
+  funding_uri: https://karafka.io/#become-pro
+  homepage_uri: https://karafka.io
+  changelog_uri: https://github.com/karafka/karafka/blob/master/CHANGELOG.md
+  bug_tracker_uri: https://github.com/karafka/karafka/issues
   source_code_uri: https://github.com/karafka/karafka
+  documentation_uri: https://karafka.io/docs
   rubygems_mfa_required: 'true'
 post_install_message:
 rdoc_options: []
metadata.gz.sig
CHANGED
Binary file