karafka 2.0.17 → 2.0.19

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +0 -1
  4. data/CHANGELOG.md +16 -0
  5. data/Gemfile.lock +1 -1
  6. data/karafka.gemspec +5 -0
  7. data/lib/active_job/karafka.rb +3 -1
  8. data/lib/karafka/app.rb +3 -2
  9. data/lib/karafka/connection/client.rb +36 -19
  10. data/lib/karafka/connection/consumer_group_coordinator.rb +47 -0
  11. data/lib/karafka/connection/listener.rb +49 -9
  12. data/lib/karafka/connection/listeners_batch.rb +12 -2
  13. data/lib/karafka/embedded.rb +10 -0
  14. data/lib/karafka/instrumentation/logger_listener.rb +15 -10
  15. data/lib/karafka/instrumentation/notifications.rb +1 -0
  16. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +3 -3
  17. data/lib/karafka/pro/performance_tracker.rb +7 -5
  18. data/lib/karafka/pro/processing/coordinator.rb +24 -21
  19. data/lib/karafka/pro/processing/strategies/aj_dlq_mom.rb +2 -2
  20. data/lib/karafka/pro/processing/strategies/dlq.rb +8 -2
  21. data/lib/karafka/pro/processing/strategies/dlq_lrj.rb +2 -3
  22. data/lib/karafka/pro/processing/strategies/dlq_lrj_mom.rb +5 -3
  23. data/lib/karafka/pro/processing/strategies/dlq_mom.rb +6 -2
  24. data/lib/karafka/process.rb +10 -7
  25. data/lib/karafka/processing/jobs_queue.rb +10 -4
  26. data/lib/karafka/railtie.rb +2 -4
  27. data/lib/karafka/routing/features/dead_letter_queue/contract.rb +2 -0
  28. data/lib/karafka/routing/features/dead_letter_queue/topic.rb +2 -1
  29. data/lib/karafka/server.rb +18 -7
  30. data/lib/karafka/setup/attributes_map.rb +2 -0
  31. data/lib/karafka/status.rb +1 -0
  32. data/lib/karafka/version.rb +1 -1
  33. data/lib/karafka.rb +6 -0
  34. data.tar.gz.sig +0 -0
  35. metadata +8 -2
  36. metadata.gz.sig +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8d15d4c803dc84a009e4a1ab02e90a9f034a396f718f742060c545f00422ffd5
4
- data.tar.gz: fd69d2a4dcb11a9ea94b8a9f7a4dbb505034a304e94b1bd5e9a0a9fe44a666f0
3
+ metadata.gz: be91c3848b757c6af4c25f905df2b081629532bd29dbcea23ed2ef0af2e4e4a2
4
+ data.tar.gz: 6823d4335e4b395546642101d6754b97958c86810cbcd12819559acff74bd90d
5
5
  SHA512:
6
- metadata.gz: fe75bb62fecbca6b541d1c1737a596b0a937a02a294ec62a13220915194807568be84c0ebb338de6de8a124714164ed7f7f060cdb573f5606357cce861ba364f
7
- data.tar.gz: 6c622f4d2ce80b86807e0e05b3108f7fab66ba2696cd2b662a3412986bc98ba954d03bf70141c3a5526e8e4954495efa34c846a0ec594e2e05b05ef2fa01291a
6
+ metadata.gz: fce0259ee987e37c01ea037f81ea91b4eb770ea8eabcb9f93c66aa1a1960c903030648b5441945ef28f43a88660d18240e6db61f6885a169d70eb46174543616
7
+ data.tar.gz: f93985c98daba5965f8f0597da4744d1dae0603f24a6a44f4b337462f66c8b0f08d0c22dd734205a0aebe7c3dbdb73237ff568f7f1ff842b38c05a7f4b5ce463
checksums.yaml.gz.sig CHANGED
Binary file
@@ -109,7 +109,6 @@ jobs:
109
109
  uses: ruby/setup-ruby@v1
110
110
  with:
111
111
  ruby-version: ${{matrix.ruby}}
112
- bundler-cache: true
113
112
 
114
113
  - name: Install latest Bundler
115
114
  run: |
data/CHANGELOG.md CHANGED
@@ -1,5 +1,21 @@
1
1
  # Karafka framework changelog
2
2
 
3
+ ## 2.0.19 (2022-11-20)
4
+ - **[Feature]** Provide ability to skip failing messages without dispatching them to an alternative topic (DLQ).
5
+ - [Improvement] Improve the integration with Ruby on Rails by preventing double-require of components.
6
+ - [Improvement] Improve stability of the shutdown process upon critical errors.
7
+ - [Improvement] Improve stability of the integrations spec suite.
8
+ - [Fix] Fix an issue where upon fast startup of multiple subscription groups from the same consumer group, a ghost queue would be created due to problems in `Concurrent::Hash`.
9
+
10
+ ## 2.0.18 (2022-11-18)
11
+ - **[Feature]** Support quiet mode via `TSTP` signal. When used, Karafka will finish processing current messages, run `shutdown` jobs, and switch to a quiet mode where no new work is being accepted. At the same time, it will keep the consumer group quiet, and thus no rebalance will be triggered. This can be particularly useful during deployments.
12
+ - [Improvement] Trigger `#revoked` for jobs in case revocation would happen during shutdown when jobs are still running. This should ensure we get a notion of revocation for Pro LRJ jobs even when revocation happens upon shutdown (#1150).
13
+ - [Improvement] Stabilize the shutdown procedure for consumer groups with many subscription groups that have non-aligned processing cost per batch.
14
+ - [Improvement] Remove double loading of Karafka via Rails railtie.
15
+ - [Fix] Fix invalid class references in YARD docs.
16
+ - [Fix] Prevent parallel closing of many clients.
17
+ - [Fix] Fix a case where information about revocation for a combination of LRJ + VP would not be dispatched until all VP work is done.
18
+
3
19
  ## 2.0.17 (2022-11-10)
4
20
  - [Fix] Few typos around DLQ and Pro DLQ Dispatch original metadata naming.
5
21
  - [Fix] Narrow the components lookup to the appropriate scope (#1114)
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- karafka (2.0.17)
4
+ karafka (2.0.19)
5
5
  karafka-core (>= 2.0.2, < 3.0.0)
6
6
  rdkafka (>= 0.12)
7
7
  thor (>= 0.20)
data/karafka.gemspec CHANGED
@@ -34,7 +34,12 @@ Gem::Specification.new do |spec|
34
34
  spec.require_paths = %w[lib]
35
35
 
36
36
  spec.metadata = {
37
+ 'funding_uri' => 'https://karafka.io/#become-pro',
38
+ 'homepage_uri' => 'https://karafka.io',
39
+ 'changelog_uri' => 'https://github.com/karafka/karafka/blob/master/CHANGELOG.md',
40
+ 'bug_tracker_uri' => 'https://github.com/karafka/karafka/issues',
37
41
  'source_code_uri' => 'https://github.com/karafka/karafka',
42
+ 'documentation_uri' => 'https://karafka.io/docs',
38
43
  'rubygems_mfa_required' => 'true'
39
44
  }
40
45
  end
@@ -1,7 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  begin
4
- require 'active_job'
4
+ # Do not load active job if already loaded
5
+ require 'active_job' unless Object.const_defined?('ActiveJob')
6
+
5
7
  require_relative 'queue_adapters/karafka_adapter'
6
8
 
7
9
  module ActiveJob
data/lib/karafka/app.rb CHANGED
@@ -14,11 +14,12 @@ module Karafka
14
14
  .builder
15
15
  end
16
16
 
17
- # @return [Array<Karafka::Routing::SubscriptionGroup>] active subscription groups
17
+ # @return [Hash] active subscription groups grouped based on consumer group in a hash
18
18
  def subscription_groups
19
19
  consumer_groups
20
20
  .active
21
- .flat_map(&:subscription_groups)
21
+ .map { |consumer_group| [consumer_group, consumer_group.subscription_groups] }
22
+ .to_h
22
23
  end
23
24
 
24
25
  # Just a nicer name for the consumer groups
@@ -17,7 +17,11 @@ module Karafka
17
17
  # How many times should we retry polling in case of a failure
18
18
  MAX_POLL_RETRIES = 20
19
19
 
20
- private_constant :MAX_POLL_RETRIES
20
+ # We want to make sure we never close several clients in the same moment to prevent
21
+ # potential race conditions and other issues
22
+ SHUTDOWN_MUTEX = Mutex.new
23
+
24
+ private_constant :MAX_POLL_RETRIES, :SHUTDOWN_MUTEX
21
25
 
22
26
  # Creates a new consumer instance.
23
27
  #
@@ -237,6 +241,17 @@ module Karafka
237
241
  end
238
242
  end
239
243
 
244
+ # Runs a single poll ignoring all the potential errors
245
+ # This is used as a keep-alive in the shutdown stage and any errors that happen here are
246
+ # irrelevant from the shutdown process perspective
247
+ #
248
+ # This is used only to trigger rebalance callbacks
249
+ def ping
250
+ poll(100)
251
+ rescue Rdkafka::RdkafkaError
252
+ nil
253
+ end
254
+
240
255
  private
241
256
 
242
257
  # When we cannot store an offset, it means we no longer own the partition
@@ -281,24 +296,26 @@ module Karafka
281
296
 
282
297
  # Commits the stored offsets in a sync way and closes the consumer.
283
298
  def close
284
- @mutex.synchronize do
285
- # Once client is closed, we should not close it again
286
- # This could only happen in case of a race-condition when forceful shutdown happens
287
- # and triggers this from a different thread
288
- return if @closed
289
-
290
- @closed = true
291
-
292
- internal_commit_offsets(async: false)
293
-
294
- # Remove callbacks runners that were registered
295
- ::Karafka::Instrumentation.statistics_callbacks.delete(@subscription_group.id)
296
- ::Karafka::Instrumentation.error_callbacks.delete(@subscription_group.id)
297
-
298
- @kafka.close
299
- @buffer.clear
300
- # @note We do not clear rebalance manager here as we may still have revocation info here
301
- # that we want to consider valid prior to running another reconnection
299
+ # Allow only one client to be closed at the same time
300
+ SHUTDOWN_MUTEX.synchronize do
301
+ # Make sure that no other operations are happening on this client when we close it
302
+ @mutex.synchronize do
303
+ # Once client is closed, we should not close it again
304
+ # This could only happen in case of a race-condition when forceful shutdown happens
305
+ # and triggers this from a different thread
306
+ return if @closed
307
+
308
+ @closed = true
309
+
310
+ # Remove callbacks runners that were registered
311
+ ::Karafka::Instrumentation.statistics_callbacks.delete(@subscription_group.id)
312
+ ::Karafka::Instrumentation.error_callbacks.delete(@subscription_group.id)
313
+
314
+ @kafka.close
315
+ @buffer.clear
316
+ # @note We do not clear rebalance manager here as we may still have revocation info
317
+ # here that we want to consider valid prior to running another reconnection
318
+ end
302
319
  end
303
320
  end
304
321
 
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Connection
5
+ # This object represents a collective status of execution of group of listeners running inside
6
+ # of one consumer group but in separate subscription groups.
7
+ #
8
+ # There are cases when we do not want to close a given client when others from the same
9
+ # consumer group are running because it can cause instabilities due to early shutdown of some
10
+ # of the clients out of the same consumer group.
11
+ #
12
+ # We also want to make sure, we close one consumer at a time while others can continue polling.
13
+ #
14
+ # This prevents a scenario where a rebalance is not acknowledged and we lose assignment
15
+ # without having a chance to commit changes.
16
+ class ConsumerGroupCoordinator
17
+ # @param group_size [Integer] number of separate subscription groups in a consumer group
18
+ def initialize(group_size)
19
+ # We need two locks here:
20
+ # - first one is to decrement the number of listeners doing work
21
+ # - second to ensure only one client is being closed at the same time and that others can
22
+ # wait actively (not locked)
23
+ @work_mutex = Mutex.new
24
+ @shutdown_lock = Mutex.new
25
+ @group_size = group_size
26
+ @finished = Set.new
27
+ end
28
+
29
+ # @return [Boolean] can we start shutdown on a given listener
30
+ # @note If true, will also obtain a lock so no-one else will be closing at the same time we do
31
+ def shutdown?
32
+ @finished.size == @group_size && @shutdown_lock.try_lock
33
+ end
34
+
35
+ # Unlocks the shutdown lock
36
+ def unlock
37
+ @shutdown_lock.unlock if @shutdown_lock.owned?
38
+ end
39
+
40
+ # Marks given listener as finished
41
+ # @param listener_id [String]
42
+ def finish_work(listener_id)
43
+ @finished << listener_id
44
+ end
45
+ end
46
+ end
47
+ end
@@ -14,13 +14,15 @@ module Karafka
14
14
  # @return [String] id of this listener
15
15
  attr_reader :id
16
16
 
17
+ # @param consumer_group_coordinator [Karafka::Connection::ConsumerGroupCoordinator]
17
18
  # @param subscription_group [Karafka::Routing::SubscriptionGroup]
18
19
  # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
19
20
  # @return [Karafka::Connection::Listener] listener instance
20
- def initialize(subscription_group, jobs_queue)
21
+ def initialize(consumer_group_coordinator, subscription_group, jobs_queue)
21
22
  proc_config = ::Karafka::App.config.internal.processing
22
23
 
23
24
  @id = SecureRandom.uuid
25
+ @consumer_group_coordinator = consumer_group_coordinator
24
26
  @subscription_group = subscription_group
25
27
  @jobs_queue = jobs_queue
26
28
  @coordinators = Processing::CoordinatorsBuffer.new
@@ -82,7 +84,8 @@ module Karafka
82
84
  # Kafka connections / Internet connection issues / Etc. Business logic problems should not
83
85
  # propagate this far.
84
86
  def fetch_loop
85
- until Karafka::App.stopping?
87
+ # Run the main loop as long as we are not stopping or moving into quiet mode
88
+ until Karafka::App.stopping? || Karafka::App.quieting?
86
89
  Karafka.monitor.instrument(
87
90
  'connection.listener.fetch_loop',
88
91
  caller: self,
@@ -122,7 +125,7 @@ module Karafka
122
125
  wait
123
126
  end
124
127
 
125
- # If we are stopping we will no longer schedule any jobs despite polling.
128
+ # If we are stopping we will no longer schedule any regular jobs despite polling.
126
129
  # We need to keep polling not to exceed the `max.poll.interval` for long-running
127
130
  # non-blocking jobs and we need to allow them to finish. We however do not want to
128
131
  # enqueue any new jobs. It's worth keeping in mind that it is the end user responsibility
@@ -131,7 +134,14 @@ module Karafka
131
134
  #
132
135
  # We do not care about resuming any partitions or lost jobs as we do not plan to do
133
136
  # anything with them as we're in the shutdown phase.
134
- wait_with_poll
137
+ #
138
+ # What we do care however is the ability to still run revocation jobs in case anything
139
+ # would change in the cluster. We still want to notify the long-running jobs about changes
140
+ # that occurred in the cluster.
141
+ wait_pinging(
142
+ wait_until: -> { @jobs_queue.empty?(@subscription_group.id) },
143
+ after_ping: -> { build_and_schedule_revoke_lost_partitions_jobs }
144
+ )
135
145
 
136
146
  # We do not want to schedule the shutdown jobs prior to finishing all the jobs
137
147
  # (including non-blocking) as there might be a long-running job with a shutdown and then
@@ -139,7 +149,24 @@ module Karafka
139
149
  # as it could create a race-condition.
140
150
  build_and_schedule_shutdown_jobs
141
151
 
142
- wait_with_poll
152
+ # Wait until all the shutdown jobs are done
153
+ wait_pinging(wait_until: -> { @jobs_queue.empty?(@subscription_group.id) })
154
+
155
+ # Once all the work is done, we need to decrement counter of active subscription groups
156
+ # within this consumer group
157
+ @consumer_group_coordinator.finish_work(id)
158
+
159
+ # Wait if we're in the quiet mode
160
+ wait_pinging(wait_until: -> { !Karafka::App.quieting? })
161
+
162
+ # We need to wait until all the work in the whole consumer group (local to the process)
163
+ # is done. Otherwise we may end up with locks and `Timed out LeaveGroupRequest in flight`
164
+ # warning notifications.
165
+ wait_pinging(wait_until: -> { @consumer_group_coordinator.shutdown? })
166
+
167
+ # This extra ping will make sure we've refreshed the rebalance state after other instances
168
+ # potentially shutdown. This will prevent us from closing with a dangling callback
169
+ @client.ping
143
170
 
144
171
  shutdown
145
172
 
@@ -157,6 +184,8 @@ module Karafka
157
184
  restart
158
185
 
159
186
  sleep(1) && retry
187
+ ensure
188
+ @consumer_group_coordinator.unlock
160
189
  end
161
190
 
162
191
  # Resumes processing of partitions that were paused due to an error.
@@ -256,10 +285,21 @@ module Karafka
256
285
  end
257
286
 
258
287
  # Waits without blocking the polling
259
- # This should be used only when we no longer plan to use any incoming data and we can safely
260
- # discard it
261
- def wait_with_poll
262
- @client.batch_poll until @jobs_queue.empty?(@subscription_group.id)
288
+ #
289
+ # This should be used only when we no longer plan to use any incoming messages data and we
290
+ # can safely discard it. We can however use the rebalance information if needed.
291
+ #
292
+ # @param wait_until [Proc] until this evaluates to true, we will poll data
293
+ # @param after_ping [Proc] code that we want to run after each ping (if any)
294
+ #
295
+ # @note Performance of this is not relevant (in regards to blocks) because it is used only
296
+ # on shutdown and quiet, hence not in the running mode
297
+ def wait_pinging(wait_until:, after_ping: -> {})
298
+ until wait_until.call
299
+ @client.ping
300
+ after_ping.call
301
+ sleep(0.2)
302
+ end
263
303
  end
264
304
 
265
305
  # We can stop client without a problem, as it will reinitialize itself when running the
@@ -9,8 +9,18 @@ module Karafka
9
9
  # @param jobs_queue [JobsQueue]
10
10
  # @return [ListenersBatch]
11
11
  def initialize(jobs_queue)
12
- @batch = App.subscription_groups.map do |subscription_group|
13
- Connection::Listener.new(subscription_group, jobs_queue)
12
+ @batch = App.subscription_groups.flat_map do |_consumer_group, subscription_groups|
13
+ consumer_group_coordinator = Connection::ConsumerGroupCoordinator.new(
14
+ subscription_groups.size
15
+ )
16
+
17
+ subscription_groups.map do |subscription_group|
18
+ Connection::Listener.new(
19
+ consumer_group_coordinator,
20
+ subscription_group,
21
+ jobs_queue
22
+ )
23
+ end
14
24
  end
15
25
  end
16
26
 
@@ -18,6 +18,16 @@ module Karafka
18
18
  # Stop needs to be blocking to wait for all the things to finalize
19
19
  Karafka::Server.stop
20
20
  end
21
+
22
+ # Quiets Karafka upon any event
23
+ #
24
+ # @note This method is not blocking and will not wait for Karafka to fully quiet.
25
+ # It will trigger the quiet procedure but won't wait.
26
+ #
27
+ # @note Please keep in mind you need to `#stop` to actually stop the server anyhow.
28
+ def quiet
29
+ Karafka::Server.quiet
30
+ end
21
31
  end
22
32
  end
23
33
  end
@@ -18,7 +18,7 @@ module Karafka
18
18
 
19
19
  # Logs each messages fetching attempt
20
20
  #
21
- # @param event [Dry::Events::Event] event details including payload
21
+ # @param event [Karafka::Core::Monitoring::Event] event details including payload
22
22
  def on_connection_listener_fetch_loop(event)
23
23
  listener = event[:caller]
24
24
  debug "[#{listener.id}] Polling messages..."
@@ -26,7 +26,7 @@ module Karafka
26
26
 
27
27
  # Logs about messages that we've received from Kafka
28
28
  #
29
- # @param event [Dry::Events::Event] event details including payload
29
+ # @param event [Karafka::Core::Monitoring::Event] event details including payload
30
30
  def on_connection_listener_fetch_loop_received(event)
31
31
  listener = event[:caller]
32
32
  time = event[:time]
@@ -42,7 +42,7 @@ module Karafka
42
42
 
43
43
  # Prints info about the fact that a given job has started
44
44
  #
45
- # @param event [Dry::Events::Event] event details including payload
45
+ # @param event [Karafka::Core::Monitoring::Event] event details including payload
46
46
  def on_worker_process(event)
47
47
  job = event[:job]
48
48
  job_type = job.class.to_s.split('::').last
@@ -53,7 +53,7 @@ module Karafka
53
53
 
54
54
  # Prints info about the fact that a given job has finished
55
55
  #
56
- # @param event [Dry::Events::Event] event details including payload
56
+ # @param event [Karafka::Core::Monitoring::Event] event details including payload
57
57
  def on_worker_processed(event)
58
58
  job = event[:job]
59
59
  time = event[:time]
@@ -66,7 +66,7 @@ module Karafka
66
66
  # Logs info about system signals that Karafka received and prints backtrace for threads in
67
67
  # case of ttin
68
68
  #
69
- # @param event [Dry::Events::Event] event details including payload
69
+ # @param event [Karafka::Core::Monitoring::Event] event details including payload
70
70
  def on_process_notice_signal(event)
71
71
  info "Received #{event[:signal]} system signal"
72
72
 
@@ -89,7 +89,7 @@ module Karafka
89
89
 
90
90
  # Logs info that we're running Karafka app.
91
91
  #
92
- # @param _event [Dry::Events::Event] event details including payload
92
+ # @param _event [Karafka::Core::Monitoring::Event] event details including payload
93
93
  def on_app_running(_event)
94
94
  info "Running in #{RUBY_DESCRIPTION}"
95
95
  info "Running Karafka #{Karafka::VERSION} server"
@@ -99,23 +99,28 @@ module Karafka
99
99
  info 'See LICENSE and the LGPL-3.0 for licensing details.'
100
100
  end
101
101
 
102
+ # @param _event [Karafka::Core::Monitoring::Event] event details including payload
103
+ def on_app_quieting(_event)
104
+ info 'Switching to quiet mode. New messages will not be processed.'
105
+ end
106
+
102
107
  # Logs info that we're going to stop the Karafka server.
103
108
  #
104
- # @param _event [Dry::Events::Event] event details including payload
109
+ # @param _event [Karafka::Core::Monitoring::Event] event details including payload
105
110
  def on_app_stopping(_event)
106
111
  info 'Stopping Karafka server'
107
112
  end
108
113
 
109
114
  # Logs info that we stopped the Karafka server.
110
115
  #
111
- # @param _event [Dry::Events::Event] event details including payload
116
+ # @param _event [Karafka::Core::Monitoring::Event] event details including payload
112
117
  def on_app_stopped(_event)
113
118
  info 'Stopped Karafka server'
114
119
  end
115
120
 
116
121
  # Logs info when we have dispatched a message to the DLQ
117
122
  #
118
- # @param event [Dry::Events::Event] event details including payload
123
+ # @param event [Karafka::Core::Monitoring::Event] event details including payload
119
124
  def on_dead_letter_queue_dispatched(event)
120
125
  message = event[:message]
121
126
  offset = message.offset
@@ -128,7 +133,7 @@ module Karafka
128
133
 
129
134
  # There are many types of errors that can occur in many places, but we provide a single
130
135
  # handler for all of them to simplify error instrumentation.
131
- # @param event [Dry::Events::Event] event details including payload
136
+ # @param event [Karafka::Core::Monitoring::Event] event details including payload
132
137
  def on_error_occurred(event)
133
138
  type = event[:type]
134
139
  error = event[:error]
@@ -19,6 +19,7 @@ module Karafka
19
19
  EVENTS = %w[
20
20
  app.initialized
21
21
  app.running
22
+ app.quieting
22
23
  app.stopping
23
24
  app.stopped
24
25
 
@@ -42,7 +42,7 @@ module Karafka
42
42
 
43
43
  # Prints info about the fact that a given job has started
44
44
  #
45
- # @param event [Dry::Events::Event] event details including payload
45
+ # @param event [Karafka::Core::Monitoring::Event] event details including payload
46
46
  def on_worker_process(event)
47
47
  current_span = client.trace('karafka.consumer')
48
48
  push_tags
@@ -60,7 +60,7 @@ module Karafka
60
60
 
61
61
  # Prints info about the fact that a given job has finished
62
62
  #
63
- # @param event [Dry::Events::Event] event details including payload
63
+ # @param event [Karafka::Core::Monitoring::Event] event details including payload
64
64
  def on_worker_processed(event)
65
65
  push_tags
66
66
 
@@ -80,7 +80,7 @@ module Karafka
80
80
 
81
81
  # There are many types of errors that can occur in many places, but we provide a single
82
82
  # handler for all of them to simplify error instrumentation.
83
- # @param event [Dry::Events::Event] event details including payload
83
+ # @param event [Karafka::Core::Monitoring::Event] event details including payload
84
84
  def on_error_occurred(event)
85
85
  push_tags
86
86
 
@@ -25,11 +25,13 @@ module Karafka
25
25
 
26
26
  # Builds up nested concurrent hash for data tracking
27
27
  def initialize
28
- @processing_times = Concurrent::Hash.new do |topics_hash, topic|
29
- topics_hash[topic] = Concurrent::Hash.new do |partitions_hash, partition|
30
- # This array does not have to be concurrent because we always access single partition
31
- # data via instrumentation that operates in a single thread via consumer
32
- partitions_hash[partition] = []
28
+ @processing_times = Concurrent::Map.new do |topics_hash, topic|
29
+ topics_hash.compute_if_absent(topic) do
30
+ Concurrent::Map.new do |partitions_hash, partition|
31
+ # This array does not have to be concurrent because we always access single
32
+ # partition data via instrumentation that operates in a single thread via consumer
33
+ partitions_hash.compute_if_absent(partition) { [] }
34
+ end
33
35
  end
34
36
  end
35
37
  end
@@ -20,10 +20,8 @@ module Karafka
20
20
  # @param args [Object] anything the base coordinator accepts
21
21
  def initialize(*args)
22
22
  super
23
- @on_enqueued_invoked = false
24
- @on_started_invoked = false
25
- @on_finished_invoked = false
26
- @on_revoked_invoked = false
23
+
24
+ @executed = []
27
25
  @flow_lock = Mutex.new
28
26
  end
29
27
 
@@ -34,9 +32,7 @@ module Karafka
34
32
  super
35
33
 
36
34
  @mutex.synchronize do
37
- @on_enqueued_invoked = false
38
- @on_started_invoked = false
39
- @on_finished_invoked = false
35
+ @executed.clear
40
36
  @last_message = messages.last
41
37
  end
42
38
  end
@@ -50,9 +46,7 @@ module Karafka
50
46
  # enqueued
51
47
  def on_enqueued
52
48
  @flow_lock.synchronize do
53
- return if @on_enqueued_invoked
54
-
55
- @on_enqueued_invoked = true
49
+ return unless executable?(:on_enqueued)
56
50
 
57
51
  yield(@last_message)
58
52
  end
@@ -61,9 +55,7 @@ module Karafka
61
55
  # Runs given code only once per all the coordinated jobs upon starting first of them
62
56
  def on_started
63
57
  @flow_lock.synchronize do
64
- return if @on_started_invoked
65
-
66
- @on_started_invoked = true
58
+ return unless executable?(:on_started)
67
59
 
68
60
  yield(@last_message)
69
61
  end
@@ -75,25 +67,36 @@ module Karafka
75
67
  def on_finished
76
68
  @flow_lock.synchronize do
77
69
  return unless finished?
78
- return if @on_finished_invoked
79
-
80
- @on_finished_invoked = true
70
+ return unless executable?(:on_finished)
81
71
 
82
72
  yield(@last_message)
83
73
  end
84
74
  end
85
75
 
86
- # Runs once when a partition is revoked
76
+ # Runs once after a partition is revoked
87
77
  def on_revoked
88
78
  @flow_lock.synchronize do
89
- return unless finished?
90
- return if @on_revoked_invoked
91
-
92
- @on_revoked_invoked = true
79
+ return unless executable?(:on_revoked)
93
80
 
94
81
  yield(@last_message)
95
82
  end
96
83
  end
84
+
85
+ private
86
+
87
+ # Checks if given action is executable once. If it is and true is returned, this method
88
+ # will return false next time it is used.
89
+ #
90
+ # @param action [Symbol] what action we want to perform
91
+ # @return [Boolean] true if we can
92
+ # @note This method needs to run behind a mutex.
93
+ def executable?(action)
94
+ return false if @executed.include?(action)
95
+
96
+ @executed << action
97
+
98
+ true
99
+ end
97
100
  end
98
101
  end
99
102
  end
@@ -21,7 +21,7 @@ module Karafka
21
21
  #
22
22
  # AJ has manual offset management on by default and the offset management is delegated to
23
23
  # the AJ consumer. This means, we cannot mark as consumed always. We can only mark as
24
- # consumed when we skip given job upon errors. In all the other scenarions marking as
24
+ # consumed when we skip given job upon errors. In all the other scenarios marking as
25
25
  # consumed needs to happen in the AJ consumer on a per job basis.
26
26
  module AjDlqMom
27
27
  include DlqMom
@@ -46,7 +46,7 @@ module Karafka
46
46
  else
47
47
  coordinator.pause_tracker.reset
48
48
  skippable_message = find_skippable_message
49
- dispatch_to_dlq(skippable_message)
49
+ dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
50
50
  # We can commit the offset here because we know that we skip it "forever" and
51
51
  # since AJ consumer commits the offset after each job, we also know that the
52
52
  # previous job was successful
@@ -42,7 +42,7 @@ module Karafka
42
42
  # We reset the pause to indicate we will now consider it as "ok".
43
43
  coordinator.pause_tracker.reset
44
44
  skippable_message = find_skippable_message
45
- dispatch_to_dlq(skippable_message)
45
+ dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
46
46
  mark_as_consumed(skippable_message)
47
47
  pause(coordinator.seek_offset)
48
48
  end
@@ -59,7 +59,6 @@ module Karafka
59
59
 
60
60
  # Moves the broken message into a separate queue defined via the settings
61
61
  #
62
- # @private
63
62
  # @param skippable_message [Array<Karafka::Messages::Message>] message we want to
64
63
  # dispatch to DLQ
65
64
  def dispatch_to_dlq(skippable_message)
@@ -81,6 +80,13 @@ module Karafka
81
80
  message: skippable_message
82
81
  )
83
82
  end
83
+
84
+ # @return [Boolean] should we dispatch the message to DLQ or not. When the dispatch topic
85
+ # is set to false, we will skip the dispatch, effectively ignoring the broken message
86
+ # without taking any action.
87
+ def dispatch_to_dlq?
88
+ topic.dead_letter_queue.topic
89
+ end
84
90
  end
85
91
  end
86
92
  end
@@ -43,10 +43,9 @@ module Karafka
43
43
  else
44
44
  coordinator.pause_tracker.reset
45
45
 
46
- skippable_message = find_skippable_message
47
-
48
46
  unless revoked?
49
- dispatch_to_dlq(skippable_message)
47
+ skippable_message = find_skippable_message
48
+ dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
50
49
  mark_as_consumed(skippable_message)
51
50
  end
52
51
 
@@ -42,10 +42,12 @@ module Karafka
42
42
  else
43
43
  coordinator.pause_tracker.reset
44
44
 
45
- skippable_message = find_skippable_message
46
-
47
45
  unless revoked?
48
- dispatch_to_dlq(skippable_message)
46
+ if dispatch_to_dlq?
47
+ skippable_message = find_skippable_message
48
+ dispatch_to_dlq(skippable_message)
49
+ end
50
+
49
51
  seek(coordinator.seek_offset)
50
52
  end
51
53
 
@@ -45,8 +45,12 @@ module Karafka
45
45
  else
46
46
  # We reset the pause to indicate we will now consider it as "ok".
47
47
  coordinator.pause_tracker.reset
48
- skippable_message = find_skippable_message
49
- dispatch_to_dlq(skippable_message)
48
+
49
+ if dispatch_to_dlq?
50
+ skippable_message = find_skippable_message
51
+ dispatch_to_dlq(skippable_message)
52
+ end
53
+
50
54
  pause(coordinator.seek_offset)
51
55
  end
52
56
  end
@@ -10,6 +10,7 @@ module Karafka
10
10
  SIGQUIT
11
11
  SIGTERM
12
12
  SIGTTIN
13
+ SIGTSTP
13
14
  ].freeze
14
15
 
15
16
  HANDLED_SIGNALS.each do |signal|
@@ -48,21 +49,23 @@ module Karafka
48
49
 
49
50
  # Traps a single signal and performs callbacks (if any) or just ignores this signal
50
51
  # @param [Symbol] signal type that we want to catch
52
+ # @note Since we do a lot of threading and queuing, we don't want to handle signals from the
53
+ # trap context as some things may not work there as expected, that is why we spawn a separate
54
+ # thread to handle the signals process
51
55
  def trap_signal(signal)
52
56
  trap(signal) do
53
- notice_signal(signal)
54
- (@callbacks[signal] || []).each(&:call)
57
+ Thread.new do
58
+ notice_signal(signal)
59
+
60
+ (@callbacks[signal] || []).each(&:call)
61
+ end
55
62
  end
56
63
  end
57
64
 
58
65
  # Informs monitoring about trapped signal
59
66
  # @param [Symbol] signal type that we received
60
- # @note We cannot perform logging from trap context, that's why
61
- # we have to spin up a new thread to do this
62
67
  def notice_signal(signal)
63
- Thread.new do
64
- Karafka.monitor.instrument('process.notice_signal', caller: self, signal: signal)
65
- end
68
+ Karafka.monitor.instrument('process.notice_signal', caller: self, signal: signal)
66
69
  end
67
70
  end
68
71
  end
@@ -20,8 +20,12 @@ module Karafka
20
20
  # scheduled by Ruby hundreds of thousands of times per group.
21
21
  # We cannot use a single semaphore as it could potentially block in listeners that should
22
22
  # process with their data and also could unlock when a given group needs to remain locked
23
- @semaphores = Hash.new { |h, k| h[k] = Queue.new }
23
+ @semaphores = Concurrent::Map.new do |h, k|
24
+ h.compute_if_absent(k) { Queue.new }
25
+ end
26
+
24
27
  @in_processing = Hash.new { |h, k| h[k] = [] }
28
+
25
29
  @mutex = Mutex.new
26
30
  end
27
31
 
@@ -47,9 +51,9 @@ module Karafka
47
51
  raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.include?(job)
48
52
 
49
53
  group << job
50
- end
51
54
 
52
- @queue << job
55
+ @queue << job
56
+ end
53
57
  end
54
58
 
55
59
  # @return [Jobs::Base, nil] waits for a job from the main queue and returns it once available
@@ -105,7 +109,9 @@ module Karafka
105
109
  # @return [Boolean] tell us if we have anything in the processing (or for processing) from
106
110
  # a given group.
107
111
  def empty?(group_id)
108
- @in_processing[group_id].empty?
112
+ @mutex.synchronize do
113
+ @in_processing[group_id].empty?
114
+ end
109
115
  end
110
116
 
111
117
  # Blocks when there are things in the queue in a given group and waits until all the blocking
@@ -5,7 +5,8 @@
5
5
  rails = false
6
6
 
7
7
  begin
8
- require 'rails'
8
+ # Do not load Rails again if already loaded
9
+ Object.const_defined?('Rails::Railtie') || require('rails')
9
10
 
10
11
  rails = true
11
12
  rescue LoadError
@@ -17,9 +18,6 @@ rescue LoadError
17
18
  end
18
19
 
19
20
  if rails
20
- # Load Karafka
21
- require 'karafka'
22
-
23
21
  # Load ActiveJob adapter
24
22
  require 'active_job/karafka'
25
23
 
@@ -29,6 +29,8 @@ module Karafka
29
29
 
30
30
  topic = dead_letter_queue[:topic]
31
31
 
32
+ # When topic is set to false, it means we just want to skip dispatch on DLQ
33
+ next if topic == false
32
34
  next if topic.is_a?(String) && Contracts::TOPIC_REGEXP.match?(topic)
33
35
 
34
36
  [[%i[dead_letter_queue topic], :format]]
@@ -12,7 +12,8 @@ module Karafka
12
12
  private_constant :DEFAULT_MAX_RETRIES
13
13
 
14
14
  # @param max_retries [Integer] after how many retries should we move data to dlq
15
- # @param topic [String] where the messages should be moved if failing
15
+ # @param topic [String, false] where the messages should be moved if failing or false
16
+ # if we do not want to move it anywhere and just skip
16
17
  # @return [Config] defined config
17
18
  def dead_letter_queue(max_retries: DEFAULT_MAX_RETRIES, topic: nil)
18
19
  @dead_letter_queue ||= Config.new(
@@ -25,12 +25,10 @@ module Karafka
25
25
 
26
26
  # Method which runs app
27
27
  def run
28
- # Since we do a lot of threading and queuing, we don't want to stop from the trap context
29
- # as some things may not work there as expected, that is why we spawn a separate thread to
30
- # handle the stopping process
31
- process.on_sigint { Thread.new { stop } }
32
- process.on_sigquit { Thread.new { stop } }
33
- process.on_sigterm { Thread.new { stop } }
28
+ process.on_sigint { stop }
29
+ process.on_sigquit { stop }
30
+ process.on_sigterm { stop }
31
+ process.on_sigtstp { quiet }
34
32
  process.supervise
35
33
 
36
34
  # Start is blocking until stop is called and when we stop, it will wait until
@@ -74,7 +72,8 @@ module Karafka
74
72
  # please start a separate thread to do so.
75
73
  def stop
76
74
  # Initialize the stopping process only if Karafka was running
77
- return if Karafka::App.stopping? || Karafka::App.stopped?
75
+ return if Karafka::App.stopping?
76
+ return if Karafka::App.stopped?
78
77
 
79
78
  Karafka::App.stop!
80
79
 
@@ -125,6 +124,18 @@ module Karafka
125
124
  Karafka::App.stopped! if timeout
126
125
  end
127
126
 
127
+ # Quiets the Karafka server.
128
+ # Karafka will stop processing but won't quit the consumer group, so no rebalance will be
129
+ # triggered until final shutdown.
130
+ def quiet
131
+ # If we are already quieting or in the stop procedures, we should not do it again.
132
+ return if Karafka::App.quieting?
133
+ return if Karafka::App.stopping?
134
+ return if Karafka::App.stopped?
135
+
136
+ Karafka::App.quiet!
137
+ end
138
+
128
139
  private
129
140
 
130
141
  # @return [Karafka::Process] process wrapper instance used to catch system signal calls
@@ -84,6 +84,7 @@ module Karafka
84
84
  reconnect.backoff.jitter.ms
85
85
  reconnect.backoff.max.ms
86
86
  reconnect.backoff.ms
87
+ resolve_cb
87
88
  sasl.kerberos.keytab
88
89
  sasl.kerberos.kinit.cmd
89
90
  sasl.kerberos.min.time.before.relogin
@@ -215,6 +216,7 @@ module Karafka
215
216
  reconnect.backoff.ms
216
217
  request.required.acks
217
218
  request.timeout.ms
219
+ resolve_cb
218
220
  retries
219
221
  retry.backoff.ms
220
222
  sasl.kerberos.keytab
@@ -8,6 +8,7 @@ module Karafka
8
8
  initializing: :initialize!,
9
9
  initialized: :initialized!,
10
10
  running: :run!,
11
+ quieting: :quiet!,
11
12
  stopping: :stop!,
12
13
  stopped: :stopped!
13
14
  }.freeze
@@ -3,5 +3,5 @@
3
3
  # Main module namespace
4
4
  module Karafka
5
5
  # Current Karafka version
6
- VERSION = '2.0.17'
6
+ VERSION = '2.0.19'
7
7
  end
data/lib/karafka.rb CHANGED
@@ -86,6 +86,9 @@ end
86
86
  loader = Zeitwerk::Loader.for_gem
87
87
  # Do not load Rails extensions by default, this will be handled by Railtie if they are needed
88
88
  loader.ignore(Karafka.gem_root.join('lib/active_job'))
89
+ # Do not load Railtie. It will be loaded after everything is ready, so we don't have to load any
90
+ # Karafka components when we require this railtie. Railtie needs to be loaded last.
91
+ loader.ignore(Karafka.gem_root.join('lib/karafka/railtie'))
89
92
  # Do not load pro components as they will be loaded if needed and allowed
90
93
  loader.ignore(Karafka.core_root.join('pro/'))
91
94
  # Do not load vendors instrumentation components. Those need to be required manually if needed
@@ -96,3 +99,6 @@ loader.eager_load
96
99
  # This will load features but since Pro are not loaded automatically, they will not be visible
97
100
  # nor included here
98
101
  ::Karafka::Routing::Features::Base.load_all
102
+
103
+ # Load railtie after everything else is ready so we know we can rely on it.
104
+ require 'karafka/railtie'
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: karafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.17
4
+ version: 2.0.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
35
35
  Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
36
36
  MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
37
37
  -----END CERTIFICATE-----
38
- date: 2022-11-10 00:00:00.000000000 Z
38
+ date: 2022-11-20 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: karafka-core
@@ -176,6 +176,7 @@ files:
176
176
  - lib/karafka/cli/install.rb
177
177
  - lib/karafka/cli/server.rb
178
178
  - lib/karafka/connection/client.rb
179
+ - lib/karafka/connection/consumer_group_coordinator.rb
179
180
  - lib/karafka/connection/listener.rb
180
181
  - lib/karafka/connection/listeners_batch.rb
181
182
  - lib/karafka/connection/messages_buffer.rb
@@ -326,7 +327,12 @@ licenses:
326
327
  - LGPL-3.0
327
328
  - Commercial
328
329
  metadata:
330
+ funding_uri: https://karafka.io/#become-pro
331
+ homepage_uri: https://karafka.io
332
+ changelog_uri: https://github.com/karafka/karafka/blob/master/CHANGELOG.md
333
+ bug_tracker_uri: https://github.com/karafka/karafka/issues
329
334
  source_code_uri: https://github.com/karafka/karafka
335
+ documentation_uri: https://karafka.io/docs
330
336
  rubygems_mfa_required: 'true'
331
337
  post_install_message:
332
338
  rdoc_options: []
metadata.gz.sig CHANGED
Binary file