karafka 2.4.15 → 2.4.16

Files changed (41)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +5 -5
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +10 -0
  6. data/Gemfile +1 -0
  7. data/Gemfile.lock +16 -13
  8. data/config/locales/errors.yml +1 -0
  9. data/docker-compose.yml +0 -2
  10. data/lib/karafka/base_consumer.rb +63 -9
  11. data/lib/karafka/connection/client.rb +2 -16
  12. data/lib/karafka/connection/rebalance_manager.rb +24 -13
  13. data/lib/karafka/instrumentation/logger_listener.rb +3 -0
  14. data/lib/karafka/instrumentation/notifications.rb +3 -0
  15. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +1 -26
  16. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +2 -0
  17. data/lib/karafka/pro/processing/jobs/eofed_non_blocking.rb +2 -0
  18. data/lib/karafka/pro/processing/jobs/periodic.rb +2 -0
  19. data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb +2 -0
  20. data/lib/karafka/pro/processing/jobs/revoked_non_blocking.rb +2 -0
  21. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +2 -2
  22. data/lib/karafka/pro/processing/strategies/default.rb +87 -26
  23. data/lib/karafka/pro/processing/strategies/dlq/default.rb +1 -1
  24. data/lib/karafka/pro/processing/strategies/ftr/default.rb +2 -2
  25. data/lib/karafka/pro/processing/strategies/lrj/default.rb +2 -2
  26. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +2 -2
  27. data/lib/karafka/pro/processing/strategies/vp/default.rb +3 -0
  28. data/lib/karafka/processing/executor.rb +7 -0
  29. data/lib/karafka/processing/jobs/base.rb +13 -0
  30. data/lib/karafka/processing/jobs/consume.rb +2 -0
  31. data/lib/karafka/processing/jobs/eofed.rb +2 -0
  32. data/lib/karafka/processing/jobs/idle.rb +2 -0
  33. data/lib/karafka/processing/jobs/revoked.rb +2 -0
  34. data/lib/karafka/processing/jobs/shutdown.rb +2 -0
  35. data/lib/karafka/processing/strategies/default.rb +22 -11
  36. data/lib/karafka/processing/strategies/dlq.rb +1 -1
  37. data/lib/karafka/processing/worker.rb +21 -18
  38. data/lib/karafka/version.rb +1 -1
  39. data.tar.gz.sig +0 -0
  40. metadata +3 -6
  41. metadata.gz.sig +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 5b49de31147b2a64d8927c91e25e6b607fdf1a8a7280b7cc3ba8c3663cf96b6f
- data.tar.gz: 4a3d9e439e21b79b9ac6a2c8bf4e76219f970be1f6bb7316feb5c7f2f792271e
+ metadata.gz: 8510d261a0722b764aff2c59e214bb2b87481ce606488e8f1323c4bf29ee4528
+ data.tar.gz: 4574f71706749ec5effa830b9c04a6e3072c880220a08fa33f81648f7d3c79be
  SHA512:
- metadata.gz: 527a1c169ddc5a0f978f69dc3dbae6d30288997d2cfc95104ad6343d7a468c67afbde4c55051c1e7902d2c161fc1e615f8d703b9dfba123f734b9fbd7ccec5d1
- data.tar.gz: a895657a8dece0c9bdb65afeeb00bc8df2e03e1957d345bc71ceab9d6cce74e5d0b4afcc251e26dfe230778a05b8742e40505a779695325844c6be630307ad82
+ metadata.gz: 37c43b9634b43e6608fa7f2ab9320d49d00b2582c211095968d3200412029e0301d17556d31be2e24d8401457fd660473cc46adc4d5835de968a2eba9c51fbef
+ data.tar.gz: 15d2e2dbd7ee99ab1c726a47a76959f1e3d877cd723f084550711fd9ba4662a30eb39af26f1354847ab906aaec9580e0cde5f7858b0cda521f5f39844195a3c3
checksums.yaml.gz.sig CHANGED
Binary file
data/.github/workflows/ci.yml CHANGED
@@ -27,7 +27,7 @@ jobs:
  - name: Set up Ruby
  uses: ruby/setup-ruby@v1
  with:
- ruby-version: 3.3
+ ruby-version: 3.4
  bundler-cache: true

  - name: Install Diffend plugin
@@ -73,12 +73,12 @@ jobs:
  fail-fast: false
  matrix:
  ruby:
- - '3.4.0-preview1'
+ - '3.4'
  - '3.3'
  - '3.2'
  - '3.1'
  include:
- - ruby: '3.3'
+ - ruby: '3.4'
  coverage: 'true'
  steps:
  - uses: actions/checkout@v4
@@ -120,7 +120,7 @@ jobs:
  fail-fast: false
  matrix:
  ruby:
- - '3.4.0-preview1'
+ - '3.4'
  - '3.3'
  - '3.2'
  - '3.1'
@@ -178,7 +178,7 @@ jobs:
  fail-fast: false
  matrix:
  ruby:
- - '3.4.0-preview1'
+ - '3.4'
  - '3.3'
  - '3.2'
  - '3.1'
data/.ruby-version CHANGED
@@ -1 +1 @@
- 3.3.6
+ 3.4.1
data/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
  # Karafka Framework Changelog

+ ## 2.4.16 (2024-12-27)
+ - [Enhancement] Improve post-rebalance revocation messages filtering.
+ - [Enhancement] Introduce `Consumer#wrap` for connection pooling management and other wrapped operations.
+ - [Enhancement] Guard transactional operations from marking beyond assignment ownership under some extreme edge cases.
+ - [Enhancement] Improve how VPs work with transactional producers.
+ - [Enhancement] Prevent non-transactional operations from leaking into transactional managed offset management consumers.
+ - [Fix] Prevent transactions from being marked with a non-transactional default producer when automatic offset management and other advanced features are on.
+ - [Fix] Fix `kafka_format` `KeyError` that occurs when a non-hash is assigned to the kafka scope of the settings.
+ - [Fix] Non cooperative-sticky transactional offset management can refetch reclaimed partitions.
+
  ## 2.4.15 (2024-12-04)
  - [Fix] Assignment tracker current state fetch during a rebalance loop can cause an error on multi CG setup.
  - [Fix] Prevent double post-transaction offset dispatch to Kafka.
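The headline addition in 2.4.16 is `Consumer#wrap`, which lets a consumer check out per-job state (such as a transactional producer from a pool) that both user code and the framework's offset management will use. A minimal sketch of that pattern, assuming a `connection_pool`-backed set of WaterDrop producers; the `$producers` global and `EventsConsumer` are hypothetical names mirroring the example in the `#wrap` docs further below:

```ruby
require 'connection_pool'
require 'securerandom'

# Hypothetical global pool of transactional producers; not part of Karafka itself
$producers = ConnectionPool.new(size: 5, timeout: 5) do
  ::WaterDrop::Producer.new do |config|
    config.kafka = {
      'bootstrap.servers': '127.0.0.1:9092',
      'transactional.id': SecureRandom.uuid
    }
  end
end

class EventsConsumer < Karafka::BaseConsumer
  # Check a producer out of the pool only around the consume flow and always
  # restore the default one afterwards
  def wrap(action)
    return yield unless action == :consume

    default_producer = producer

    begin
      $producers.with do |pooled_producer|
        self.producer = pooled_producer
        yield
      end
    ensure
      self.producer = default_producer
    end
  end

  def consume
    messages.each { |message| puts message.payload }
  end
end
```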
data/Gemfile CHANGED
@@ -12,6 +12,7 @@ gemspec
  group :integrations, :test do
  gem 'fugit', require: false
  gem 'rspec', require: false
+ gem 'stringio'
  end

  group :integrations do
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
  remote: .
  specs:
- karafka (2.4.15)
+ karafka (2.4.16)
  base64 (~> 0.2)
  karafka-core (>= 2.4.4, < 2.5.0)
  karafka-rdkafka (>= 0.17.2)
@@ -11,10 +11,10 @@ PATH
  GEM
  remote: https://rubygems.org/
  specs:
- activejob (8.0.0)
- activesupport (= 8.0.0)
+ activejob (8.0.1)
+ activesupport (= 8.0.1)
  globalid (>= 0.3.6)
- activesupport (8.0.0)
+ activesupport (8.0.1)
  base64
  benchmark (>= 0.3)
  bigdecimal
@@ -49,9 +49,10 @@ GEM
  activesupport (>= 6.1)
  i18n (1.14.6)
  concurrent-ruby (~> 1.0)
- karafka-core (2.4.5)
- karafka-rdkafka (>= 0.17.6, < 0.18.0)
- karafka-rdkafka (0.17.6)
+ karafka-core (2.4.8)
+ karafka-rdkafka (>= 0.17.6, < 0.19.0)
+ logger (>= 1.6.0)
+ karafka-rdkafka (0.18.1)
  ffi (~> 1.15)
  mini_portile2 (~> 2.6)
  rake (> 12)
@@ -64,9 +65,9 @@ GEM
  karafka-core (>= 2.4.0, < 2.5.0)
  roda (~> 3.68, >= 3.69)
  tilt (~> 2.0)
- logger (1.6.1)
+ logger (1.6.3)
  mini_portile2 (2.8.8)
- minitest (5.25.1)
+ minitest (5.25.4)
  ostruct (0.6.1)
  raabro (1.4.0)
  rack (3.1.8)
@@ -86,22 +87,23 @@ GEM
  diff-lcs (>= 1.2.0, < 2.0)
  rspec-support (~> 3.13.0)
  rspec-support (3.13.1)
- securerandom (0.3.1)
+ securerandom (0.3.2)
  simplecov (0.22.0)
  docile (~> 1.1)
  simplecov-html (~> 0.11)
  simplecov_json_formatter (~> 0.1)
  simplecov-html (0.12.3)
  simplecov_json_formatter (0.1.4)
+ stringio (3.1.2)
  tilt (2.4.0)
  tzinfo (2.0.6)
  concurrent-ruby (~> 1.0)
- uri (1.0.0)
- waterdrop (2.8.0)
+ uri (1.0.2)
+ waterdrop (2.8.1)
  karafka-core (>= 2.4.3, < 3.0.0)
  karafka-rdkafka (>= 0.17.5)
  zeitwerk (~> 2.3)
- zeitwerk (2.6.18)
+ zeitwerk (2.7.1)

  PLATFORMS
  ruby
@@ -118,6 +120,7 @@ DEPENDENCIES
  ostruct
  rspec
  simplecov
+ stringio

  BUNDLED WITH
  2.4.22
data/config/locales/errors.yml CHANGED
@@ -95,6 +95,7 @@ en:

  topic:
  kafka: needs to be a hash with kafka scope settings details
+ kafka_format: needs to be a filled hash
  missing: needs to be present
  max_messages_format: 'needs to be an integer bigger than 0'
  max_wait_time_format: 'needs to be an integer bigger than 0'
data/docker-compose.yml CHANGED
@@ -1,5 +1,3 @@
- version: '2'
-
  services:
  kafka:
  container_name: kafka
data/lib/karafka/base_consumer.rb CHANGED
@@ -6,6 +6,9 @@ module Karafka
  class BaseConsumer
  # Allow for consumer instance tagging for instrumentation
  include ::Karafka::Core::Taggable
+ include Helpers::ConfigImporter.new(
+ monitor: %i[monitor]
+ )

  extend Forwardable
@@ -39,7 +42,7 @@ module Karafka
  def on_initialized
  handle_initialized
  rescue StandardError => e
- Karafka.monitor.instrument(
+ monitor.instrument(
  'error.occurred',
  error: e,
  caller: self,
@@ -73,6 +76,23 @@ module Karafka
  handle_before_consume
  end

+ # Executes the default wrapping flow
+ #
+ # @private
+ #
+ # @param action [Symbol]
+ # @param block [Proc]
+ def on_wrap(action, &block)
+ handle_wrap(action, &block)
+ rescue StandardError => e
+ monitor.instrument(
+ 'error.occurred',
+ error: e,
+ caller: self,
+ type: 'consumer.wrap.error'
+ )
+ end
+
  # Executes the default consumer flow.
  #
  # @private
@@ -85,7 +105,7 @@ module Karafka
  def on_consume
  handle_consume
  rescue StandardError => e
- Karafka.monitor.instrument(
+ monitor.instrument(
  'error.occurred',
  error: e,
  caller: self,
@@ -105,7 +125,7 @@ module Karafka
  def on_after_consume
  handle_after_consume
  rescue StandardError => e
- Karafka.monitor.instrument(
+ monitor.instrument(
  'error.occurred',
  error: e,
  caller: self,
@@ -125,7 +145,7 @@ module Karafka
  def on_eofed
  handle_eofed
  rescue StandardError => e
- Karafka.monitor.instrument(
+ monitor.instrument(
  'error.occurred',
  error: e,
  caller: self,
@@ -161,7 +181,7 @@ module Karafka
  def on_revoked
  handle_revoked
  rescue StandardError => e
- Karafka.monitor.instrument(
+ monitor.instrument(
  'error.occurred',
  error: e,
  caller: self,
@@ -182,7 +202,7 @@ module Karafka
  def on_shutdown
  handle_shutdown
  rescue StandardError => e
- Karafka.monitor.instrument(
+ monitor.instrument(
  'error.occurred',
  error: e,
  caller: self,
@@ -209,6 +229,40 @@ module Karafka
  raise NotImplementedError, 'Implement this in a subclass'
  end

+ # This method can be redefined to build a wrapping API around user code + karafka flow control
+ # code starting from the user code (operations prior to that are not part of this).
+ # The wrapping relates to a single job flow.
+ #
+ # Karafka framework may require user configured "state" like for example a selected
+ # transactional producer that should be used not only by the user but also by the framework.
+ # By using this API user can checkout a producer and return it to the pool.
+ #
+ # @param _action [Symbol] what action are we wrapping. Useful if we want for example to only
+ # wrap the `:consume` action.
+ # @yield Runs the execution block
+ #
+ # @note User related errors should not leak to this level of execution. This should not be used
+ # for anything consumption related but only for setting up state that Karafka code
+ # may need outside of user code.
+ #
+ # @example Redefine to use a producer from a pool for consume
+ # def wrap(action)
+ # # Do not checkout producer for any other actions
+ # return yield unless action == :consume
+ #
+ # default_producer = self.producer
+ #
+ # $producers.with do |producer|
+ # self.producer = producer
+ # yield
+ # end
+ #
+ # self.producer = default_producer
+ # end
+ def wrap(_action)
+ yield
+ end
+
  # Method that will be executed when a given topic partition reaches eof without any new
  # incoming messages alongside
  def eofed; end
@@ -255,7 +309,7 @@ module Karafka
  # Indicate, that user took a manual action of pausing
  coordinator.manual_pause if manual_pause

- Karafka.monitor.instrument(
+ monitor.instrument(
  'consumer.consuming.pause',
  caller: self,
  manual: manual_pause,
@@ -299,7 +353,7 @@ module Karafka
  offset
  )

- Karafka.monitor.instrument(
+ monitor.instrument(
  'consumer.consuming.seek',
  caller: self,
  topic: topic.name,
@@ -347,7 +401,7 @@ module Karafka

  # Instrumentation needs to run **after** `#pause` invocation because we rely on the states
  # set by `#pause`
- Karafka.monitor.instrument(
+ monitor.instrument(
  'consumer.consuming.retry',
  caller: self,
  topic: topic.name,
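Errors raised inside `#wrap` do not crash the worker; as the `on_wrap` hunk above shows, they surface as `error.occurred` notifications tagged with the new `consumer.wrap.error` type. A small sketch (assumed to live in `karafka.rb` after the app configuration) of subscribing to them:

```ruby
Karafka.monitor.subscribe('error.occurred') do |event|
  # Only react to failures coming from the wrap flow
  next unless event[:type] == 'consumer.wrap.error'

  Karafka.logger.fatal("#wrap failed in #{event[:caller].class}: #{event[:error]}")
end
```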
data/lib/karafka/connection/client.rb CHANGED
@@ -66,7 +66,7 @@ module Karafka
  @subscription_group = subscription_group
  @buffer = RawMessagesBuffer.new
  @tick_interval = ::Karafka::App.config.internal.tick_interval
- @rebalance_manager = RebalanceManager.new(@subscription_group.id)
+ @rebalance_manager = RebalanceManager.new(@subscription_group.id, @buffer)
  @rebalance_callback = Instrumentation::Callbacks::Rebalance.new(@subscription_group)

  @interval_runner = Helpers::IntervalRunner.new do
@@ -141,7 +141,7 @@ module Karafka
  # Since rebalances do not occur often, we can run events polling as well without
  # any throttling
  events_poll
- remove_revoked_and_duplicated_messages
+
  break
  end

@@ -717,20 +717,6 @@ module Karafka
  consumer
  end

- # We may have a case where in the middle of data polling, we've lost a partition.
- # In a case like this we should remove all the pre-buffered messages from lost partitions as
- # we are no longer responsible in a given process for processing those messages and they
- # should have been picked up by a different process.
- def remove_revoked_and_duplicated_messages
- @rebalance_manager.lost_partitions.each do |topic, partitions|
- partitions.each do |partition|
- @buffer.delete(topic, partition)
- end
- end
-
- @buffer.uniq!
- end
-
  # @return [Rdkafka::Consumer] librdkafka consumer instance
  def kafka
  return @kafka if @kafka
data/lib/karafka/connection/rebalance_manager.rb CHANGED
@@ -2,7 +2,9 @@

  module Karafka
  module Connection
- # Manager for tracking changes in the partitions assignment after the assignment is done.
+ # Manager for tracking changes in the partitions assignment after the assignment is done and
+ # for ensuring that proper buffer related operations that may be impacted by the rebalance
+ # state are applied.
  #
  # We need tracking of those to clean up consumers that will no longer process given partitions
  # as they were taken away.
@@ -30,13 +32,15 @@ module Karafka
  private_constant :EMPTY_ARRAY

  # @param subscription_group_id [String] subscription group id
+ # @param buffer [Karafka::Connection::RawMessagesBuffer]
  # @return [RebalanceManager]
- def initialize(subscription_group_id)
+ def initialize(subscription_group_id, buffer)
  @assigned_partitions = {}
  @revoked_partitions = {}
  @changed = false
  @active = false
  @subscription_group_id = subscription_group_id
+ @buffer = buffer

  # Connects itself to the instrumentation pipeline so rebalances can be tracked
  ::Karafka.monitor.subscribe(self)
@@ -64,17 +68,6 @@ module Karafka
  @active
  end

- # We consider as lost only partitions that were taken away and not re-assigned back to us
- def lost_partitions
- lost_partitions = {}
-
- revoked_partitions.each do |topic, partitions|
- lost_partitions[topic] = partitions - assigned_partitions.fetch(topic, EMPTY_ARRAY)
- end
-
- lost_partitions
- end
-
  # Callback that kicks in inside of rdkafka, when new partitions were assigned.
  #
  # @private
@@ -99,6 +92,24 @@ module Karafka
  @active = true
  @revoked_partitions = event[:tpl].to_h.transform_values { |part| part.map(&:partition) }
  @changed = true
+
+ remove_revoked_and_duplicated_messages
+ end
+
+ private
+
+ # We may have a case where in the middle of data polling, we've lost a partition.
+ # In a case like this we should remove all the pre-buffered messages from lost partitions as
+ # we are no longer responsible in a given process for processing those messages and they
+ # should have been picked up by a different process.
+ def remove_revoked_and_duplicated_messages
+ @revoked_partitions.each do |topic, partitions|
+ partitions.each do |partition|
+ @buffer.delete(topic, partition)
+ end
+ end
+
+ @buffer.uniq!
  end
  end
  end
data/lib/karafka/instrumentation/logger_listener.rb CHANGED
@@ -293,6 +293,9 @@ module Karafka
  when 'consumer.initialized.error'
  error "Consumer initialized error: #{error}"
  error details
+ when 'consumer.wrap.error'
+ error "Consumer wrap failed due to an error: #{error}"
+ error details
  when 'consumer.consume.error'
  error "Consumer consuming error: #{error}"
  error details
data/lib/karafka/instrumentation/notifications.rb CHANGED
@@ -77,6 +77,9 @@ module Karafka
  consumer.shutting_down
  consumer.shutdown

+ consumer.wrap
+ consumer.wrapped
+
  dead_letter_queue.dispatched

  filtering.throttled
data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb CHANGED
@@ -56,32 +56,7 @@ module Karafka
  consumer = job.executor.topic.consumer
  topic = job.executor.topic.name

- action = case job_type
- when 'Periodic'
- 'tick'
- when 'PeriodicNonBlocking'
- 'tick'
- when 'Shutdown'
- 'shutdown'
- when 'Revoked'
- 'revoked'
- when 'RevokedNonBlocking'
- 'revoked'
- when 'Idle'
- 'idle'
- when 'Eofed'
- 'eofed'
- when 'EofedNonBlocking'
- 'eofed'
- when 'ConsumeNonBlocking'
- 'consume'
- when 'Consume'
- 'consume'
- else
- raise Errors::UnsupportedCaseError, job_type
- end
-
- current_span.resource = "#{consumer}##{action}"
+ current_span.resource = "#{consumer}##{job.class.action}"
  info "[#{job.id}] #{job_type} job for #{consumer} on #{topic} started"

  pop_tags
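With the `action` class attribute in place, custom listeners no longer need the case statement deleted above: any job can report its consumer-facing action directly. A sketch of a minimal listener relying on it (`JobsLogger` is a hypothetical name; `worker.process` events carry the job in their payload, as the worker code later in this diff shows):

```ruby
class JobsLogger
  # Invoked for each `worker.process` event published by the worker
  def on_worker_process(event)
    job = event[:job]
    consumer = job.executor.topic.consumer

    # Produces e.g. "EventsConsumer#consume" or "EventsConsumer#tick"
    Karafka.logger.info("Started: #{consumer}##{job.class.action}")
  end
end

Karafka.monitor.subscribe(JobsLogger.new)
```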
data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb CHANGED
@@ -26,6 +26,8 @@ module Karafka
  # @note It needs to be working with a proper consumer that will handle the partition
  # management. This layer of the framework knows nothing about Kafka messages consumption.
  class ConsumeNonBlocking < ::Karafka::Processing::Jobs::Consume
+ self.action = :consume
+
  # Makes this job non-blocking from the start
  # @param args [Array] any arguments accepted by `::Karafka::Processing::Jobs::Consume`
  def initialize(*args)
data/lib/karafka/pro/processing/jobs/eofed_non_blocking.rb CHANGED
@@ -20,6 +20,8 @@ module Karafka
  # to run this job for extended period of time. Under such scenarios, if we would not use
  # a non-blocking one, we would reach max.poll.interval.ms.
  class EofedNonBlocking < ::Karafka::Processing::Jobs::Eofed
+ self.action = :eofed
+
  # @param args [Array] any arguments accepted by `::Karafka::Processing::Jobs::Eofed`
  def initialize(*args)
  super
data/lib/karafka/pro/processing/jobs/periodic.rb CHANGED
@@ -18,6 +18,8 @@ module Karafka
  # Job that represents a "ticking" work. Work that we run periodically for the Periodics
  # enabled topics.
  class Periodic < ::Karafka::Processing::Jobs::Base
+ self.action = :tick
+
  # @param executor [Karafka::Pro::Processing::Executor] pro executor that is supposed to
  # run a given job
  def initialize(executor)
data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb CHANGED
@@ -20,6 +20,8 @@ module Karafka
  # to run this job for extended period of time. Under such scenarios, if we would not use
  # a non-blocking one, we would reach max.poll.interval.ms.
  class PeriodicNonBlocking < Periodic
+ self.action = :tick
+
  # @param args [Array] any arguments accepted by `::Karafka::Processing::Jobs::Periodic`
  def initialize(*args)
  super
data/lib/karafka/pro/processing/jobs/revoked_non_blocking.rb CHANGED
@@ -24,6 +24,8 @@ module Karafka
  # in scenarios where there are more jobs than threads, without this being async we
  # would potentially stop polling
  class RevokedNonBlocking < ::Karafka::Processing::Jobs::Revoked
+ self.action = :revoked
+
  # Makes this job non-blocking from the start
  # @param args [Array] any arguments accepted by `::Karafka::Processing::Jobs::Revoked`
  def initialize(*args)
data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb CHANGED
@@ -69,8 +69,8 @@ module Karafka
  coordinator.revoke
  end

- Karafka.monitor.instrument('consumer.revoke', caller: self)
- Karafka.monitor.instrument('consumer.revoked', caller: self) do
+ monitor.instrument('consumer.revoke', caller: self)
+ monitor.instrument('consumer.revoked', caller: self) do
  revoked
  end
  ensure
data/lib/karafka/pro/processing/strategies/default.rb CHANGED
@@ -54,8 +54,11 @@ module Karafka
  # already processed but rather at the next one. This applies to both sync and async
  # versions of this method.
  def mark_as_consumed(message, offset_metadata = @_current_offset_metadata)
+ # If we are inside a transaction then we can just mark as consumed within it
  if @_in_transaction
  mark_in_transaction(message, offset_metadata, true)
+ elsif @_in_transaction_marked
+ mark_in_memory(message)
  else
  # seek offset can be nil only in case `#seek` was invoked with offset reset request
  # In case like this we ignore marking
@@ -64,16 +67,15 @@ module Karafka
  return true if coordinator.seek_offset > message.offset
  return false if revoked?

- # If we have already marked this successfully in a transaction that was running
- # we should not mark it again with the client offset delegation but instead we should
- # just align the in-memory state
- if @_in_transaction_marked
- coordinator.seek_offset = message.offset + 1
+ # If we are not inside a transaction but this is a transactional topic, we mark with
+ # artificially created transaction
+ stored = if producer.transactional?
+ mark_with_transaction(message, offset_metadata, true)
+ else
+ client.mark_as_consumed(message, offset_metadata)
+ end

- return true
- end
-
- return revoked? unless client.mark_as_consumed(message, offset_metadata)
+ return revoked? unless stored

  coordinator.seek_offset = message.offset + 1
  end
@@ -92,6 +94,8 @@ module Karafka
  def mark_as_consumed!(message, offset_metadata = @_current_offset_metadata)
  if @_in_transaction
  mark_in_transaction(message, offset_metadata, false)
+ elsif @_in_transaction_marked
+ mark_in_memory(message)
  else
  # seek offset can be nil only in case `#seek` was invoked with offset reset request
  # In case like this we ignore marking
@@ -100,13 +104,15 @@ module Karafka
  return true if coordinator.seek_offset > message.offset
  return false if revoked?

- if @_in_transaction_marked
- coordinator.seek_offset = message.offset + 1
-
- return true
- end
+ # If we are not inside a transaction but this is a transactional topic, we mark with
+ # artificially created transaction
+ stored = if producer.transactional?
+ mark_with_transaction(message, offset_metadata, false)
+ else
+ client.mark_as_consumed!(message, offset_metadata)
+ end

- return revoked? unless client.mark_as_consumed!(message, offset_metadata)
+ return revoked? unless stored

  coordinator.seek_offset = message.offset + 1
  end
@@ -128,7 +134,7 @@ module Karafka
  # managing multiple producers. If not provided, default producer taken from `#producer`
  # will be used.
  #
- # @param block [Proc] code that we want to run in a transaction
+ # @yield code that we want to run in a transaction
  #
  # @note Please note, that if you provide the producer, it will reassign the producer of
@@ -136,7 +142,7 @@ module Karafka
  # reassigned producer and not the initially used/assigned producer. It is done that
  # way, so the message producing aliases operate from within transactions and since the
  # producer in transaction is locked, it will prevent other threads from using it.
- def transaction(active_producer = producer, &block)
+ def transaction(active_producer = producer)
  default_producer = producer
  self.producer = active_producer
@@ -150,7 +156,14 @@ module Karafka
  @_in_transaction = true
  @_in_transaction_marked = false

- producer.transaction(&block)
+ producer.transaction do
+ yield
+
+ # Ensure this transaction is rolled back if we have lost the ownership of this
+ # transaction. We do it only for transactions that contain offset management as for
+ # producer only, this is not relevant.
+ raise Errors::AssignmentLostError if @_in_transaction_marked && revoked?
+ end

  @_in_transaction = false
@@ -174,6 +187,8 @@ module Karafka
  @_transaction_marked.each do |marking|
  marking.pop ? mark_as_consumed(*marking) : mark_as_consumed!(*marking)
  end
+
+ true
  ensure
  self.producer = default_producer
@@ -206,9 +221,55 @@ module Karafka
  @_transaction_marked << [message, offset_metadata, async]
  end

+ # @private
+ # @param message [Messages::Message] message we want to commit inside of a transaction
+ # @param offset_metadata [String, nil] offset metadata or nil if none
+ # @param async [Boolean] should we mark in async or sync way (applicable only to post
+ # transaction state synchronization usage as within transaction it is always sync)
+ # @return [Boolean] false if marking failed, otherwise true
+ def mark_with_transaction(message, offset_metadata, async)
+ # This flag is used by VPs to differentiate between user initiated transactions and
+ # post-execution system transactions.
+ @_transaction_internal = true
+
+ transaction do
+ mark_in_transaction(message, offset_metadata, async)
+ end
+
+ true
+ # We handle both cases here because this is a private API for internal usage and we want
+ # the post-user code execution marking with transactional producer to result in a
+ # boolean state of marking for further framework flow. This is a normalization to make it
+ # behave the same way as it would behave with a non-transactional one
+ rescue ::Rdkafka::RdkafkaError, Errors::AssignmentLostError
+ false
+ ensure
+ @_transaction_internal = false
+ end
+
+ # Marks the current state only in memory as the offset marking has already happened
+ # using the producer transaction
+ # @param message [Messages::Message] last successfully processed message.
+ # @return [Boolean] true if all good, false if we lost assignment and no point in marking
+ def mark_in_memory(message)
+ # seek offset can be nil only in case `#seek` was invoked with offset reset request
+ # In case like this we ignore marking
+ return true if coordinator.seek_offset.nil?
+ # Ignore earlier offsets than the one we already committed
+ return true if coordinator.seek_offset > message.offset
+ return false if revoked?
+
+ # If we have already marked this successfully in a transaction that was running
+ # we should not mark it again with the client offset delegation but instead we should
+ # just align the in-memory state
+ coordinator.seek_offset = message.offset + 1
+
+ true
+ end
+
  # No actions needed for the standard flow here
  def handle_before_schedule_consume
- Karafka.monitor.instrument('consumer.before_schedule_consume', caller: self)
+ monitor.instrument('consumer.before_schedule_consume', caller: self)

  nil
  end
@@ -227,8 +288,8 @@ module Karafka
  # This can happen primarily when an LRJ job gets to the internal worker queue and
  # this partition is revoked prior to processing.
  unless revoked?
- Karafka.monitor.instrument('consumer.consume', caller: self)
- Karafka.monitor.instrument('consumer.consumed', caller: self) do
+ monitor.instrument('consumer.consume', caller: self)
+ monitor.instrument('consumer.consumed', caller: self) do
  consume
  end
  end
@@ -274,8 +335,8 @@ module Karafka
  coordinator.revoke
  end

- Karafka.monitor.instrument('consumer.revoke', caller: self)
- Karafka.monitor.instrument('consumer.revoked', caller: self) do
+ monitor.instrument('consumer.revoke', caller: self)
+ monitor.instrument('consumer.revoked', caller: self) do
  revoked
  end
  ensure
@@ -284,15 +345,15 @@ module Karafka
  # No action needed for the tick standard flow
  def handle_before_schedule_tick
- Karafka.monitor.instrument('consumer.before_schedule_tick', caller: self)
+ monitor.instrument('consumer.before_schedule_tick', caller: self)

  nil
  end

  # Runs the consumer `#tick` method with reporting
  def handle_tick
- Karafka.monitor.instrument('consumer.tick', caller: self)
- Karafka.monitor.instrument('consumer.ticked', caller: self) do
+ monitor.instrument('consumer.tick', caller: self)
+ monitor.instrument('consumer.ticked', caller: self) do
  tick
  end
  ensure
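Taken together, these hunks change how a transactional consumer stores offsets: inside `#transaction` the marking piggybacks on the producer's transaction; outside of one, a transactional producer now gets a short system transaction via `mark_with_transaction`; and a lost assignment rolls the whole thing back via `Errors::AssignmentLostError` instead of committing stale offsets. The user-facing flow is unchanged; a sketch of it (`EventsConsumer` and the `events-copy` topic are hypothetical):

```ruby
class EventsConsumer < Karafka::BaseConsumer
  def consume
    transaction do
      messages.each do |message|
        # Produced within the same transaction as the offset marking below
        producer.produce_async(topic: 'events-copy', payload: message.raw_payload)
      end

      # Stored through the transactional producer; if the assignment is lost
      # before the block finishes, the transaction rolls back
      mark_as_consumed(messages.last)
    end
  end
end
```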
data/lib/karafka/pro/processing/strategies/dlq/default.rb CHANGED
@@ -119,7 +119,7 @@ module Karafka
  )

  # Notify about dispatch on the events bus
- Karafka.monitor.instrument(
+ monitor.instrument(
  'dead_letter_queue.dispatched',
  caller: self,
  message: skippable_message
data/lib/karafka/pro/processing/strategies/ftr/default.rb CHANGED
@@ -89,7 +89,7 @@ module Karafka

  throttle_message = filter.cursor

- Karafka.monitor.instrument(
+ monitor.instrument(
  'filtering.seek',
  caller: self,
  message: throttle_message
@@ -104,7 +104,7 @@ module Karafka

  throttle_message = filter.cursor

- Karafka.monitor.instrument(
+ monitor.instrument(
  'filtering.throttled',
  caller: self,
  message: throttle_message,
data/lib/karafka/pro/processing/strategies/lrj/default.rb CHANGED
@@ -72,8 +72,8 @@ module Karafka
  coordinator.revoke
  end

- Karafka.monitor.instrument('consumer.revoke', caller: self)
- Karafka.monitor.instrument('consumer.revoked', caller: self) do
+ monitor.instrument('consumer.revoke', caller: self)
+ monitor.instrument('consumer.revoked', caller: self) do
  revoked
  end
  ensure
data/lib/karafka/pro/processing/strategies/lrj/mom.rb CHANGED
@@ -64,8 +64,8 @@ module Karafka
  coordinator.revoke
  end

- Karafka.monitor.instrument('consumer.revoke', caller: self)
- Karafka.monitor.instrument('consumer.revoked', caller: self) do
+ monitor.instrument('consumer.revoke', caller: self)
+ monitor.instrument('consumer.revoked', caller: self) do
  revoked
  end
  ensure
data/lib/karafka/pro/processing/strategies/vp/default.rb CHANGED
@@ -101,6 +101,9 @@ module Karafka
  raise Errors::AssignmentLostError if revoked?

  return super if collapsed?
+ # If this is a post-execution transaction (one initiated by the system) we should
+ # delegate to the original implementation that will store the offset via the producer
+ return super if @_transaction_internal

  @_transaction_marked << [message, offset_metadata, async]
  end
data/lib/karafka/processing/executor.rb CHANGED
@@ -81,6 +81,13 @@ module Karafka
  consumer.on_before_consume
  end

+ # Runs the wrap/around execution context appropriate for a given action
+ # @param action [Symbol] action execution wrapped with our block
+ # @param block [Proc] execution context
+ def wrap(action, &block)
+ consumer.on_wrap(action, &block)
+ end
+
  # Runs consumer data processing against given batch and handles failures and errors.
  def consume
  # We run the consumer client logic...
data/lib/karafka/processing/jobs/base.rb CHANGED
@@ -15,6 +15,11 @@ module Karafka

  attr_reader :executor

+ class << self
+ # @return [Symbol] action that a given job class maps to
+ attr_accessor :action
+ end
+
  # Creates a new job instance
  def initialize
  # All jobs are blocking by default and they can release the lock when blocking operations
@@ -23,6 +28,14 @@ module Karafka
  @status = :pending
  end

+ # Runs the wrap/around job hook within which the rest of the flow happens
+ # @param block [Proc] whole user related processing flow
+ # @note We inject the action name so the user can decide whether to run custom logic for
+ # a given action or not.
+ def wrap(&block)
+ executor.wrap(self.class.action, &block)
+ end
+
  # When redefined can run any code prior to the job being scheduled
  # @note This will run in the listener thread and not in the worker
  def before_schedule
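This is the whole plumbing behind `Consumer#wrap`: each job class declares which consumer action it represents via the new class-level `action` attribute, and `Job#wrap` forwards that symbol through the executor to the consumer's redefinable `#wrap` hook. A toy illustration of the class-level API (`MyJob` is hypothetical; the built-in job classes in the hunks below declare their actions the same way):

```ruby
class MyJob < Karafka::Processing::Jobs::Base
  # Declare which consumer action this job maps to
  self.action = :consume
end

MyJob.action # => :consume
```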
data/lib/karafka/processing/jobs/consume.rb CHANGED
@@ -9,6 +9,8 @@ module Karafka
  # @return [Array<Rdkafka::Consumer::Message>] array with messages
  attr_reader :messages

+ self.action = :consume
+
  # @param executor [Karafka::Processing::Executor] executor that is supposed to run a given
  # job
  # @param messages [Karafka::Messages::Messages] karafka messages batch
data/lib/karafka/processing/jobs/eofed.rb CHANGED
@@ -5,6 +5,8 @@ module Karafka
  module Jobs
  # Job that runs the eofed operation when we receive eof without messages alongside.
  class Eofed < Base
+ self.action = :eofed
+
  # @param executor [Karafka::Processing::Executor] executor that is supposed to run the job
  # @return [Eofed]
  def initialize(executor)
data/lib/karafka/processing/jobs/idle.rb CHANGED
@@ -6,6 +6,8 @@ module Karafka
  # Type of job that we may use to run some extra handling that happens without the user
  # related lifecycle event like consumption, revocation, etc.
  class Idle < Base
+ self.action = :idle
+
  # @param executor [Karafka::Processing::Executor] executor that is supposed to run a given
  # job on an active consumer
  # @return [Idle]
data/lib/karafka/processing/jobs/revoked.rb CHANGED
@@ -5,6 +5,8 @@ module Karafka
  module Jobs
  # Job that runs the revoked operation when we lose a partition on a consumer that lost it.
  class Revoked < Base
+ self.action = :revoked
+
  # @param executor [Karafka::Processing::Executor] executor that is supposed to run the job
  # @return [Revoked]
  def initialize(executor)
data/lib/karafka/processing/jobs/shutdown.rb CHANGED
@@ -5,6 +5,8 @@ module Karafka
  module Jobs
  # Job that runs on each active consumer upon process shutdown (one job per consumer).
  class Shutdown < Base
+ self.action = :shutdown
+
  # @param executor [Karafka::Processing::Executor] executor that is supposed to run a given
  # job on an active consumer
  # @return [Shutdown]
data/lib/karafka/processing/strategies/default.rb CHANGED
@@ -24,7 +24,7 @@ module Karafka
  class_eval <<~RUBY, __FILE__, __LINE__ + 1
  # No actions needed for the standard flow here
  def handle_before_schedule_#{action}
- Karafka.monitor.instrument('consumer.before_schedule_#{action}', caller: self)
+ monitor.instrument('consumer.before_schedule_#{action}', caller: self)

  nil
  end
@@ -35,8 +35,8 @@ module Karafka
  # @note It runs in the listener loop. Should **not** be used for anything heavy or
  # with any potential errors. Mostly for initialization of states, etc.
  def handle_initialized
- Karafka.monitor.instrument('consumer.initialize', caller: self)
- Karafka.monitor.instrument('consumer.initialized', caller: self) do
+ monitor.instrument('consumer.initialize', caller: self)
+ monitor.instrument('consumer.initialized', caller: self) do
  initialized
  end
  end
@@ -115,10 +115,21 @@ module Karafka
  coordinator.pause_tracker.increment
  end

+ # Runs the appropriate action wrapped with the `#wrap` method code
+ #
+ # @param action [Symbol]
+ # @param block [Proc]
+ def handle_wrap(action, &block)
+ monitor.instrument('consumer.wrap', caller: self)
+ monitor.instrument('consumer.wrapped', caller: self) do
+ wrap(action, &block)
+ end
+ end
+
  # Run the user consumption code
  def handle_consume
- Karafka.monitor.instrument('consumer.consume', caller: self)
- Karafka.monitor.instrument('consumer.consumed', caller: self) do
+ monitor.instrument('consumer.consume', caller: self)
+ monitor.instrument('consumer.consumed', caller: self) do
  consume
  end

@@ -164,8 +175,8 @@ module Karafka

  # Runs the consumer `#eofed` method with reporting
  def handle_eofed
- Karafka.monitor.instrument('consumer.eof', caller: self)
- Karafka.monitor.instrument('consumer.eofed', caller: self) do
+ monitor.instrument('consumer.eof', caller: self)
+ monitor.instrument('consumer.eofed', caller: self) do
  eofed
  end
  ensure
@@ -180,8 +191,8 @@ module Karafka

  coordinator.revoke

- Karafka.monitor.instrument('consumer.revoke', caller: self)
- Karafka.monitor.instrument('consumer.revoked', caller: self) do
+ monitor.instrument('consumer.revoke', caller: self)
+ monitor.instrument('consumer.revoked', caller: self) do
  revoked
  end
  ensure
@@ -190,8 +201,8 @@ module Karafka

  # Runs the shutdown code
  def handle_shutdown
- Karafka.monitor.instrument('consumer.shutting_down', caller: self)
- Karafka.monitor.instrument('consumer.shutdown', caller: self) do
+ monitor.instrument('consumer.shutting_down', caller: self)
+ monitor.instrument('consumer.shutdown', caller: self) do
  shutdown
  end
  ensure
data/lib/karafka/processing/strategies/dlq.rb CHANGED
@@ -117,7 +117,7 @@ module Karafka
  )

  # Notify about dispatch on the events bus
- Karafka.monitor.instrument(
+ monitor.instrument(
  'dead_letter_queue.dispatched',
  caller: self,
  message: skippable_message
data/lib/karafka/processing/worker.rb CHANGED
@@ -19,7 +19,8 @@ module Karafka
  class Worker
  include Helpers::Async
  include Helpers::ConfigImporter.new(
- worker_job_call_wrapper: %i[internal processing worker_job_call_wrapper]
+ worker_job_call_wrapper: %i[internal processing worker_job_call_wrapper],
+ monitor: %i[monitor]
  )

  # @return [String] id of this worker
@@ -53,28 +54,30 @@ module Karafka
  instrument_details = { caller: self, job: job, jobs_queue: @jobs_queue }

  if job
- Karafka.monitor.instrument('worker.process', instrument_details)
+ job.wrap do
+ monitor.instrument('worker.process', instrument_details)

- Karafka.monitor.instrument('worker.processed', instrument_details) do
- job.before_call
+ monitor.instrument('worker.processed', instrument_details) do
+ job.before_call

- # If a job is marked as non blocking, we can run a tick in the job queue and if there
- # are no other blocking factors, the job queue will be unlocked.
- # If this does not run, all the things will be blocking and job queue won't allow to
- # pass it until done.
- @jobs_queue.tick(job.group_id) if job.non_blocking?
+ # If a job is marked as non blocking, we can run a tick in the job queue and if there
+ # are no other blocking factors, the job queue will be unlocked.
+ # If this does not run, all the things will be blocking and job queue won't allow to
+ # pass it until done.
+ @jobs_queue.tick(job.group_id) if job.non_blocking?

- if @non_wrapped_flow
- job.call
- else
- worker_job_call_wrapper.wrap do
+ if @non_wrapped_flow
  job.call
+ else
+ worker_job_call_wrapper.wrap do
+ job.call
+ end
  end
- end

- job.after_call
+ job.after_call

- true
+ true
+ end
  end
  else
  false
@@ -83,7 +86,7 @@ module Karafka
  # rubocop:disable Lint/RescueException
  rescue Exception => e
  # rubocop:enable Lint/RescueException
- Karafka.monitor.instrument(
+ monitor.instrument(
  'error.occurred',
  caller: self,
  job: job,
@@ -99,7 +102,7 @@ module Karafka
  end

  # Always publish info, that we completed all the work despite its result
- Karafka.monitor.instrument('worker.completed', instrument_details)
+ monitor.instrument('worker.completed', instrument_details)
  end
  end
  end
data/lib/karafka/version.rb CHANGED
@@ -3,5 +3,5 @@
  # Main module namespace
  module Karafka
  # Current Karafka version
- VERSION = '2.4.15'
+ VERSION = '2.4.16'
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,11 +1,10 @@
  --- !ruby/object:Gem::Specification
  name: karafka
  version: !ruby/object:Gem::Version
- version: 2.4.15
+ version: 2.4.16
  platform: ruby
  authors:
  - Maciej Mensfeld
- autorequire:
  bindir: bin
  cert_chain:
  - |
@@ -35,7 +34,7 @@ cert_chain:
  i9zWxov0mr44TWegTVeypcWGd/0nxu1+QHVNHJrpqlPBRvwQsUm7fwmRInGpcaB8
  ap8wNYvryYzrzvzUxIVFBVM5PacgkFqRmolCa8I7tdKQN+R1
  -----END CERTIFICATE-----
- date: 2024-12-04 00:00:00.000000000 Z
+ date: 2024-12-27 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: base64
@@ -605,7 +604,6 @@ metadata:
  source_code_uri: https://github.com/karafka/karafka
  documentation_uri: https://karafka.io/docs
  rubygems_mfa_required: 'true'
- post_install_message:
  rdoc_options: []
  require_paths:
  - lib
@@ -620,8 +618,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.5.22
- signing_key:
+ rubygems_version: 3.6.2
  specification_version: 4
  summary: Karafka is Ruby and Rails efficient Kafka processing framework.
  test_files: []
metadata.gz.sig CHANGED
Binary file