karafka 2.1.4 → 2.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +28 -0
  4. data/Gemfile.lock +20 -20
  5. data/karafka.gemspec +2 -2
  6. data/lib/karafka/admin.rb +37 -4
  7. data/lib/karafka/base_consumer.rb +21 -5
  8. data/lib/karafka/connection/client.rb +118 -95
  9. data/lib/karafka/connection/rebalance_manager.rb +2 -4
  10. data/lib/karafka/errors.rb +4 -1
  11. data/lib/karafka/messages/builders/message.rb +0 -3
  12. data/lib/karafka/messages/seek.rb +3 -0
  13. data/lib/karafka/patches/rdkafka/bindings.rb +4 -6
  14. data/lib/karafka/pro/iterator/expander.rb +95 -0
  15. data/lib/karafka/pro/iterator/tpl_builder.rb +145 -0
  16. data/lib/karafka/pro/iterator.rb +2 -87
  17. data/lib/karafka/pro/processing/filters_applier.rb +1 -0
  18. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +3 -1
  19. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +3 -1
  20. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +3 -1
  21. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +3 -1
  22. data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +3 -1
  23. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +4 -1
  24. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +2 -2
  25. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +2 -2
  26. data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +2 -1
  27. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +3 -1
  28. data/lib/karafka/pro/processing/strategies/ftr/default.rb +8 -1
  29. data/lib/karafka/pro/processing/strategies/lrj/default.rb +1 -1
  30. data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +2 -2
  31. data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +2 -2
  32. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +3 -1
  33. data/lib/karafka/pro/processing/virtual_offset_manager.rb +1 -1
  34. data/lib/karafka/processing/coordinator.rb +14 -0
  35. data/lib/karafka/processing/strategies/default.rb +27 -11
  36. data/lib/karafka/railtie.rb +2 -2
  37. data/lib/karafka/setup/attributes_map.rb +1 -0
  38. data/lib/karafka/version.rb +1 -1
  39. data.tar.gz.sig +0 -0
  40. metadata +10 -9
  41. metadata.gz.sig +0 -0
  42. data/lib/karafka/patches/rdkafka/consumer.rb +0 -22
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: e54dc80fd5a3a93857fd6a3a175030f7cb2eee1f77668d6936b705b0789c4228
-  data.tar.gz: e7f3b02811432987048bc599c2c4a6f0444fe13d98ca4aa8e87cebd5aaa8037a
+  metadata.gz: a6994a6d579728a877f84c87086d093aae8a1f830b891fcb4904883085432fe4
+  data.tar.gz: 13b21009a471194a72971ca81ddc718e044bb96587db0e8f186974f554e9ec62
 SHA512:
-  metadata.gz: 871ed7a55421041b1b232c47b3da0f625b8f2114c00057daf883223d3da4dc16637c9f0b4714cebe4a5f33249beac9bd7abbe15ec122f5b1a45a3c953dcb1465
-  data.tar.gz: ac2cc50c3277bd0bac7e3a2af40cc6d5a9c3aa92fc8cc3d2b12b8645329d269d0ed0d80c462d2fa6a03a677213e84802a1dfe8fdc1c494348b1a8b4fd9ef8459
+  metadata.gz: e4711880bde1d2cd1cb34959f740459979b74ff4d28a671a232f88adbe7473cf67e366fc2b492fac761c572f3a6dfc147a59d46fc08e1c5e18df8ac5f108afdd
+  data.tar.gz: c094600c2bd421ce309c0125d60ea82ed0106d5ce4566b3bb8c1aab13c553e7bd2f6651b98029e42ac831b132563b2c502dd1c76defbf8307cd9bd2393b258f7
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,5 +1,33 @@
 # Karafka framework changelog
 
+## 2.1.6 (2023-06-29)
+- [Improvement] Provide time support for iterator.
+- [Improvement] Provide time support for admin `#read_topic`.
+- [Improvement] Provide time support for consumer `#seek`.
+- [Improvement] Remove no longer needed locks for client operations.
+- [Improvement] Raise `Karafka::Errors::TopicNotFoundError` when trying to iterate over a non-existing topic.
+- [Improvement] Ensure that Kafka multi-command operations run together under a mutex.
+- [Change] Require `waterdrop` `>= 2.6.2`.
+- [Change] Require `karafka-core` `>= 2.1.1`.
+- [Refactor] Clean up iterator code.
+- [Fix] Improve performance in dev environment for a Rails app (juike).
+- [Fix] Rename `InvalidRealOffsetUsage` to `InvalidRealOffsetUsageError` to align with the naming of other errors.
+- [Fix] Fix unstable spec.
+- [Fix] Fix a case where automatic `#seek` would overwrite a manual seek of a user when running LRJ.
+- [Fix] Make sure that user-initiated `#seek` and `#pause` operations take precedence over system actions.
+- [Fix] Make sure that `#pause` and `#resume` with one underlying connection do not race-condition.
+
+## 2.1.5 (2023-06-19)
+- [Improvement] Drastically improve `#revoked?` response quality by checking the real-time assignment lost state on librdkafka.
+- [Improvement] Improve eviction of saturated jobs that would run on already revoked assignments.
+- [Improvement] Expose `#commit_offsets` and `#commit_offsets!` methods in the consumer to provide the ability to commit offsets directly to Kafka without having to mark new messages as consumed.
+- [Improvement] No longer skip offset commit when no messages are marked as consumed, as `librdkafka` has fixed the crashes there.
+- [Improvement] Remove no longer needed patches.
+- [Improvement] Ensure that the coordinator revocation status is switched upon revocation detection when using `#revoked?`.
+- [Improvement] Add benchmarks for marking as consumed (sync and async).
+- [Change] Require `karafka-core` `>= 2.1.0`.
+- [Change] Require `waterdrop` `>= 2.6.1`.
+
 ## 2.1.4 (2023-06-06)
 - [Fix] `processing_lag` and `consumption_lag` on empty batch fail on shutdown usage (#1475)
 
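Taken together, these entries extend the public API surface. A minimal sketch of how the time-based and direct-commit additions might be used from application code (the topic name and the `replay_needed?` helper are illustrative, not part of Karafka):

```ruby
# 2.1.6: Admin#read_topic accepts a Time and resolves it to the closest offset.
# Read up to 100 messages from partition 0, starting roughly 10 minutes back.
Karafka::Admin.read_topic('events', 0, 100, Time.now - 600)

class EventsConsumer < Karafka::BaseConsumer
  def consume
    messages.each { |message| puts message.raw_payload }

    # 2.1.5: commit offsets directly, without marking new messages as consumed
    commit_offsets

    # 2.1.6: consumer #seek also accepts a Time resolved to the matching offset
    seek(Time.now - 3600) if replay_needed?
  end
end
```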
data/Gemfile.lock CHANGED
@@ -1,19 +1,19 @@
 PATH
   remote: .
   specs:
-    karafka (2.1.4)
-      karafka-core (>= 2.0.13, < 3.0.0)
+    karafka (2.1.6)
+      karafka-core (>= 2.1.1, < 2.2.0)
       thor (>= 0.20)
-      waterdrop (>= 2.5.3, < 3.0.0)
+      waterdrop (>= 2.6.2, < 3.0.0)
       zeitwerk (~> 2.3)
 
 GEM
   remote: https://rubygems.org/
   specs:
-    activejob (7.0.4.3)
-      activesupport (= 7.0.4.3)
+    activejob (7.0.5)
+      activesupport (= 7.0.5)
       globalid (>= 0.3.6)
-    activesupport (7.0.4.3)
+    activesupport (7.0.5)
       concurrent-ruby (~> 1.0, >= 1.0.2)
       i18n (>= 1.6, < 2)
       minitest (>= 5.1)
@@ -28,26 +28,26 @@ GEM
     ffi (1.15.5)
     globalid (1.1.0)
       activesupport (>= 5.0)
-    i18n (1.13.0)
+    i18n (1.14.1)
       concurrent-ruby (~> 1.0)
-    karafka-core (2.0.13)
+    karafka-core (2.1.1)
       concurrent-ruby (>= 1.1)
-      karafka-rdkafka (>= 0.12.3)
-    karafka-rdkafka (0.12.3)
+      karafka-rdkafka (>= 0.13.1, < 0.14.0)
+    karafka-rdkafka (0.13.1)
       ffi (~> 1.15)
       mini_portile2 (~> 2.6)
       rake (> 12)
-    karafka-web (0.5.2)
+    karafka-web (0.6.1)
       erubi (~> 1.4)
-      karafka (>= 2.0.40, < 3.0.0)
-      karafka-core (>= 2.0.12, < 3.0.0)
-      roda (~> 3.63)
+      karafka (>= 2.1.4, < 3.0.0)
+      karafka-core (>= 2.0.13, < 3.0.0)
+      roda (~> 3.68, >= 3.68)
       tilt (~> 2.0)
     mini_portile2 (2.8.2)
-    minitest (5.18.0)
-    rack (3.0.7)
+    minitest (5.18.1)
+    rack (3.0.8)
     rake (13.0.6)
-    roda (3.68.0)
+    roda (3.69.0)
       rack
     rspec (3.12.0)
       rspec-core (~> 3.12.0)
@@ -69,11 +69,11 @@ GEM
     simplecov-html (0.12.3)
     simplecov_json_formatter (0.1.4)
     thor (1.2.2)
-    tilt (2.1.0)
+    tilt (2.2.0)
     tzinfo (2.0.6)
       concurrent-ruby (~> 1.0)
-    waterdrop (2.5.3)
-      karafka-core (>= 2.0.13, < 3.0.0)
+    waterdrop (2.6.2)
+      karafka-core (>= 2.1.0, < 3.0.0)
       zeitwerk (~> 2.3)
     zeitwerk (2.6.8)
 
data/karafka.gemspec CHANGED
@@ -21,9 +21,9 @@ Gem::Specification.new do |spec|
     without having to focus on things that are not your business domain.
   DESC
 
-  spec.add_dependency 'karafka-core', '>= 2.0.13', '< 3.0.0'
+  spec.add_dependency 'karafka-core', '>= 2.1.1', '< 2.2.0'
   spec.add_dependency 'thor', '>= 0.20'
-  spec.add_dependency 'waterdrop', '>= 2.5.3', '< 3.0.0'
+  spec.add_dependency 'waterdrop', '>= 2.6.2', '< 3.0.0'
   spec.add_dependency 'zeitwerk', '~> 2.3'
 
   if $PROGRAM_NAME.end_with?('gem')
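Note that `karafka-core` is now capped below `2.2.0`. An application upgrading from 2.1.4 may need to adjust its own pins accordingly; an illustrative Gemfile fragment matching the new constraints:

```ruby
# Gemfile (versions illustrative, mirroring the gemspec constraints above)
gem 'karafka', '~> 2.1.6'
gem 'waterdrop', '>= 2.6.2', '< 3.0.0'
gem 'karafka-core', '>= 2.1.1', '< 2.2.0'
```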
data/lib/karafka/admin.rb CHANGED
@@ -18,6 +18,9 @@ module Karafka
     #   retry after checking that the operation was finished or failed using external factor.
     MAX_WAIT_TIMEOUT = 1
 
+    # Max time for a TPL request. We increase it to compensate for remote clusters latency
+    TPL_REQUEST_TIMEOUT = 2_000
+
     # How many times should we try. 1 x 60 => 60 seconds wait in total
     MAX_ATTEMPTS = 60
 
@@ -34,7 +37,8 @@ module Karafka
       'enable.auto.commit': false
     }.freeze
 
-    private_constant :Topic, :CONFIG_DEFAULTS, :MAX_WAIT_TIMEOUT, :MAX_ATTEMPTS
+    private_constant :Topic, :CONFIG_DEFAULTS, :MAX_WAIT_TIMEOUT, :TPL_REQUEST_TIMEOUT,
+                     :MAX_ATTEMPTS
 
     class << self
       # Allows us to read messages from the topic
@@ -42,8 +46,9 @@ module Karafka
       # @param name [String, Symbol] topic name
       # @param partition [Integer] partition
       # @param count [Integer] how many messages we want to get at most
-      # @param start_offset [Integer] offset from which we should start. If -1 is provided
-      #   (default) we will start from the latest offset
+      # @param start_offset [Integer, Time] offset from which we should start. If -1 is provided
+      #   (default) we will start from the latest offset. If time is provided, the appropriate
+      #   offset will be resolved.
       # @param settings [Hash] kafka extra settings (optional)
       #
       # @return [Array<Karafka::Messages::Message>] array with messages
@@ -53,6 +58,9 @@ module Karafka
         low_offset, high_offset = nil
 
         with_consumer(settings) do |consumer|
+          # Convert the time offset (if needed)
+          start_offset = resolve_offset(consumer, name.to_s, partition, start_offset)
+
           low_offset, high_offset = consumer.query_watermark_offsets(name, partition)
 
           # Select offset dynamically if -1 or less
@@ -171,7 +179,9 @@ module Karafka
      # @return [Rdkafka::Metadata] cluster metadata info
      def cluster_info
        with_admin do |admin|
-          Rdkafka::Metadata.new(admin.instance_variable_get('@native_kafka'))
+          admin.instance_variable_get('@native_kafka').with_inner do |inner|
+            Rdkafka::Metadata.new(inner)
+          end
        end
      end
 
@@ -241,6 +251,29 @@ module Karafka
 
        ::Rdkafka::Config.new(config_hash)
      end
+
+      # Resolves the offset if the offset is in a time format. Otherwise returns the offset
+      # without resolving.
+      # @param consumer [::Rdkafka::Consumer]
+      # @param name [String, Symbol] expected topic name
+      # @param partition [Integer]
+      # @param offset [Integer, Time]
+      # @return [Integer] expected offset
+      def resolve_offset(consumer, name, partition, offset)
+        if offset.is_a?(Time)
+          tpl = ::Rdkafka::Consumer::TopicPartitionList.new
+          tpl.add_topic_and_partitions_with_offsets(
+            name, partition => offset
+          )
+
+          real_offsets = consumer.offsets_for_times(tpl, TPL_REQUEST_TIMEOUT)
+          detected_offset = real_offsets.to_h.dig(name, partition)
+
+          detected_offset&.offset || raise(Errors::InvalidTimeBasedOffsetError)
+        else
+          offset
+        end
+      end
     end
   end
 end
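With `resolve_offset` in place, `read_topic` transparently accepts a `Time` start offset. A hedged usage sketch (topic and partition are illustrative):

```ruby
since = Time.now - 3600 # one hour ago

# The Time is resolved to a concrete offset via #offsets_for_times before
# consumption starts; Errors::InvalidTimeBasedOffsetError is raised when
# the resolution fails
messages = Karafka::Admin.read_topic('events', 0, 10, since)
messages.each { |message| puts "#{message.offset}: #{message.timestamp}" }
```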
data/lib/karafka/base_consumer.rb CHANGED
@@ -70,6 +70,7 @@ module Karafka
   #
   # @return [Boolean] true if there was no exception, otherwise false.
   #
+  # @private
   # @note We keep the seek offset tracking, and use it to compensate for async offset flushing
   #   that may not yet kick in when an error occurs. That way we always pause on the last
   #   processed message.
@@ -203,8 +204,15 @@ module Karafka
 
   # Seeks in the context of current topic and partition
   #
-  # @param offset [Integer] offset where we want to seek
-  def seek(offset)
+  # @param offset [Integer, Time] offset where we want to seek or time of the offset where we
+  #   want to seek.
+  # @param manual_seek [Boolean] Flag to differentiate between user seek and system/strategy
+  #   based seek. User seek operations should take precedence over system actions, hence we
+  #   need to know who invoked it.
+  # @note Please note that if you are seeking to a time offset, getting the offset is blocking.
+  def seek(offset, manual_seek = true)
+    coordinator.manual_seek if manual_seek
+
     client.seek(
       Karafka::Messages::Seek.new(
         topic.name,
@@ -215,10 +223,18 @@ module Karafka
   end
 
   # @return [Boolean] true if partition was revoked from the current consumer
-  # @note We know that partition got revoked because when we try to mark message as consumed,
-  #   unless if is successful, it will return false
+  # @note There are two "levels" on which we can know that a partition was revoked. The first
+  #   one is when we lose the assignment involuntarily and the second is when the coordinator
+  #   gets this info after we poll with the rebalance callbacks. The first check allows us to
+  #   get this notion even before we poll but it gets reset when polling happens, hence we also
+  #   need to switch the coordinator state after the revocation (but prior to running more jobs)
   def revoked?
-    coordinator.revoked?
+    return true if coordinator.revoked?
+    return false unless client.assignment_lost?
+
+    coordinator.revoke
+
+    true
   end
 
   # @return [Boolean] are we retrying processing after an error. This can be used to provide a
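These consumer-level changes are visible from user code. A hedged sketch of how they interact, assuming a long-running-job style consumer (the `process` helper and topic setup are illustrative, not part of Karafka):

```ruby
class LongRunningConsumer < Karafka::BaseConsumer
  def consume
    messages.each do |message|
      # #revoked? now also consults librdkafka's assignment-lost state,
      # so long jobs notice involuntary revocations without waiting for a poll
      return if revoked?

      process(message) # hypothetical domain logic
      mark_as_consumed(message)
    end

    # A user-invoked seek is recorded via coordinator.manual_seek
    # (manual_seek defaults to true), so strategies that seek back
    # automatically (e.g. LRJ) will not overwrite it
    seek(messages.last.offset + 1)
  end
end
```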
data/lib/karafka/connection/client.rb CHANGED
@@ -20,11 +20,14 @@ module Karafka
       # How many times should we retry polling in case of a failure
       MAX_POLL_RETRIES = 20
 
+      # Max time for a TPL request. We increase it to compensate for remote clusters latency
+      TPL_REQUEST_TIMEOUT = 2_000
+
       # We want to make sure we never close several clients in the same moment to prevent
       # potential race conditions and other issues
       SHUTDOWN_MUTEX = Mutex.new
 
-      private_constant :MAX_POLL_RETRIES, :SHUTDOWN_MUTEX
+      private_constant :MAX_POLL_RETRIES, :SHUTDOWN_MUTEX, :TPL_REQUEST_TIMEOUT
 
       # Creates a new consumer instance.
       #
@@ -35,15 +38,16 @@ module Karafka
         @id = SecureRandom.hex(6)
         # Name is set when we build consumer
         @name = ''
-        @mutex = Mutex.new
         @closed = false
         @subscription_group = subscription_group
         @buffer = RawMessagesBuffer.new
         @rebalance_manager = RebalanceManager.new
         @kafka = build_consumer
-        # Marks if we need to offset. If we did not store offsets, we should not commit the
-        # offset position as it will crash rdkafka
-        @offsetting = false
+        # There are a few operations that can happen in parallel from the listener threads as
+        # well as from the workers. They are not fully thread-safe because they may be composed
+        # out of a few calls to Kafka or out of a few internal state changes. That is why we
+        # mutex them. It mostly revolves around pausing and resuming.
+        @mutex = Mutex.new
         # We need to keep track of what we have paused for resuming
         # In case we lose a partition, we still need to resume it, otherwise it won't be fetched
         # again if we get reassigned to it later on. We need to keep them as after revocation we
@@ -104,13 +108,15 @@ module Karafka
       #
       # @param message [Karafka::Messages::Message]
       def store_offset(message)
-        @mutex.synchronize do
-          internal_store_offset(message)
-        end
+        internal_store_offset(message)
+      end
+
+      # @return [Boolean] true if our current assignment has been lost involuntarily.
+      def assignment_lost?
+        @kafka.assignment_lost?
       end
 
       # Commits the offset on a current consumer in a non-blocking or blocking way.
-      # Ignoring a case where there would not be an offset (for example when rebalance occurs).
       #
       # @param async [Boolean] should the commit happen async or sync (async by default)
       # @return [Boolean] whether committing was successful. It may not be, when we no longer own
@@ -118,13 +124,13 @@ module Karafka
       #
       # @note This will commit all the offsets for the whole consumer. In order to achieve
       #   granular control over where the offset should be for particular topic partitions, the
-      #   store_offset should be used to only store new offset when we want to to be flushed
+      #   store_offset should be used to only store new offset when we want them to be flushed
+      #
+      # @note This method for async may return `true` despite involuntary partition revocation as
+      #   it does **not** resolve to `lost_assignment?`. It returns only the commit state
+      #   operation result.
       def commit_offsets(async: true)
-        @mutex.lock
-
         internal_commit_offsets(async: async)
-      ensure
-        @mutex.unlock
       end
 
       # Commits offset in a synchronous way.
@@ -137,13 +143,11 @@ module Karafka
       # Seek to a particular message. The next poll on the topic/partition will return the
       # message at the given offset.
       #
-      # @param message [Messages::Message, Messages::Seek] message to which we want to seek to
+      # @param message [Messages::Message, Messages::Seek] message to which we want to seek to.
+      #   It can have the time based offset.
+      # @note Please note that if you are seeking to a time offset, getting the offset is blocking.
       def seek(message)
-        @mutex.lock
-
-        @kafka.seek(message)
-      ensure
-        @mutex.unlock
+        @mutex.synchronize { internal_seek(message) }
       end
 
       # Pauses given partition and moves back to last successful offset processed.
@@ -154,37 +158,34 @@ module Karafka
      #   be reprocessed after getting back to processing)
      # @note This will pause indefinitely and requires manual `#resume`
      def pause(topic, partition, offset)
-        @mutex.lock
-
-        # Do not pause if the client got closed, would not change anything
-        return if @closed
-
-        pause_msg = Messages::Seek.new(topic, partition, offset)
+        @mutex.synchronize do
+          # Do not pause if the client got closed, would not change anything
+          return if @closed
 
-        internal_commit_offsets(async: true)
+          pause_msg = Messages::Seek.new(topic, partition, offset)
 
-        # Here we do not use our cached tpls because we should not try to pause something we do
-        # not own anymore.
-        tpl = topic_partition_list(topic, partition)
+          internal_commit_offsets(async: true)
 
-        return unless tpl
+          # Here we do not use our cached tpls because we should not try to pause something we
+          # do not own anymore.
+          tpl = topic_partition_list(topic, partition)
 
-        Karafka.monitor.instrument(
-          'client.pause',
-          caller: self,
-          subscription_group: @subscription_group,
-          topic: topic,
-          partition: partition,
-          offset: offset
-        )
+          return unless tpl
 
-        @paused_tpls[topic][partition] = tpl
+          Karafka.monitor.instrument(
+            'client.pause',
+            caller: self,
+            subscription_group: @subscription_group,
+            topic: topic,
+            partition: partition,
+            offset: offset
+          )
 
-        @kafka.pause(tpl)
+          @paused_tpls[topic][partition] = tpl
 
-        @kafka.seek(pause_msg)
-      ensure
-        @mutex.unlock
+          @kafka.pause(tpl)
+          internal_seek(pause_msg)
+        end
      end
 
      # Resumes processing of a given topic partition after it was paused.
@@ -192,33 +193,31 @@ module Karafka
      # @param topic [String] topic name
      # @param partition [Integer] partition
      def resume(topic, partition)
-        @mutex.lock
-
-        return if @closed
+        @mutex.synchronize do
+          return if @closed
 
-        # We now commit offsets on rebalances, thus we can do it async just to make sure
-        internal_commit_offsets(async: true)
+          # We now commit offsets on rebalances, thus we can do it async just to make sure
+          internal_commit_offsets(async: true)
 
-        # If we were not able, let's try to reuse the one we have (if we have)
-        tpl = topic_partition_list(topic, partition) || @paused_tpls[topic][partition]
+          # If we were not able, let's try to reuse the one we have (if we have)
+          tpl = topic_partition_list(topic, partition) || @paused_tpls[topic][partition]
 
-        return unless tpl
+          return unless tpl
 
-        # If we did not have it, it means we never paused this partition, thus no resume should
-        # happen in the first place
-        return unless @paused_tpls[topic].delete(partition)
+          # If we did not have it, it means we never paused this partition, thus no resume
+          # should happen in the first place
+          return unless @paused_tpls[topic].delete(partition)
 
-        Karafka.monitor.instrument(
-          'client.resume',
-          caller: self,
-          subscription_group: @subscription_group,
-          topic: topic,
-          partition: partition
-        )
+          Karafka.monitor.instrument(
+            'client.resume',
+            caller: self,
+            subscription_group: @subscription_group,
+            topic: topic,
+            partition: partition
+          )
 
-        @kafka.resume(tpl)
-      ensure
-        @mutex.unlock
+          @kafka.resume(tpl)
+        end
      end
 
      # Gracefully stops topic consumption.
@@ -235,9 +234,10 @@ module Karafka
      # @param [Karafka::Messages::Message] message that we want to mark as processed
      # @return [Boolean] true if successful. False if we no longer own given partition
      # @note This method won't trigger automatic offsets commits, rather relying on the offset
-      #   check-pointing trigger that happens with each batch processed
+      #   check-pointing trigger that happens with each batch processed. It will however check
+      #   the `librdkafka` assignment ownership to increase accuracy for involuntary revocations.
      def mark_as_consumed(message)
-        store_offset(message)
+        store_offset(message) && !assignment_lost?
      end
 
      # Marks a given message as consumed and commits the offsets in a blocking way.
@@ -254,12 +254,9 @@ module Karafka
      def reset
        close
 
-        @mutex.synchronize do
-          @closed = false
-          @offsetting = false
-          @paused_tpls.clear
-          @kafka = build_consumer
-        end
+        @closed = false
+        @paused_tpls.clear
+        @kafka = build_consumer
      end
 
      # Runs a single poll ignoring all the potential errors
@@ -281,7 +278,6 @@ module Karafka
      # @param message [Karafka::Messages::Message]
      # @return [Boolean] true if we could store the offset (if we still own the partition)
      def internal_store_offset(message)
-        @offsetting = true
        @kafka.store_offset(message)
        true
      rescue Rdkafka::RdkafkaError => e
@@ -294,11 +290,11 @@ module Karafka
      # Non thread-safe message committing method
      # @param async [Boolean] should the commit happen async or sync (async by default)
      # @return [Boolean] true if offset commit worked, false if we've lost the assignment
+      # @note We do **not** consider `no_offset` as any problem and we allow to commit offsets
+      #   even when none were stored, because with a sync commit it refreshes the ownership
+      #   state of the consumer in a sync way.
      def internal_commit_offsets(async: true)
-        return true unless @offsetting
-
        @kafka.commit(nil, async)
-        @offsetting = false
 
        true
      rescue Rdkafka::RdkafkaError => e
@@ -317,28 +313,55 @@ module Karafka
        raise e
      end
 
+      # Non-mutexed seek that should be used only internally. Outside we expose `#seek` that is
+      # wrapped with a mutex.
+      #
+      # @param message [Messages::Message, Messages::Seek] message to which we want to seek to.
+      #   It can have the time based offset.
+      def internal_seek(message)
+        # If the seek message offset is in a time format, we need to find the closest "real"
+        # offset matching before we seek
+        if message.offset.is_a?(Time)
+          tpl = ::Rdkafka::Consumer::TopicPartitionList.new
+          tpl.add_topic_and_partitions_with_offsets(
+            message.topic,
+            message.partition => message.offset
+          )
+
+          # Now we can overwrite the seek message offset with our resolved offset and we can
+          # then seek to the appropriate message
+          # We set the timeout to 2_000 to make sure that remote clusters handle this well
+          real_offsets = @kafka.offsets_for_times(tpl, TPL_REQUEST_TIMEOUT)
+          detected_partition = real_offsets.to_h.dig(message.topic, message.partition)
+
+          # There always needs to be an offset. In case we seek into the future, where there
+          # are no offsets yet, we get -1 which indicates the most recent offset
+          # We should always detect an offset, whether it is 0, -1 or a corresponding one
+          message.offset = detected_partition&.offset || raise(Errors::InvalidTimeBasedOffsetError)
+        end
+
+        @kafka.seek(message)
+      end
+
      # Commits the stored offsets in a sync way and closes the consumer.
      def close
        # Allow only one client to be closed at the same time
        SHUTDOWN_MUTEX.synchronize do
-          # Make sure that no other operations are happening on this client when we close it
-          @mutex.synchronize do
-            # Once client is closed, we should not close it again
-            # This could only happen in case of a race-condition when forceful shutdown happens
-            # and triggers this from a different thread
-            return if @closed
-
-            @closed = true
-
-            # Remove callbacks runners that were registered
-            ::Karafka::Core::Instrumentation.statistics_callbacks.delete(@subscription_group.id)
-            ::Karafka::Core::Instrumentation.error_callbacks.delete(@subscription_group.id)
-
-            @kafka.close
-            @buffer.clear
-            # @note We do not clear rebalance manager here as we may still have revocation info
-            #   here that we want to consider valid prior to running another reconnection
-          end
+          # Once client is closed, we should not close it again
+          # This could only happen in case of a race-condition when forceful shutdown happens
+          # and triggers this from a different thread
+          return if @closed
+
+          @closed = true
+
+          # Remove callbacks runners that were registered
+          ::Karafka::Core::Instrumentation.statistics_callbacks.delete(@subscription_group.id)
+          ::Karafka::Core::Instrumentation.error_callbacks.delete(@subscription_group.id)
+
+          @kafka.close
+          @buffer.clear
+          # @note We do not clear rebalance manager here as we may still have revocation info
+          #   here that we want to consider valid prior to running another reconnection
        end
      end
 
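The time-to-offset resolution above relies on librdkafka's `offsets_for_times` lookup as exposed by the `karafka-rdkafka` bindings. A standalone sketch of the same call against a raw consumer (broker address and topic are illustrative):

```ruby
require 'rdkafka'

consumer = Rdkafka::Config.new('bootstrap.servers': 'localhost:9092').consumer

tpl = Rdkafka::Consumer::TopicPartitionList.new
# Request the first offset with a timestamp >= one hour ago on partition 0
tpl.add_topic_and_partitions_with_offsets('events', 0 => Time.now - 3600)

# 2_000 ms timeout, mirroring TPL_REQUEST_TIMEOUT above
result = consumer.offsets_for_times(tpl, 2_000)

# -1 indicates the most recent offset (e.g. when the time points into the future)
puts result.to_h.dig('events', 0)&.offset
```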
data/lib/karafka/connection/rebalance_manager.rb CHANGED
@@ -49,9 +49,8 @@ module Karafka
     # Callback that kicks in inside of rdkafka, when new partitions are assigned.
     #
     # @private
-    # @param _ [Rdkafka::Consumer]
     # @param partitions [Rdkafka::Consumer::TopicPartitionList]
-    def on_partitions_assigned(_, partitions)
+    def on_partitions_assigned(partitions)
       @assigned_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
       @changed = true
     end
@@ -59,9 +58,8 @@ module Karafka
     # Callback that kicks in inside of rdkafka, when partitions are revoked.
     #
     # @private
-    # @param _ [Rdkafka::Consumer]
     # @param partitions [Rdkafka::Consumer::TopicPartitionList]
-    def on_partitions_revoked(_, partitions)
+    def on_partitions_revoked(partitions)
       @revoked_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
       @changed = true
     end
data/lib/karafka/errors.rb CHANGED
@@ -48,6 +48,9 @@ module Karafka
     StrategyNotFoundError = Class.new(BaseError)
 
     # This should never happen. Please open an issue if it does.
-    InvalidRealOffsetUsage = Class.new(BaseError)
+    InvalidRealOffsetUsageError = Class.new(BaseError)
+
+    # This should never happen. Please open an issue if it does.
+    InvalidTimeBasedOffsetError = Class.new(BaseError)
   end
 end
data/lib/karafka/messages/builders/message.rb CHANGED
@@ -12,9 +12,6 @@ module Karafka
       # @param received_at [Time] moment when we've received the message
       # @return [Karafka::Messages::Message] message object with payload and metadata
       def call(kafka_message, topic, received_at)
-        # @see https://github.com/appsignal/rdkafka-ruby/issues/168
-        kafka_message.headers.transform_keys!(&:to_s)
-
         metadata = Karafka::Messages::Metadata.new(
           timestamp: kafka_message.timestamp,
           headers: kafka_message.headers,
data/lib/karafka/messages/seek.rb CHANGED
@@ -4,6 +4,9 @@ module Karafka
   module Messages
     # "Fake" message that we use as an abstraction layer when seeking back.
     # This allows us to encapsulate a seek with a simple abstraction
+    #
+    # @note `#offset` can be either the offset value or the time of the offset
+    #   (first equal or greater)
     Seek = Struct.new(:topic, :partition, :offset)
   end
 end
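A small illustration of the struct's dual role after this change (topic name is illustrative):

```ruby
# Classic offset-based seek target
Karafka::Messages::Seek.new('events', 0, 42)

# Time-based seek target; Client#internal_seek resolves it to the first
# offset with a timestamp equal to or greater than the given time
Karafka::Messages::Seek.new('events', 0, Time.now - 300)
```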