karafka 2.1.5 → 2.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +17 -0
- data/Gemfile.lock +8 -8
- data/karafka.gemspec +2 -2
- data/lib/karafka/admin.rb +34 -3
- data/lib/karafka/base_consumer.rb +10 -2
- data/lib/karafka/connection/client.rb +103 -86
- data/lib/karafka/errors.rb +4 -1
- data/lib/karafka/messages/seek.rb +3 -0
- data/lib/karafka/pro/iterator/expander.rb +95 -0
- data/lib/karafka/pro/iterator/tpl_builder.rb +145 -0
- data/lib/karafka/pro/iterator.rb +2 -87
- data/lib/karafka/pro/processing/filters_applier.rb +1 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +3 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +3 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +3 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +3 -1
- data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +3 -1
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +4 -1
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +2 -2
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +2 -2
- data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +2 -1
- data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +3 -1
- data/lib/karafka/pro/processing/strategies/ftr/default.rb +8 -1
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +2 -2
- data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +2 -2
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +3 -1
- data/lib/karafka/pro/processing/virtual_offset_manager.rb +1 -1
- data/lib/karafka/processing/coordinator.rb +14 -0
- data/lib/karafka/railtie.rb +2 -2
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +2 -5
- metadata +8 -6
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a6994a6d579728a877f84c87086d093aae8a1f830b891fcb4904883085432fe4
+  data.tar.gz: 13b21009a471194a72971ca81ddc718e044bb96587db0e8f186974f554e9ec62
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e4711880bde1d2cd1cb34959f740459979b74ff4d28a671a232f88adbe7473cf67e366fc2b492fac761c572f3a6dfc147a59d46fc08e1c5e18df8ac5f108afdd
+  data.tar.gz: c094600c2bd421ce309c0125d60ea82ed0106d5ce4566b3bb8c1aab13c553e7bd2f6651b98029e42ac831b132563b2c502dd1c76defbf8307cd9bd2393b258f7
checksums.yaml.gz.sig
CHANGED
Binary file
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,22 @@
 # Karafka framework changelog

+## 2.1.6 (2023-06-29)
+- [Improvement] Provide time support for iterator
+- [Improvement] Provide time support for admin `#read_topic`
+- [Improvement] Provide time support for consumer `#seek`.
+- [Improvement] Remove no longer needed locks for client operations.
+- [Improvement] Raise `Karafka::Errors::TopicNotFoundError` when trying to iterate over non-existing topic.
+- [Improvement] Ensure that Kafka multi-command operations run under mutex together.
+- [Change] Require `waterdrop` `>= 2.6.2`
+- [Change] Require `karafka-core` `>= 2.1.1`
+- [Refactor] Clean-up iterator code.
+- [Fix] Improve performance in dev environment for a Rails app (juike)
+- [Fix] Rename `InvalidRealOffsetUsage` to `InvalidRealOffsetUsageError` to align with naming of other errors.
+- [Fix] Fix unstable spec.
+- [Fix] Fix a case where automatic `#seek` would overwrite manual seek of a user when running LRJ.
+- [Fix] Make sure, that user direct `#seek` and `#pause` operations take precedence over system actions.
+- [Fix] Make sure, that `#pause` and `#resume` with one underlying connection do not race-condition.
+
 ## 2.1.5 (2023-06-19)
 - [Improvement] Drastically improve `#revoked?` response quality by checking the real time assignment lost state on librdkafka.
 - [Improvement] Improve eviction of saturated jobs that would run on already revoked assignments.
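Taken together, the time-support entries mean a consumer can now seek by wall-clock time instead of a numeric offset. A minimal sketch, assuming a hypothetical `EventsConsumer` and a topic holding messages older than five minutes:

    class EventsConsumer < Karafka::BaseConsumer
      def consume
        # Seeking to a Time resolves to the first offset at or after that time.
        # Called directly like this, it counts as a manual seek and takes
        # precedence over the framework's automatic post-processing seeks.
        seek(Time.now - 300)
      end
    end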
data/Gemfile.lock
CHANGED
@@ -1,10 +1,10 @@
 PATH
   remote: .
   specs:
-    karafka (2.1.5)
-      karafka-core (>= 2.1.
+    karafka (2.1.6)
+      karafka-core (>= 2.1.1, < 2.2.0)
       thor (>= 0.20)
-      waterdrop (>= 2.6.
+      waterdrop (>= 2.6.2, < 3.0.0)
       zeitwerk (~> 2.3)

 GEM
@@ -30,14 +30,14 @@ GEM
     activesupport (>= 5.0)
     i18n (1.14.1)
       concurrent-ruby (~> 1.0)
-    karafka-core (2.1.
+    karafka-core (2.1.1)
       concurrent-ruby (>= 1.1)
-      karafka-rdkafka (>= 0.13.
-    karafka-rdkafka (0.13.
+      karafka-rdkafka (>= 0.13.1, < 0.14.0)
+    karafka-rdkafka (0.13.1)
       ffi (~> 1.15)
       mini_portile2 (~> 2.6)
       rake (> 12)
-    karafka-web (0.6.
+    karafka-web (0.6.1)
       erubi (~> 1.4)
       karafka (>= 2.1.4, < 3.0.0)
       karafka-core (>= 2.0.13, < 3.0.0)
@@ -72,7 +72,7 @@ GEM
     tilt (2.2.0)
     tzinfo (2.0.6)
      concurrent-ruby (~> 1.0)
-    waterdrop (2.6.
+    waterdrop (2.6.2)
      karafka-core (>= 2.1.0, < 3.0.0)
      zeitwerk (~> 2.3)
    zeitwerk (2.6.8)
data/karafka.gemspec
CHANGED
@@ -21,9 +21,9 @@ Gem::Specification.new do |spec|
     without having to focus on things that are not your business domain.
   DESC

-  spec.add_dependency 'karafka-core', '>= 2.1.
+  spec.add_dependency 'karafka-core', '>= 2.1.1', '< 2.2.0'
   spec.add_dependency 'thor', '>= 0.20'
-  spec.add_dependency 'waterdrop', '>= 2.6.
+  spec.add_dependency 'waterdrop', '>= 2.6.2', '< 3.0.0'
   spec.add_dependency 'zeitwerk', '~> 2.3'

   if $PROGRAM_NAME.end_with?('gem')
data/lib/karafka/admin.rb
CHANGED
@@ -18,6 +18,9 @@ module Karafka
     # retry after checking that the operation was finished or failed using external factor.
     MAX_WAIT_TIMEOUT = 1

+    # Max time for a TPL request. We increase it to compensate for remote clusters latency
+    TPL_REQUEST_TIMEOUT = 2_000
+
     # How many times should be try. 1 x 60 => 60 seconds wait in total
     MAX_ATTEMPTS = 60

@@ -34,7 +37,8 @@ module Karafka
       'enable.auto.commit': false
     }.freeze

-    private_constant :Topic, :CONFIG_DEFAULTS, :MAX_WAIT_TIMEOUT, :
+    private_constant :Topic, :CONFIG_DEFAULTS, :MAX_WAIT_TIMEOUT, :TPL_REQUEST_TIMEOUT,
+                     :MAX_ATTEMPTS

     class << self
       # Allows us to read messages from the topic
@@ -42,8 +46,9 @@ module Karafka
       # @param name [String, Symbol] topic name
       # @param partition [Integer] partition
       # @param count [Integer] how many messages we want to get at most
-      # @param start_offset [Integer] offset from which we should start. If -1 is provided
-      #   (default) we will start from the latest offset
+      # @param start_offset [Integer, Time] offset from which we should start. If -1 is provided
+      #   (default) we will start from the latest offset. If time is provided, the appropriate
+      #   offset will be resolved.
       # @param settings [Hash] kafka extra settings (optional)
       #
       # @return [Array<Karafka::Messages::Message>] array with messages
@@ -53,6 +58,9 @@ module Karafka
        low_offset, high_offset = nil

        with_consumer(settings) do |consumer|
+         # Convert the time offset (if needed)
+         start_offset = resolve_offset(consumer, name.to_s, partition, start_offset)
+
          low_offset, high_offset = consumer.query_watermark_offsets(name, partition)

          # Select offset dynamically if -1 or less
@@ -243,6 +251,29 @@ module Karafka

         ::Rdkafka::Config.new(config_hash)
       end
+
+      # Resolves the offset if offset is in a time format. Otherwise returns the offset without
+      # resolving.
+      # @param consumer [::Rdkafka::Consumer]
+      # @param name [String, Symbol] expected topic name
+      # @param partition [Integer]
+      # @param offset [Integer, Time]
+      # @return [Integer] expected offset
+      def resolve_offset(consumer, name, partition, offset)
+        if offset.is_a?(Time)
+          tpl = ::Rdkafka::Consumer::TopicPartitionList.new
+          tpl.add_topic_and_partitions_with_offsets(
+            name, partition => offset
+          )
+
+          real_offsets = consumer.offsets_for_times(tpl, TPL_REQUEST_TIMEOUT)
+          detected_offset = real_offsets.to_h.dig(name, partition)
+
+          detected_offset&.offset || raise(Errors::InvalidTimeBasedOffsetError)
+        else
+          offset
+        end
+      end
     end
   end
 end
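Given the `resolve_offset` helper above, `#read_topic` now also accepts a `Time` as `start_offset`. A usage sketch, where 'events' is a placeholder topic name:

    # At most 10 messages from partition 0, starting at the first offset
    # produced at or after one hour ago
    messages = Karafka::Admin.read_topic('events', 0, 10, Time.now - 3600)
    messages.each { |message| puts message.offset }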
data/lib/karafka/base_consumer.rb
CHANGED
@@ -70,6 +70,7 @@ module Karafka
   #
   # @return [Boolean] true if there was no exception, otherwise false.
   #
+  # @private
   # @note We keep the seek offset tracking, and use it to compensate for async offset flushing
   #   that may not yet kick in when error occurs. That way we pause always on the last processed
   #   message.
@@ -203,8 +204,15 @@ module Karafka

   # Seeks in the context of current topic and partition
   #
-  # @param offset [Integer] offset where we want to seek
-  def seek(offset)
+  # @param offset [Integer, Time] offset where we want to seek or time of the offset where we
+  #   want to seek.
+  # @param manual_seek [Boolean] Flag to differentiate between user seek and system/strategy
+  #   based seek. User seek operations should take precedence over system actions, hence we need
+  #   to know who invoked it.
+  # @note Please note, that if you are seeking to a time offset, getting the offset is blocking
+  def seek(offset, manual_seek = true)
+    coordinator.manual_seek if manual_seek
+
     client.seek(
       Karafka::Messages::Seek.new(
         topic.name,
data/lib/karafka/connection/client.rb
CHANGED
@@ -20,11 +20,14 @@ module Karafka
       # How many times should we retry polling in case of a failure
       MAX_POLL_RETRIES = 20

+      # Max time for a TPL request. We increase it to compensate for remote clusters latency
+      TPL_REQUEST_TIMEOUT = 2_000
+
       # We want to make sure we never close several clients in the same moment to prevent
       # potential race conditions and other issues
       SHUTDOWN_MUTEX = Mutex.new

-      private_constant :MAX_POLL_RETRIES, :SHUTDOWN_MUTEX
+      private_constant :MAX_POLL_RETRIES, :SHUTDOWN_MUTEX, :TPL_REQUEST_TIMEOUT

       # Creates a new consumer instance.
       #
@@ -35,12 +38,16 @@ module Karafka
         @id = SecureRandom.hex(6)
         # Name is set when we build consumer
         @name = ''
-        @mutex = Mutex.new
         @closed = false
         @subscription_group = subscription_group
         @buffer = RawMessagesBuffer.new
         @rebalance_manager = RebalanceManager.new
         @kafka = build_consumer
+        # There are few operations that can happen in parallel from the listener threads as well
+        # as from the workers. They are not fully thread-safe because they may be composed out of
+        # few calls to Kafka or out of few internal state changes. That is why we mutex them.
+        # It mostly revolves around pausing and resuming.
+        @mutex = Mutex.new
         # We need to keep track of what we have paused for resuming
         # In case we loose partition, we still need to resume it, otherwise it won't be fetched
         # again if we get reassigned to it later on. We need to keep them as after revocation we
@@ -101,16 +108,12 @@ module Karafka
       #
       # @param message [Karafka::Messages::Message]
       def store_offset(message)
-
-          internal_store_offset(message)
-        end
+        internal_store_offset(message)
       end

       # @return [Boolean] true if our current assignment has been lost involuntarily.
       def assignment_lost?
-        @
-          @kafka.assignment_lost?
-        end
+        @kafka.assignment_lost?
       end

       # Commits the offset on a current consumer in a non-blocking or blocking way.
@@ -127,11 +130,7 @@ module Karafka
       # it does **not** resolve to `lost_assignment?`. It returns only the commit state operation
       # result.
       def commit_offsets(async: true)
-        @mutex.lock
-
         internal_commit_offsets(async: async)
-      ensure
-        @mutex.unlock
       end

       # Commits offset in a synchronous way.
@@ -144,13 +143,11 @@ module Karafka
       # Seek to a particular message. The next poll on the topic/partition will return the
       # message at the given offset.
       #
-      # @param message [Messages::Message, Messages::Seek] message to which we want to seek to
+      # @param message [Messages::Message, Messages::Seek] message to which we want to seek to.
+      #   It can have the time based offset.
+      # @note Please note, that if you are seeking to a time offset, getting the offset is blocking
       def seek(message)
-        @mutex.
-
-        @kafka.seek(message)
-      ensure
-        @mutex.unlock
+        @mutex.synchronize { internal_seek(message) }
       end

       # Pauses given partition and moves back to last successful offset processed.
@@ -161,37 +158,34 @@ module Karafka
       #   be reprocessed after getting back to processing)
       # @note This will pause indefinitely and requires manual `#resume`
       def pause(topic, partition, offset)
-        @mutex.
-
-        return if @closed
-
-        pause_msg = Messages::Seek.new(topic, partition, offset)
-
-        # not own anymore.
-        tpl = topic_partition_list(topic, partition)
-
-          'client.pause',
-          caller: self,
-          subscription_group: @subscription_group,
-          topic: topic,
-          partition: partition,
-          offset: offset
-        )
+        @mutex.synchronize do
+          # Do not pause if the client got closed, would not change anything
+          return if @closed
+
+          pause_msg = Messages::Seek.new(topic, partition, offset)
+
+          internal_commit_offsets(async: true)
+
+          # Here we do not use our cached tpls because we should not try to pause something we do
+          # not own anymore.
+          tpl = topic_partition_list(topic, partition)
+
+          return unless tpl
+
+          Karafka.monitor.instrument(
+            'client.pause',
+            caller: self,
+            subscription_group: @subscription_group,
+            topic: topic,
+            partition: partition,
+            offset: offset
+          )
+
+          @paused_tpls[topic][partition] = tpl
+
+          @kafka.pause(tpl)
+          internal_seek(pause_msg)
+        end
       end

       # Resumes processing of a give topic partition after it was paused.
@@ -199,33 +193,31 @@ module Karafka
       # @param topic [String] topic name
       # @param partition [Integer] partition
       def resume(topic, partition)
-        @mutex.
-
-        return if @closed
-
-        @mutex.unlock
+        @mutex.synchronize do
+          return if @closed
+
+          # We now commit offsets on rebalances, thus we can do it async just to make sure
+          internal_commit_offsets(async: true)
+
+          # If we were not able, let's try to reuse the one we have (if we have)
+          tpl = topic_partition_list(topic, partition) || @paused_tpls[topic][partition]
+
+          return unless tpl
+
+          # If we did not have it, it means we never paused this partition, thus no resume should
+          # happen in the first place
+          return unless @paused_tpls[topic].delete(partition)
+
+          Karafka.monitor.instrument(
+            'client.resume',
+            caller: self,
+            subscription_group: @subscription_group,
+            topic: topic,
+            partition: partition
+          )
+
+          @kafka.resume(tpl)
+        end
       end

       # Gracefully stops topic consumption.
@@ -262,11 +254,9 @@ module Karafka
       def reset
         close

-        @
-
-          @kafka = build_consumer
-        end
+        @closed = false
+        @paused_tpls.clear
+        @kafka = build_consumer
       end

       # Runs a single poll ignoring all the potential errors
@@ -323,28 +313,55 @@ module Karafka
         raise e
       end

+      # Non-mutexed seek that should be used only internally. Outside we expose `#seek` that is
+      # wrapped with a mutex.
+      #
+      # @param message [Messages::Message, Messages::Seek] message to which we want to seek to.
+      #   It can have the time based offset.
+      def internal_seek(message)
+        # If the seek message offset is in a time format, we need to find the closest "real"
+        # offset matching before we seek
+        if message.offset.is_a?(Time)
+          tpl = ::Rdkafka::Consumer::TopicPartitionList.new
+          tpl.add_topic_and_partitions_with_offsets(
+            message.topic,
+            message.partition => message.offset
+          )
+
+          # Now we can overwrite the seek message offset with our resolved offset and we can
+          # then seek to the appropriate message
+          # We set the timeout to 2_000 to make sure that remote clusters handle this well
+          real_offsets = @kafka.offsets_for_times(tpl, TPL_REQUEST_TIMEOUT)
+          detected_partition = real_offsets.to_h.dig(message.topic, message.partition)
+
+          # There always needs to be an offset. In case we seek into the future, where there
+          # are no offsets yet, we get -1 which indicates the most recent offset
+          # We should always detect offset, whether it is 0, -1 or a corresponding
+          message.offset = detected_partition&.offset || raise(Errors::InvalidTimeBasedOffsetError)
+        end
+
+        @kafka.seek(message)
+      end
+
       # Commits the stored offsets in a sync way and closes the consumer.
       def close
         # Allow only one client to be closed at the same time
         SHUTDOWN_MUTEX.synchronize do
-          #
-          # @note We do not clear rebalance manager here as we may still have revocation info
-          # here that we want to consider valid prior to running another reconnection
-        end
+          # Once client is closed, we should not close it again
+          # This could only happen in case of a race-condition when forceful shutdown happens
+          # and triggers this from a different thread
+          return if @closed
+
+          @closed = true
+
+          # Remove callbacks runners that were registered
+          ::Karafka::Core::Instrumentation.statistics_callbacks.delete(@subscription_group.id)
+          ::Karafka::Core::Instrumentation.error_callbacks.delete(@subscription_group.id)
+
+          @kafka.close
+          @buffer.clear
+          # @note We do not clear rebalance manager here as we may still have revocation info
+          # here that we want to consider valid prior to running another reconnection
         end
       end
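The heart of `internal_seek` is the librdkafka time-to-offset lookup. The same resolution can be sketched against a bare rdkafka consumer (the `consumer` instance and the 'events' topic are assumptions):

    tpl = Rdkafka::Consumer::TopicPartitionList.new
    tpl.add_topic_and_partitions_with_offsets('events', 0 => Time.now - 60)

    # Blocking call; the 2_000 ms timeout mirrors TPL_REQUEST_TIMEOUT above
    real_offsets = consumer.offsets_for_times(tpl, 2_000)

    # A nil result means the lookup failed; an offset of -1 points at the most
    # recent position when no message exists at or after the given time
    offset = real_offsets.to_h.dig('events', 0)&.offset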
data/lib/karafka/errors.rb
CHANGED
@@ -48,6 +48,9 @@ module Karafka
     StrategyNotFoundError = Class.new(BaseError)

     # This should never happen. Please open an issue if it does.
-    InvalidRealOffsetUsage = Class.new(BaseError)
+    InvalidRealOffsetUsageError = Class.new(BaseError)
+
+    # This should never happen. Please open an issue if it does.
+    InvalidTimeBasedOffsetError = Class.new(BaseError)
   end
 end
data/lib/karafka/messages/seek.rb
CHANGED
@@ -4,6 +4,9 @@ module Karafka
   module Messages
     # "Fake" message that we use as an abstraction layer when seeking back.
     # This allows us to encapsulate a seek with a simple abstraction
+    #
+    # @note `#offset` can be either the offset value or the time of the offset
+    #   (first equal or greater)
     Seek = Struct.new(:topic, :partition, :offset)
   end
 end
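With that note in place, both of these `Seek` structs are valid (names illustrative):

    Karafka::Messages::Seek.new('events', 0, 100)            # exact offset
    Karafka::Messages::Seek.new('events', 0, Time.now - 60)  # time-based offset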
data/lib/karafka/pro/iterator/expander.rb
ADDED
@@ -0,0 +1,95 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    class Iterator
+      # There are various ways you can provide topics information for iterating.
+      #
+      # This mapper normalizes this data, resolves offsets and maps the time based offsets into
+      # appropriate once
+      #
+      # Following formats are accepted:
+      #
+      # - 'topic1' - just a string with one topic name
+      # - ['topic1', 'topic2'] - just the names
+      # - { 'topic1' => -100 } - names with negative lookup offset
+      # - { 'topic1' => { 0 => 5 } } - names with exact partitions offsets
+      # - { 'topic1' => { 0 => -5 }, 'topic2' => { 1 => 5 } } - with per partition negative offsets
+      # - { 'topic1' => 100 } - means we run all partitions from the offset 100
+      # - { 'topic1' => Time.now - 60 } - we run all partitions from the message from 60s ago
+      # - { 'topic1' => { 1 => Time.now - 60 } } - partition1 from message 60s ago
+      #
+      class Expander
+        # Expands topics to which we want to subscribe with partitions information in case this
+        # info is not provided.
+        #
+        # @param topics [Array, Hash, String] topics definitions
+        # @return [Hash] expanded and normalized requested topics and partitions data
+        def call(topics)
+          expanded = Hash.new { |h, k| h[k] = {} }
+
+          normalize_format(topics).map do |topic, details|
+            if details.is_a?(Hash)
+              details.each do |partition, offset|
+                expanded[topic][partition] = offset
+              end
+            else
+              partition_count(topic).times do |partition|
+                # If no offsets are provided, we just start from zero
+                expanded[topic][partition] = details || 0
+              end
+            end
+          end
+
+          expanded
+        end
+
+        private
+
+        # Input can be provided in multiple formats. Here we normalize it to one (hash).
+        #
+        # @param topics [Array, Hash, String] requested topics
+        # @return [Hash] normalized hash with topics data
+        def normalize_format(topics)
+          # Simplification for the single topic case
+          topics = [topics] if topics.is_a?(String)
+
+          # If we've got just array with topics, we need to convert that into a representation
+          # that we can expand with offsets
+          topics = topics.map { |name| [name, false] }.to_h if topics.is_a?(Array)
+          # We remap by creating new hash, just in case the hash came as the argument for this
+          # expanded. We do not want to modify user provided hash
+          topics.transform_keys(&:to_s)
+        end
+
+        # List of topics with their partition information for expansion
+        # We cache it so we do not have to run consecutive requests to obtain data about multiple
+        # topics
+        def topics
+          @topics ||= Admin.cluster_info.topics
+        end
+
+        # @param name [String] topic name
+        # @return [Integer] number of partitions of the topic we want to iterate over
+        def partition_count(name)
+          topics
+            .find { |topic| topic.fetch(:topic_name) == name }
+            .tap { |topic| topic || raise(Errors::TopicNotFoundError, name) }
+            .fetch(:partitions)
+            .count
+        end
+      end
+    end
+  end
+end
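Based on the logic above, a rough sketch of what the expander yields, assuming 'topic1' has two partitions (0 and 1):

    expander = Karafka::Pro::Iterator::Expander.new

    expander.call('topic1')
    # => { 'topic1' => { 0 => 0, 1 => 0 } }

    expander.call('topic1' => -100)
    # => { 'topic1' => { 0 => -100, 1 => -100 } }

    expander.call('topic1' => { 1 => Time.now - 60 })
    # => { 'topic1' => { 1 => <Time> } }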
data/lib/karafka/pro/iterator/tpl_builder.rb
ADDED
@@ -0,0 +1,145 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    class Iterator
+      # Max time for a TPL request. We increase it to compensate for remote clusters latency
+      TPL_REQUEST_TIMEOUT = 2_000
+
+      private_constant :TPL_REQUEST_TIMEOUT
+
+      # Because we have various formats in which we can provide the offsets, before we can
+      # subscribe to them, there needs to be a bit of normalization.
+      #
+      # For some of the cases, we need to go to Kafka and get the real offsets or watermarks.
+      #
+      # This builder resolves that and builds a tpl to which we can safely subscribe the way
+      # we want it.
+      class TplBuilder
+        # @param consumer [::Rdkafka::Consumer] consumer instance needed to talk with Kafka
+        # @param expanded_topics [Hash] hash with expanded and normalized topics data
+        def initialize(consumer, expanded_topics)
+          @consumer = consumer
+          @expanded_topics = expanded_topics
+          @mapped_topics = Hash.new { |h, k| h[k] = {} }
+        end
+
+        # @return [Rdkafka::Consumer::TopicPartitionList] final tpl we can use to subscribe
+        def call
+          resolve_partitions_without_offsets
+          resolve_partitions_with_exact_offsets
+          resolve_partitions_with_negative_offsets
+          resolve_partitions_with_time_offsets
+
+          # Final tpl with all the data
+          tpl = Rdkafka::Consumer::TopicPartitionList.new
+
+          @mapped_topics.each do |name, partitions|
+            tpl.add_topic_and_partitions_with_offsets(name, partitions)
+          end
+
+          tpl
+        end
+
+        private
+
+        # First we expand on those partitions that do not have offsets defined.
+        # When we operate in case like this, we just start from beginning
+        def resolve_partitions_without_offsets
+          @expanded_topics.each do |name, partitions|
+            # We can here only about the case where we have partitions without offsets
+            next unless partitions.is_a?(Array) || partitions.is_a?(Range)
+
+            # When no offsets defined, we just start from zero
+            @mapped_topics[name] = partitions.map { |partition| [partition, 0] }.to_h
+          end
+        end
+
+        # If we get exact numeric offsets, we can just start from them without any extra work
+        def resolve_partitions_with_exact_offsets
+          @expanded_topics.each do |name, partitions|
+            next unless partitions.is_a?(Hash)
+
+            partitions.each do |partition, offset|
+              # Skip negative and time based offsets
+              next unless offset.is_a?(Integer) && offset >= 0
+
+              # Exact offsets can be used as they are
+              # No need for extra operations
+              @mapped_topics[name][partition] = offset
+            end
+          end
+        end
+
+        # If the offsets are negative, it means we want to fetch N last messages and we need to
+        # figure out the appropriate offsets
+        #
+        # We do it by getting the watermark offsets and just calculating it. This means that for
+        # heavily compacted topics, this may return less than the desired number but it is a
+        # limitation that is documented.
+        def resolve_partitions_with_negative_offsets
+          @expanded_topics.each do |name, partitions|
+            next unless partitions.is_a?(Hash)
+
+            partitions.each do |partition, offset|
+              # Care only about negative offsets (last n messages)
+              next unless offset.is_a?(Integer) && offset.negative?
+
+              _, high_watermark_offset = @consumer.query_watermark_offsets(name, partition)
+              # We add because this offset is negative
+              @mapped_topics[name][partition] = high_watermark_offset + offset
+            end
+          end
+        end
+
+        # For time based offsets we first need to aggregate them and request the proper offsets.
+        # We want to get all times in one go for all tpls defined with times, so we accumulate
+        # them here and we will make one sync request to kafka for all.
+        def resolve_partitions_with_time_offsets
+          time_tpl = Rdkafka::Consumer::TopicPartitionList.new
+
+          # First we need to collect the time based once
+          @expanded_topics.each do |name, partitions|
+            next unless partitions.is_a?(Hash)
+
+            time_based = {}
+
+            partitions.each do |partition, offset|
+              next unless offset.is_a?(Time)
+
+              time_based[partition] = offset
+            end
+
+            next if time_based.empty?
+
+            time_tpl.add_topic_and_partitions_with_offsets(name, time_based)
+          end
+
+          # If there were no time-based, no need to query Kafka
+          return if time_tpl.empty?
+
+          real_offsets = @consumer.offsets_for_times(time_tpl, TPL_REQUEST_TIMEOUT)
+
+          real_offsets.to_h.each do |name, results|
+            results.each do |result|
+              raise(Errors::InvalidTimeBasedOffsetError) unless result
+
+              @mapped_topics[name][result.partition] = result.offset
+            end
+          end
+        end
+      end
+    end
+  end
+end
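The negative-offset branch is plain watermark arithmetic. A sketch of the same computation, assuming partition 0 of 'events' has a high watermark of 5_000:

    _, high_watermark_offset = consumer.query_watermark_offsets('events', 0)
    # A requested offset of -100 starts iteration at 5_000 + (-100) => 4_900;
    # heavily compacted topics may hold fewer than 100 messages in that range
    start_offset = high_watermark_offset + -100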
data/lib/karafka/pro/iterator.rb
CHANGED
@@ -50,7 +50,7 @@ module Karafka
         settings: { 'auto.offset.reset': 'beginning' },
         yield_nil: false
       )
-      @topics_with_partitions = expand_topics_with_partitions(topics)
+      @topics_with_partitions = Expander.new.call(topics)

       @routing_topics = @topics_with_partitions.map do |name, _|
         [name, ::Karafka::Routing::Router.find_or_initialize_by_name(name)]
@@ -71,7 +71,7 @@ module Karafka
     # only eat up resources.
     def each
       Admin.with_consumer(@settings) do |consumer|
-        tpl = tpl_with_expanded_offsets(consumer)
+        tpl = TplBuilder.new(consumer, @topics_with_partitions).call
         consumer.assign(tpl)

         # We need this for self-referenced APIs like pausing
@@ -131,43 +131,6 @@ module Karafka

     private

-    # Expands topics to which we want to subscribe with partitions information in case this
-    # info is not provided. For our convenience we want to support 5 formats of defining
-    # the subscribed topics:
-    #
-    # - 'topic1' - just a string with one topic name
-    # - ['topic1', 'topic2'] - just the names
-    # - { 'topic1' => -100 } - names with negative lookup offset
-    # - { 'topic1' => { 0 => 5 } } - names with exact partitions offsets
-    # - { 'topic1' => { 0 => -5 }, 'topic2' => { 1 => 5 } } - with per partition negative offsets
-    #
-    # @param topics [Array, Hash] topics definitions
-    # @return [Hash] hash with topics containing partitions definitions
-    def expand_topics_with_partitions(topics)
-      # Simplification for the single topic case
-      topics = [topics] if topics.is_a?(String)
-      # If we've got just array with topics, we need to convert that into a representation
-      # that we can expand with offsets
-      topics = topics.map { |name| [name, false] }.to_h if topics.is_a?(Array)
-
-      expanded = Hash.new { |h, k| h[k] = {} }
-
-      topics.map do |topic, details|
-        if details.is_a?(Hash)
-          details.each do |partition, offset|
-            expanded[topic][partition] = offset
-          end
-        else
-          partition_count(topic.to_s).times do |partition|
-            # If no offsets are provided, we just start from zero
-            expanded[topic][partition] = details || 0
-          end
-        end
-      end
-
-      expanded
-    end
-
     # @param timeout [Integer] timeout in ms
     # @return [Rdkafka::Consumer::Message, nil] message or nil if nothing to do
     def poll(timeout)
@@ -200,54 +163,6 @@ module Karafka
     def done?
       @stopped_partitions >= @total_partitions
     end
-
-    # Builds the tpl representing all the subscriptions we want to run
-    #
-    # Additionally for negative offsets, does the watermark calculation where to start
-    #
-    # @param consumer [Rdkafka::Consumer] consumer we need in case of negative offsets as
-    #   negative are going to be used to do "give me last X". We use the already initialized
-    #   consumer instance, not to start another one again.
-    # @return [Rdkafka::Consumer::TopicPartitionList]
-    def tpl_with_expanded_offsets(consumer)
-      tpl = Rdkafka::Consumer::TopicPartitionList.new
-
-      @topics_with_partitions.each do |name, partitions|
-        partitions_with_offsets = {}
-
-        # When no offsets defined, we just start from zero
-        if partitions.is_a?(Array) || partitions.is_a?(Range)
-          partitions_with_offsets = partitions.map { |partition| [partition, 0] }.to_h
-        else
-          # When offsets defined, we can either use them if positive or expand and move back
-          # in case of negative (-1000 means last 1000 messages, etc)
-          partitions.each do |partition, offset|
-            if offset.negative?
-              _, high_watermark_offset = consumer.query_watermark_offsets(name, partition)
-              # We add because this offset is negative
-              partitions_with_offsets[partition] = high_watermark_offset + offset
-            else
-              partitions_with_offsets[partition] = offset
-            end
-          end
-        end
-
-        tpl.add_topic_and_partitions_with_offsets(name, partitions_with_offsets)
-      end
-
-      tpl
-    end
-
-    # @param name [String] topic name
-    # @return [Integer] number of partitions of the topic we want to iterate over
-    def partition_count(name)
-      Admin
-        .cluster_info
-        .topics
-        .find { |topic| topic.fetch(:topic_name) == name }
-        .fetch(:partitions)
-        .count
-    end
   end
 end
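With the expander and TPL builder wired in, the iterator accepts all of the formats listed earlier, including times. A hedged Pro usage sketch ('events' is a placeholder topic name):

    iterator = Karafka::Pro::Iterator.new('events' => Time.now - 3600)

    iterator.each do |message|
      puts "#{message.partition}/#{message.offset}: #{message.raw_payload}"
    end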
data/lib/karafka/pro/processing/filters_applier.rb
CHANGED
@@ -81,6 +81,7 @@ module Karafka
       # The first message we do need to get next time we poll. We use the minimum not to jump
       # accidentally by over any.
       # @return [Karafka::Messages::Message, nil] cursor message or nil if none
+      # @note Cursor message can also return the offset in the time format
       def cursor
         return nil unless active?
data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb
CHANGED
@@ -44,7 +44,9 @@ module Karafka
           if coordinator.filtered? && !revoked?
             handle_post_filtering
           elsif !revoked?
-            seek
+            # no need to check for manual seek because AJ consumer is internal and
+            # fully controlled by us
+            seek(coordinator.seek_offset, false)
             resume
           else
             resume
data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb
CHANGED
@@ -50,7 +50,9 @@ module Karafka
           if coordinator.filtered? && !revoked?
             handle_post_filtering
           elsif !revoked?
-            seek
+            # no need to check for manual seek because AJ consumer is internal and
+            # fully controlled by us
+            seek(coordinator.seek_offset, false)
             resume
           else
             resume
data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb
CHANGED
@@ -42,7 +42,9 @@ module Karafka
           if coordinator.success?
             coordinator.pause_tracker.reset

-            seek
+            # no need to check for manual seek because AJ consumer is internal and
+            # fully controlled by us
+            seek(coordinator.seek_offset, false) unless revoked?

             resume
           elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb
CHANGED
@@ -46,7 +46,9 @@ module Karafka
             # Since we have VP here we do not commit intermediate offsets and need to commit
             # them here. We do commit in collapsed mode but this is generalized.
             mark_as_consumed(last_group_message) unless revoked?
-            seek
+            # no need to check for manual seek because AJ consumer is internal and
+            # fully controlled by us
+            seek(coordinator.seek_offset, false) unless revoked?

             resume
           elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb
CHANGED
@@ -46,7 +46,9 @@ module Karafka
           if coordinator.filtered? && !revoked?
             handle_post_filtering
           elsif !revoked?
-            seek
+            # no need to check for manual seek because AJ consumer is internal and
+            # fully controlled by us
+            seek(coordinator.seek_offset, false)
             resume
           else
             resume
data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb
CHANGED
@@ -48,7 +48,10 @@ module Karafka
             coordinator.pause_tracker.reset

             mark_as_consumed(last_group_message) unless revoked?
-
+
+            # no need to check for manual seek because AJ consumer is internal and
+            # fully controlled by us
+            seek(coordinator.seek_offset, false) unless revoked?

             resume
           else
@@ -42,8 +42,8 @@ module Karafka

           if coordinator.filtered? && !revoked?
             handle_post_filtering
-          elsif !revoked?
-            seek(last_group_message.offset + 1)
+          elsif !revoked? && !coordinator.manual_seek?
+            seek(last_group_message.offset + 1, false)
             resume
           else
             resume
data/lib/karafka/pro/processing/strategies/dlq/lrj.rb
CHANGED
@@ -38,7 +38,8 @@ module Karafka
           return if coordinator.manual_pause?

           mark_as_consumed(last_group_message) unless revoked?
-
+          # We should not overwrite user manual seel request with our seek
+          seek(coordinator.seek_offset, false) unless revoked? || coordinator.manual_seek?

           resume
         elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb
CHANGED
@@ -37,7 +37,9 @@ module Karafka

           return if coordinator.manual_pause?

-
+          unless revoked? || coordinator.manual_seek?
+            seek(last_group_message.offset + 1, false)
+          end

           resume
         elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
data/lib/karafka/pro/processing/strategies/ftr/default.rb
CHANGED
@@ -70,6 +70,10 @@ module Karafka
           when :skip
             nil
           when :seek
+            # User direct actions take priority over automatic operations
+            # If we've already seeked we can just resume operations, nothing extra needed
+            return resume if coordinator.manual_seek?
+
             throttle_message = filter.cursor

             Karafka.monitor.instrument(
@@ -77,11 +81,14 @@ module Karafka
               caller: self,
               message: throttle_message
             ) do
-              seek(throttle_message.offset)
+              seek(throttle_message.offset, false)
             end

             resume
           when :pause
+            # User direct actions take priority over automatic operations
+            return nil if coordinator.manual_pause?
+
             throttle_message = filter.cursor

             Karafka.monitor.instrument(
data/lib/karafka/pro/processing/strategies/lrj/default.rb
CHANGED
@@ -53,7 +53,7 @@ module Karafka
           return if coordinator.manual_pause?

           mark_as_consumed(last_group_message) unless revoked?
-          seek(coordinator.seek_offset) unless revoked?
+          seek(coordinator.seek_offset, false) unless revoked? || coordinator.manual_seek?

           resume
         else
data/lib/karafka/pro/processing/strategies/lrj/ftr.rb
CHANGED
@@ -45,10 +45,10 @@ module Karafka
           # If still not revoked and was throttled, we need to apply throttling logic
           if coordinator.filtered? && !revoked?
             handle_post_filtering
-          elsif !revoked?
+          elsif !revoked? && !coordinator.manual_seek?
             # If not revoked and not throttled, we move to where we were suppose to and
             # resume
-            seek(coordinator.seek_offset)
+            seek(coordinator.seek_offset, false)
             resume
           else
             resume
data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb
CHANGED
@@ -43,10 +43,10 @@ module Karafka
           # If still not revoked and was throttled, we need to apply filtering logic
           if coordinator.filtered? && !revoked?
             handle_post_filtering
-          elsif !revoked?
+          elsif !revoked? && !coordinator.manual_seek?
             # If not revoked and not throttled, we move to where we were suppose to and
             # resume
-            seek(last_group_message.offset + 1)
+            seek(last_group_message.offset + 1, false)
             resume
           else
             resume
data/lib/karafka/pro/processing/virtual_offset_manager.rb
CHANGED
@@ -118,7 +118,7 @@ module Karafka

       # @return [Messages::Seek] markable message for real offset marking
       def markable
-        raise Errors::InvalidRealOffsetUsage unless markable?
+        raise Errors::InvalidRealOffsetUsageError unless markable?

         Messages::Seek.new(
           @topic,
data/lib/karafka/processing/coordinator.rb
CHANGED
@@ -23,6 +23,7 @@ module Karafka
       @consumptions = {}
       @running_jobs = 0
       @manual_pause = false
+      @manual_seek = false
       @mutex = Mutex.new
       @marked = false
       @failure = false
@@ -41,6 +42,9 @@ module Karafka
       # When starting to run, no pause is expected and no manual pause as well
       @manual_pause = false

+      # No user invoked seeks on a new run
+      @manual_seek = false
+
       # We set it on the first encounter and never again, because then the offset setting
       # should be up to the consumers logic (our or the end user)
       # Seek offset needs to be always initialized as for case where manual offset management
@@ -148,6 +152,16 @@ module Karafka
       @pause_tracker.paused? && @manual_pause
     end

+    # Marks seek as manual for coordination purposes
+    def manual_seek
+      @manual_seek = true
+    end
+
+    # @return [Boolean] did a user invoke seek in the current operations scope
+    def manual_seek?
+      @manual_seek
+    end
+
     # Allows to run synchronized (locked) code that can operate in between virtual partitions
     # @param block [Proc] code we want to run in the synchronized mode
     def synchronize(&block)
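These two flags drive the precedence rule used throughout the strategy changes above: system-driven seeks pass `manual_seek: false` and back off when the user has already seeked, while a plain user `#seek` marks the coordinator. The recurring pattern, paraphrased with an illustrative offset:

    # Strategy (system) seek: skipped if the user seeked during this run
    seek(coordinator.seek_offset, false) unless revoked? || coordinator.manual_seek?

    # User seek inside #consume: manual_seek defaults to true and wins
    seek(message.offset)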
data/lib/karafka/railtie.rb
CHANGED
@@ -70,11 +70,11 @@ if Karafka.rails?
   # We can have many listeners, but it does not matter in which we will reload the code
   # as long as all the consumers will be re-created as Rails reload is thread-safe
   ::Karafka::App.monitor.subscribe('connection.listener.fetch_loop') do
-    # Reload code each time there is a change in the code
-    next unless Rails.application.reloaders.any?(&:updated?)
     # If consumer persistence is enabled, no reason to reload because we will still keep
     # old consumer instances in memory.
     next if Karafka::App.config.consumer_persistence
+    # Reload code each time there is a change in the code
+    next unless Rails.application.reloaders.any?(&:updated?)

     Rails.application.reloader.reload!
   end
data/lib/karafka/version.rb
CHANGED
data.tar.gz.sig
CHANGED
Binary file
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: karafka
 version: !ruby/object:Gem::Version
-  version: 2.1.5
+  version: 2.1.6
 platform: ruby
 authors:
 - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
   Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
   MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
   -----END CERTIFICATE-----
-date: 2023-06-
+date: 2023-06-29 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: karafka-core
@@ -43,7 +43,7 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 2.1.
+        version: 2.1.1
     - - "<"
       - !ruby/object:Gem::Version
         version: 2.2.0
@@ -53,7 +53,7 @@ dependencies:
     requirements:
    - - ">="
      - !ruby/object:Gem::Version
-        version: 2.1.
+        version: 2.1.1
    - - "<"
      - !ruby/object:Gem::Version
        version: 2.2.0
@@ -77,7 +77,7 @@ dependencies:
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
-        version: 2.6.
+        version: 2.6.2
    - - "<"
      - !ruby/object:Gem::Version
        version: 3.0.0
@@ -87,7 +87,7 @@ dependencies:
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
-        version: 2.6.
+        version: 2.6.2
    - - "<"
      - !ruby/object:Gem::Version
        version: 3.0.0
@@ -226,6 +226,8 @@ files:
 - lib/karafka/pro/encryption/messages/parser.rb
 - lib/karafka/pro/encryption/setup/config.rb
 - lib/karafka/pro/iterator.rb
+- lib/karafka/pro/iterator/expander.rb
+- lib/karafka/pro/iterator/tpl_builder.rb
 - lib/karafka/pro/loader.rb
 - lib/karafka/pro/performance_tracker.rb
 - lib/karafka/pro/processing/collapser.rb
metadata.gz.sig
CHANGED
Binary file