karafka 2.1.5 → 2.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +17 -0
- data/Gemfile.lock +8 -8
- data/karafka.gemspec +2 -2
- data/lib/karafka/admin.rb +34 -3
- data/lib/karafka/base_consumer.rb +10 -2
- data/lib/karafka/connection/client.rb +103 -86
- data/lib/karafka/errors.rb +4 -1
- data/lib/karafka/messages/seek.rb +3 -0
- data/lib/karafka/pro/iterator/expander.rb +95 -0
- data/lib/karafka/pro/iterator/tpl_builder.rb +145 -0
- data/lib/karafka/pro/iterator.rb +2 -87
- data/lib/karafka/pro/processing/filters_applier.rb +1 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +3 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +3 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +3 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +3 -1
- data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +3 -1
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +4 -1
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +2 -2
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +2 -2
- data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +2 -1
- data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +3 -1
- data/lib/karafka/pro/processing/strategies/ftr/default.rb +8 -1
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +2 -2
- data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +2 -2
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +3 -1
- data/lib/karafka/pro/processing/virtual_offset_manager.rb +1 -1
- data/lib/karafka/processing/coordinator.rb +14 -0
- data/lib/karafka/railtie.rb +2 -2
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +2 -5
- metadata +8 -6
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a6994a6d579728a877f84c87086d093aae8a1f830b891fcb4904883085432fe4
|
4
|
+
data.tar.gz: 13b21009a471194a72971ca81ddc718e044bb96587db0e8f186974f554e9ec62
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e4711880bde1d2cd1cb34959f740459979b74ff4d28a671a232f88adbe7473cf67e366fc2b492fac761c572f3a6dfc147a59d46fc08e1c5e18df8ac5f108afdd
|
7
|
+
data.tar.gz: c094600c2bd421ce309c0125d60ea82ed0106d5ce4566b3bb8c1aab13c553e7bd2f6651b98029e42ac831b132563b2c502dd1c76defbf8307cd9bd2393b258f7
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,22 @@
|
|
1
1
|
# Karafka framework changelog
|
2
2
|
|
3
|
+
## 2.1.6 (2023-06-29)
|
4
|
+
- [Improvement] Provide time support for iterator
|
5
|
+
- [Improvement] Provide time support for admin `#read_topic`
|
6
|
+
- [Improvement] Provide time support for consumer `#seek`.
|
7
|
+
- [Improvement] Remove no longer needed locks for client operations.
|
8
|
+
- [Improvement] Raise `Karafka::Errors::TopicNotFoundError` when trying to iterate over non-existing topic.
|
9
|
+
- [Improvement] Ensure that Kafka multi-command operations run under mutex together.
|
10
|
+
- [Change] Require `waterdrop` `>= 2.6.2`
|
11
|
+
- [Change] Require `karafka-core` `>= 2.1.1`
|
12
|
+
- [Refactor] Clean-up iterator code.
|
13
|
+
- [Fix] Improve performance in dev environment for a Rails app (juike)
|
14
|
+
- [Fix] Rename `InvalidRealOffsetUsage` to `InvalidRealOffsetUsageError` to align with naming of other errors.
|
15
|
+
- [Fix] Fix unstable spec.
|
16
|
+
- [Fix] Fix a case where automatic `#seek` would overwrite manual seek of a user when running LRJ.
|
17
|
+
- [Fix] Make sure, that user direct `#seek` and `#pause` operations take precedence over system actions.
|
18
|
+
- [Fix] Make sure, that `#pause` and `#resume` with one underlying connection do not race-condition.
|
19
|
+
|
3
20
|
## 2.1.5 (2023-06-19)
|
4
21
|
- [Improvement] Drastically improve `#revoked?` response quality by checking the real time assignment lost state on librdkafka.
|
5
22
|
- [Improvement] Improve eviction of saturated jobs that would run on already revoked assignments.
|
data/Gemfile.lock
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
karafka (2.1.5)
|
5
|
-
karafka-core (>= 2.1.
|
4
|
+
karafka (2.1.6)
|
5
|
+
karafka-core (>= 2.1.1, < 2.2.0)
|
6
6
|
thor (>= 0.20)
|
7
|
-
waterdrop (>= 2.6.
|
7
|
+
waterdrop (>= 2.6.2, < 3.0.0)
|
8
8
|
zeitwerk (~> 2.3)
|
9
9
|
|
10
10
|
GEM
|
@@ -30,14 +30,14 @@ GEM
|
|
30
30
|
activesupport (>= 5.0)
|
31
31
|
i18n (1.14.1)
|
32
32
|
concurrent-ruby (~> 1.0)
|
33
|
-
karafka-core (2.1.
|
33
|
+
karafka-core (2.1.1)
|
34
34
|
concurrent-ruby (>= 1.1)
|
35
|
-
karafka-rdkafka (>= 0.13.
|
36
|
-
karafka-rdkafka (0.13.
|
35
|
+
karafka-rdkafka (>= 0.13.1, < 0.14.0)
|
36
|
+
karafka-rdkafka (0.13.1)
|
37
37
|
ffi (~> 1.15)
|
38
38
|
mini_portile2 (~> 2.6)
|
39
39
|
rake (> 12)
|
40
|
-
karafka-web (0.6.
|
40
|
+
karafka-web (0.6.1)
|
41
41
|
erubi (~> 1.4)
|
42
42
|
karafka (>= 2.1.4, < 3.0.0)
|
43
43
|
karafka-core (>= 2.0.13, < 3.0.0)
|
@@ -72,7 +72,7 @@ GEM
|
|
72
72
|
tilt (2.2.0)
|
73
73
|
tzinfo (2.0.6)
|
74
74
|
concurrent-ruby (~> 1.0)
|
75
|
-
waterdrop (2.6.
|
75
|
+
waterdrop (2.6.2)
|
76
76
|
karafka-core (>= 2.1.0, < 3.0.0)
|
77
77
|
zeitwerk (~> 2.3)
|
78
78
|
zeitwerk (2.6.8)
|
data/karafka.gemspec
CHANGED
@@ -21,9 +21,9 @@ Gem::Specification.new do |spec|
|
|
21
21
|
without having to focus on things that are not your business domain.
|
22
22
|
DESC
|
23
23
|
|
24
|
-
spec.add_dependency 'karafka-core', '>= 2.1.
|
24
|
+
spec.add_dependency 'karafka-core', '>= 2.1.1', '< 2.2.0'
|
25
25
|
spec.add_dependency 'thor', '>= 0.20'
|
26
|
-
spec.add_dependency 'waterdrop', '>= 2.6.
|
26
|
+
spec.add_dependency 'waterdrop', '>= 2.6.2', '< 3.0.0'
|
27
27
|
spec.add_dependency 'zeitwerk', '~> 2.3'
|
28
28
|
|
29
29
|
if $PROGRAM_NAME.end_with?('gem')
|
data/lib/karafka/admin.rb
CHANGED
@@ -18,6 +18,9 @@ module Karafka
|
|
18
18
|
# retry after checking that the operation was finished or failed using external factor.
|
19
19
|
MAX_WAIT_TIMEOUT = 1
|
20
20
|
|
21
|
+
# Max time for a TPL request. We increase it to compensate for remote clusters latency
|
22
|
+
TPL_REQUEST_TIMEOUT = 2_000
|
23
|
+
|
21
24
|
# How many times should we try. 1 x 60 => 60 seconds wait in total
|
22
25
|
MAX_ATTEMPTS = 60
|
23
26
|
|
@@ -34,7 +37,8 @@ module Karafka
|
|
34
37
|
'enable.auto.commit': false
|
35
38
|
}.freeze
|
36
39
|
|
37
|
-
private_constant :Topic, :CONFIG_DEFAULTS, :MAX_WAIT_TIMEOUT, :MAX_ATTEMPTS
|
40
|
+
private_constant :Topic, :CONFIG_DEFAULTS, :MAX_WAIT_TIMEOUT, :TPL_REQUEST_TIMEOUT,
|
41
|
+
:MAX_ATTEMPTS
|
38
42
|
|
39
43
|
class << self
|
40
44
|
# Allows us to read messages from the topic
|
@@ -42,8 +46,9 @@ module Karafka
|
|
42
46
|
# @param name [String, Symbol] topic name
|
43
47
|
# @param partition [Integer] partition
|
44
48
|
# @param count [Integer] how many messages we want to get at most
|
45
|
-
# @param start_offset [Integer] offset from which we should start. If -1 is provided
|
46
|
-
# (default) we will start from the latest offset
|
49
|
+
# @param start_offset [Integer, Time] offset from which we should start. If -1 is provided
|
50
|
+
# (default) we will start from the latest offset. If time is provided, the appropriate
|
51
|
+
# offset will be resolved.
|
47
52
|
# @param settings [Hash] kafka extra settings (optional)
|
48
53
|
#
|
49
54
|
# @return [Array<Karafka::Messages::Message>] array with messages
|
@@ -53,6 +58,9 @@ module Karafka
|
|
53
58
|
low_offset, high_offset = nil
|
54
59
|
|
55
60
|
with_consumer(settings) do |consumer|
|
61
|
+
# Convert the time offset (if needed)
|
62
|
+
start_offset = resolve_offset(consumer, name.to_s, partition, start_offset)
|
63
|
+
|
56
64
|
low_offset, high_offset = consumer.query_watermark_offsets(name, partition)
|
57
65
|
|
58
66
|
# Select offset dynamically if -1 or less
|
@@ -243,6 +251,29 @@ module Karafka
|
|
243
251
|
|
244
252
|
::Rdkafka::Config.new(config_hash)
|
245
253
|
end
|
254
|
+
|
255
|
+
# Resolves the offset if offset is in a time format. Otherwise returns the offset without
|
256
|
+
# resolving.
|
257
|
+
# @param consumer [::Rdkafka::Consumer]
|
258
|
+
# @param name [String, Symbol] expected topic name
|
259
|
+
# @param partition [Integer]
|
260
|
+
# @param offset [Integer, Time]
|
261
|
+
# @return [Integer] expected offset
|
262
|
+
def resolve_offset(consumer, name, partition, offset)
|
263
|
+
if offset.is_a?(Time)
|
264
|
+
tpl = ::Rdkafka::Consumer::TopicPartitionList.new
|
265
|
+
tpl.add_topic_and_partitions_with_offsets(
|
266
|
+
name, partition => offset
|
267
|
+
)
|
268
|
+
|
269
|
+
real_offsets = consumer.offsets_for_times(tpl, TPL_REQUEST_TIMEOUT)
|
270
|
+
detected_offset = real_offsets.to_h.dig(name, partition)
|
271
|
+
|
272
|
+
detected_offset&.offset || raise(Errors::InvalidTimeBasedOffsetError)
|
273
|
+
else
|
274
|
+
offset
|
275
|
+
end
|
276
|
+
end
|
246
277
|
end
|
247
278
|
end
|
248
279
|
end
|
@@ -70,6 +70,7 @@ module Karafka
|
|
70
70
|
#
|
71
71
|
# @return [Boolean] true if there was no exception, otherwise false.
|
72
72
|
#
|
73
|
+
# @private
|
73
74
|
# @note We keep the seek offset tracking, and use it to compensate for async offset flushing
|
74
75
|
# that may not yet kick in when error occurs. That way we pause always on the last processed
|
75
76
|
# message.
|
@@ -203,8 +204,15 @@ module Karafka
|
|
203
204
|
|
204
205
|
# Seeks in the context of current topic and partition
|
205
206
|
#
|
206
|
-
# @param offset [Integer] offset where we want to seek
|
207
|
-
|
207
|
+
# @param offset [Integer, Time] offset where we want to seek or time of the offset where we
|
208
|
+
# want to seek.
|
209
|
+
# @param manual_seek [Boolean] Flag to differentiate between user seek and system/strategy
|
210
|
+
# based seek. User seek operations should take precedence over system actions, hence we need
|
211
|
+
# to know who invoked it.
|
212
|
+
# @note Please note, that if you are seeking to a time offset, getting the offset is blocking
|
213
|
+
def seek(offset, manual_seek = true)
|
214
|
+
coordinator.manual_seek if manual_seek
|
215
|
+
|
208
216
|
client.seek(
|
209
217
|
Karafka::Messages::Seek.new(
|
210
218
|
topic.name,
|
@@ -20,11 +20,14 @@ module Karafka
|
|
20
20
|
# How many times should we retry polling in case of a failure
|
21
21
|
MAX_POLL_RETRIES = 20
|
22
22
|
|
23
|
+
# Max time for a TPL request. We increase it to compensate for remote clusters latency
|
24
|
+
TPL_REQUEST_TIMEOUT = 2_000
|
25
|
+
|
23
26
|
# We want to make sure we never close several clients in the same moment to prevent
|
24
27
|
# potential race conditions and other issues
|
25
28
|
SHUTDOWN_MUTEX = Mutex.new
|
26
29
|
|
27
|
-
private_constant :MAX_POLL_RETRIES, :SHUTDOWN_MUTEX
|
30
|
+
private_constant :MAX_POLL_RETRIES, :SHUTDOWN_MUTEX, :TPL_REQUEST_TIMEOUT
|
28
31
|
|
29
32
|
# Creates a new consumer instance.
|
30
33
|
#
|
@@ -35,12 +38,16 @@ module Karafka
|
|
35
38
|
@id = SecureRandom.hex(6)
|
36
39
|
# Name is set when we build consumer
|
37
40
|
@name = ''
|
38
|
-
@mutex = Mutex.new
|
39
41
|
@closed = false
|
40
42
|
@subscription_group = subscription_group
|
41
43
|
@buffer = RawMessagesBuffer.new
|
42
44
|
@rebalance_manager = RebalanceManager.new
|
43
45
|
@kafka = build_consumer
|
46
|
+
# There are a few operations that can happen in parallel from the listener threads as well
|
47
|
+
# as from the workers. They are not fully thread-safe because they may be composed out of
|
48
|
+
# few calls to Kafka or out of few internal state changes. That is why we mutex them.
|
49
|
+
# It mostly revolves around pausing and resuming.
|
50
|
+
@mutex = Mutex.new
|
44
51
|
# We need to keep track of what we have paused for resuming
|
45
52
|
# In case we lose a partition, we still need to resume it, otherwise it won't be fetched
|
46
53
|
# again if we get reassigned to it later on. We need to keep them as after revocation we
|
@@ -101,16 +108,12 @@ module Karafka
|
|
101
108
|
#
|
102
109
|
# @param message [Karafka::Messages::Message]
|
103
110
|
def store_offset(message)
|
104
|
-
|
105
|
-
internal_store_offset(message)
|
106
|
-
end
|
111
|
+
internal_store_offset(message)
|
107
112
|
end
|
108
113
|
|
109
114
|
# @return [Boolean] true if our current assignment has been lost involuntarily.
|
110
115
|
def assignment_lost?
|
111
|
-
@
|
112
|
-
@kafka.assignment_lost?
|
113
|
-
end
|
116
|
+
@kafka.assignment_lost?
|
114
117
|
end
|
115
118
|
|
116
119
|
# Commits the offset on a current consumer in a non-blocking or blocking way.
|
@@ -127,11 +130,7 @@ module Karafka
|
|
127
130
|
# it does **not** resolve to `lost_assignment?`. It returns only the commit state operation
|
128
131
|
# result.
|
129
132
|
def commit_offsets(async: true)
|
130
|
-
@mutex.lock
|
131
|
-
|
132
133
|
internal_commit_offsets(async: async)
|
133
|
-
ensure
|
134
|
-
@mutex.unlock
|
135
134
|
end
|
136
135
|
|
137
136
|
# Commits offset in a synchronous way.
|
@@ -144,13 +143,11 @@ module Karafka
|
|
144
143
|
# Seek to a particular message. The next poll on the topic/partition will return the
|
145
144
|
# message at the given offset.
|
146
145
|
#
|
147
|
-
# @param message [Messages::Message, Messages::Seek] message to which we want to seek to
|
146
|
+
# @param message [Messages::Message, Messages::Seek] message to which we want to seek to.
|
147
|
+
# It can have the time based offset.
|
148
|
+
# @note Please note, that if you are seeking to a time offset, getting the offset is blocking
|
148
149
|
def seek(message)
|
149
|
-
@mutex.lock
|
150
|
-
|
151
|
-
@kafka.seek(message)
|
152
|
-
ensure
|
153
|
-
@mutex.unlock
|
150
|
+
@mutex.synchronize { internal_seek(message) }
|
154
151
|
end
|
155
152
|
|
156
153
|
# Pauses given partition and moves back to last successful offset processed.
|
@@ -161,37 +158,34 @@ module Karafka
|
|
161
158
|
# be reprocessed after getting back to processing)
|
162
159
|
# @note This will pause indefinitely and requires manual `#resume`
|
163
160
|
def pause(topic, partition, offset)
|
164
|
-
@mutex.
|
165
|
-
|
166
|
-
|
167
|
-
return if @closed
|
168
|
-
|
169
|
-
pause_msg = Messages::Seek.new(topic, partition, offset)
|
161
|
+
@mutex.synchronize do
|
162
|
+
# Do not pause if the client got closed, would not change anything
|
163
|
+
return if @closed
|
170
164
|
|
171
|
-
|
165
|
+
pause_msg = Messages::Seek.new(topic, partition, offset)
|
172
166
|
|
173
|
-
|
174
|
-
# not own anymore.
|
175
|
-
tpl = topic_partition_list(topic, partition)
|
167
|
+
internal_commit_offsets(async: true)
|
176
168
|
|
177
|
-
|
169
|
+
# Here we do not use our cached tpls because we should not try to pause something we do
|
170
|
+
# not own anymore.
|
171
|
+
tpl = topic_partition_list(topic, partition)
|
178
172
|
|
179
|
-
|
180
|
-
'client.pause',
|
181
|
-
caller: self,
|
182
|
-
subscription_group: @subscription_group,
|
183
|
-
topic: topic,
|
184
|
-
partition: partition,
|
185
|
-
offset: offset
|
186
|
-
)
|
173
|
+
return unless tpl
|
187
174
|
|
188
|
-
|
175
|
+
Karafka.monitor.instrument(
|
176
|
+
'client.pause',
|
177
|
+
caller: self,
|
178
|
+
subscription_group: @subscription_group,
|
179
|
+
topic: topic,
|
180
|
+
partition: partition,
|
181
|
+
offset: offset
|
182
|
+
)
|
189
183
|
|
190
|
-
|
184
|
+
@paused_tpls[topic][partition] = tpl
|
191
185
|
|
192
|
-
|
193
|
-
|
194
|
-
|
186
|
+
@kafka.pause(tpl)
|
187
|
+
internal_seek(pause_msg)
|
188
|
+
end
|
195
189
|
end
|
196
190
|
|
197
191
|
# Resumes processing of a given topic partition after it was paused.
|
@@ -199,33 +193,31 @@ module Karafka
|
|
199
193
|
# @param topic [String] topic name
|
200
194
|
# @param partition [Integer] partition
|
201
195
|
def resume(topic, partition)
|
202
|
-
@mutex.lock
|
203
|
-
|
204
|
-
return if @closed
|
196
|
+
@mutex.synchronize do
|
197
|
+
return if @closed
|
205
198
|
|
206
|
-
|
207
|
-
|
199
|
+
# We now commit offsets on rebalances, thus we can do it async just to make sure
|
200
|
+
internal_commit_offsets(async: true)
|
208
201
|
|
209
|
-
|
210
|
-
|
202
|
+
# If we were not able, let's try to reuse the one we have (if we have)
|
203
|
+
tpl = topic_partition_list(topic, partition) || @paused_tpls[topic][partition]
|
211
204
|
|
212
|
-
|
205
|
+
return unless tpl
|
213
206
|
|
214
|
-
|
215
|
-
|
216
|
-
|
207
|
+
# If we did not have it, it means we never paused this partition, thus no resume should
|
208
|
+
# happen in the first place
|
209
|
+
return unless @paused_tpls[topic].delete(partition)
|
217
210
|
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
211
|
+
Karafka.monitor.instrument(
|
212
|
+
'client.resume',
|
213
|
+
caller: self,
|
214
|
+
subscription_group: @subscription_group,
|
215
|
+
topic: topic,
|
216
|
+
partition: partition
|
217
|
+
)
|
225
218
|
|
226
|
-
|
227
|
-
|
228
|
-
@mutex.unlock
|
219
|
+
@kafka.resume(tpl)
|
220
|
+
end
|
229
221
|
end
|
230
222
|
|
231
223
|
# Gracefully stops topic consumption.
|
@@ -262,11 +254,9 @@ module Karafka
|
|
262
254
|
def reset
|
263
255
|
close
|
264
256
|
|
265
|
-
@
|
266
|
-
|
267
|
-
|
268
|
-
@kafka = build_consumer
|
269
|
-
end
|
257
|
+
@closed = false
|
258
|
+
@paused_tpls.clear
|
259
|
+
@kafka = build_consumer
|
270
260
|
end
|
271
261
|
|
272
262
|
# Runs a single poll ignoring all the potential errors
|
@@ -323,28 +313,55 @@ module Karafka
|
|
323
313
|
raise e
|
324
314
|
end
|
325
315
|
|
316
|
+
# Non-mutexed seek that should be used only internally. Outside we expose `#seek` that is
|
317
|
+
# wrapped with a mutex.
|
318
|
+
#
|
319
|
+
# @param message [Messages::Message, Messages::Seek] message to which we want to seek to.
|
320
|
+
# It can have the time based offset.
|
321
|
+
def internal_seek(message)
|
322
|
+
# If the seek message offset is in a time format, we need to find the closest "real"
|
323
|
+
# offset matching before we seek
|
324
|
+
if message.offset.is_a?(Time)
|
325
|
+
tpl = ::Rdkafka::Consumer::TopicPartitionList.new
|
326
|
+
tpl.add_topic_and_partitions_with_offsets(
|
327
|
+
message.topic,
|
328
|
+
message.partition => message.offset
|
329
|
+
)
|
330
|
+
|
331
|
+
# Now we can overwrite the seek message offset with our resolved offset and we can
|
332
|
+
# then seek to the appropriate message
|
333
|
+
# We set the timeout to 2_000 to make sure that remote clusters handle this well
|
334
|
+
real_offsets = @kafka.offsets_for_times(tpl, TPL_REQUEST_TIMEOUT)
|
335
|
+
detected_partition = real_offsets.to_h.dig(message.topic, message.partition)
|
336
|
+
|
337
|
+
# There always needs to be an offset. In case we seek into the future, where there
|
338
|
+
# are no offsets yet, we get -1 which indicates the most recent offset
|
339
|
+
# We should always detect an offset, whether it is 0, -1 or a corresponding real offset
|
340
|
+
message.offset = detected_partition&.offset || raise(Errors::InvalidTimeBasedOffsetError)
|
341
|
+
end
|
342
|
+
|
343
|
+
@kafka.seek(message)
|
344
|
+
end
|
345
|
+
|
326
346
|
# Commits the stored offsets in a sync way and closes the consumer.
|
327
347
|
def close
|
328
348
|
# Allow only one client to be closed at the same time
|
329
349
|
SHUTDOWN_MUTEX.synchronize do
|
330
|
-
#
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
# @note We do not clear rebalance manager here as we may still have revocation info
|
346
|
-
# here that we want to consider valid prior to running another reconnection
|
347
|
-
end
|
350
|
+
# Once client is closed, we should not close it again
|
351
|
+
# This could only happen in case of a race-condition when forceful shutdown happens
|
352
|
+
# and triggers this from a different thread
|
353
|
+
return if @closed
|
354
|
+
|
355
|
+
@closed = true
|
356
|
+
|
357
|
+
# Remove callbacks runners that were registered
|
358
|
+
::Karafka::Core::Instrumentation.statistics_callbacks.delete(@subscription_group.id)
|
359
|
+
::Karafka::Core::Instrumentation.error_callbacks.delete(@subscription_group.id)
|
360
|
+
|
361
|
+
@kafka.close
|
362
|
+
@buffer.clear
|
363
|
+
# @note We do not clear rebalance manager here as we may still have revocation info
|
364
|
+
# here that we want to consider valid prior to running another reconnection
|
348
365
|
end
|
349
366
|
end
|
350
367
|
|
data/lib/karafka/errors.rb
CHANGED
@@ -48,6 +48,9 @@ module Karafka
|
|
48
48
|
StrategyNotFoundError = Class.new(BaseError)
|
49
49
|
|
50
50
|
# This should never happen. Please open an issue if it does.
|
51
|
-
|
51
|
+
InvalidRealOffsetUsageError = Class.new(BaseError)
|
52
|
+
|
53
|
+
# This should never happen. Please open an issue if it does.
|
54
|
+
InvalidTimeBasedOffsetError = Class.new(BaseError)
|
52
55
|
end
|
53
56
|
end
|
@@ -4,6 +4,9 @@ module Karafka
|
|
4
4
|
module Messages
|
5
5
|
# "Fake" message that we use as an abstraction layer when seeking back.
|
6
6
|
# This allows us to encapsulate a seek with a simple abstraction
|
7
|
+
#
|
8
|
+
# @note `#offset` can be either the offset value or the time of the offset
|
9
|
+
# (first equal or greater)
|
7
10
|
Seek = Struct.new(:topic, :partition, :offset)
|
8
11
|
end
|
9
12
|
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
7
|
+
# repository and their usage requires commercial license agreement.
|
8
|
+
#
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
10
|
+
#
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
12
|
+
# your code to Maciej Mensfeld.
|
13
|
+
|
14
|
+
module Karafka
|
15
|
+
module Pro
|
16
|
+
class Iterator
|
17
|
+
# There are various ways you can provide topics information for iterating.
|
18
|
+
#
|
19
|
+
# This mapper normalizes this data, resolves offsets and maps the time based offsets into
|
20
|
+
# appropriate ones
|
21
|
+
#
|
22
|
+
# Following formats are accepted:
|
23
|
+
#
|
24
|
+
# - 'topic1' - just a string with one topic name
|
25
|
+
# - ['topic1', 'topic2'] - just the names
|
26
|
+
# - { 'topic1' => -100 } - names with negative lookup offset
|
27
|
+
# - { 'topic1' => { 0 => 5 } } - names with exact partitions offsets
|
28
|
+
# - { 'topic1' => { 0 => -5 }, 'topic2' => { 1 => 5 } } - with per partition negative offsets
|
29
|
+
# - { 'topic1' => 100 } - means we run all partitions from the offset 100
|
30
|
+
# - { 'topic1' => Time.now - 60 } - we run all partitions from the message from 60s ago
|
31
|
+
# - { 'topic1' => { 1 => Time.now - 60 } } - partition1 from message 60s ago
|
32
|
+
#
|
33
|
+
class Expander
|
34
|
+
# Expands topics to which we want to subscribe with partitions information in case this
|
35
|
+
# info is not provided.
|
36
|
+
#
|
37
|
+
# @param topics [Array, Hash, String] topics definitions
|
38
|
+
# @return [Hash] expanded and normalized requested topics and partitions data
|
39
|
+
def call(topics)
|
40
|
+
expanded = Hash.new { |h, k| h[k] = {} }
|
41
|
+
|
42
|
+
normalize_format(topics).map do |topic, details|
|
43
|
+
if details.is_a?(Hash)
|
44
|
+
details.each do |partition, offset|
|
45
|
+
expanded[topic][partition] = offset
|
46
|
+
end
|
47
|
+
else
|
48
|
+
partition_count(topic).times do |partition|
|
49
|
+
# If no offsets are provided, we just start from zero
|
50
|
+
expanded[topic][partition] = details || 0
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
expanded
|
56
|
+
end
|
57
|
+
|
58
|
+
private
|
59
|
+
|
60
|
+
# Input can be provided in multiple formats. Here we normalize it to one (hash).
|
61
|
+
#
|
62
|
+
# @param topics [Array, Hash, String] requested topics
|
63
|
+
# @return [Hash] normalized hash with topics data
|
64
|
+
def normalize_format(topics)
|
65
|
+
# Simplification for the single topic case
|
66
|
+
topics = [topics] if topics.is_a?(String)
|
67
|
+
|
68
|
+
# If we've got just array with topics, we need to convert that into a representation
|
69
|
+
# that we can expand with offsets
|
70
|
+
topics = topics.map { |name| [name, false] }.to_h if topics.is_a?(Array)
|
71
|
+
# We remap by creating new hash, just in case the hash came as the argument for this
|
72
|
+
# expanded. We do not want to modify user provided hash
|
73
|
+
topics.transform_keys(&:to_s)
|
74
|
+
end
|
75
|
+
|
76
|
+
# List of topics with their partition information for expansion
|
77
|
+
# We cache it so we do not have to run consecutive requests to obtain data about multiple
|
78
|
+
# topics
|
79
|
+
def topics
|
80
|
+
@topics ||= Admin.cluster_info.topics
|
81
|
+
end
|
82
|
+
|
83
|
+
# @param name [String] topic name
|
84
|
+
# @return [Integer] number of partitions of the topic we want to iterate over
|
85
|
+
def partition_count(name)
|
86
|
+
topics
|
87
|
+
.find { |topic| topic.fetch(:topic_name) == name }
|
88
|
+
.tap { |topic| topic || raise(Errors::TopicNotFoundError, name) }
|
89
|
+
.fetch(:partitions)
|
90
|
+
.count
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
@@ -0,0 +1,145 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
7
|
+
# repository and their usage requires commercial license agreement.
|
8
|
+
#
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
10
|
+
#
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
12
|
+
# your code to Maciej Mensfeld.
|
13
|
+
|
14
|
+
module Karafka
|
15
|
+
module Pro
|
16
|
+
class Iterator
|
17
|
+
# Max time for a TPL request. We increase it to compensate for remote clusters latency
|
18
|
+
TPL_REQUEST_TIMEOUT = 2_000
|
19
|
+
|
20
|
+
private_constant :TPL_REQUEST_TIMEOUT
|
21
|
+
|
22
|
+
# Because we have various formats in which we can provide the offsets, before we can
|
23
|
+
# subscribe to them, there needs to be a bit of normalization.
|
24
|
+
#
|
25
|
+
# For some of the cases, we need to go to Kafka and get the real offsets or watermarks.
|
26
|
+
#
|
27
|
+
# This builder resolves that and builds a tpl to which we can safely subscribe the way
|
28
|
+
# we want it.
|
29
|
+
class TplBuilder
|
30
|
+
# @param consumer [::Rdkafka::Consumer] consumer instance needed to talk with Kafka
|
31
|
+
# @param expanded_topics [Hash] hash with expanded and normalized topics data
|
32
|
+
def initialize(consumer, expanded_topics)
|
33
|
+
@consumer = consumer
|
34
|
+
@expanded_topics = expanded_topics
|
35
|
+
@mapped_topics = Hash.new { |h, k| h[k] = {} }
|
36
|
+
end
|
37
|
+
|
38
|
+
# @return [Rdkafka::Consumer::TopicPartitionList] final tpl we can use to subscribe
|
39
|
+
def call
|
40
|
+
resolve_partitions_without_offsets
|
41
|
+
resolve_partitions_with_exact_offsets
|
42
|
+
resolve_partitions_with_negative_offsets
|
43
|
+
resolve_partitions_with_time_offsets
|
44
|
+
|
45
|
+
# Final tpl with all the data
|
46
|
+
tpl = Rdkafka::Consumer::TopicPartitionList.new
|
47
|
+
|
48
|
+
@mapped_topics.each do |name, partitions|
|
49
|
+
tpl.add_topic_and_partitions_with_offsets(name, partitions)
|
50
|
+
end
|
51
|
+
|
52
|
+
tpl
|
53
|
+
end
|
54
|
+
|
55
|
+
private
|
56
|
+
|
57
|
+
# First we expand on those partitions that do not have offsets defined.
|
58
|
+
# When we operate in case like this, we just start from beginning
|
59
|
+
def resolve_partitions_without_offsets
|
60
|
+
@expanded_topics.each do |name, partitions|
|
61
|
+
# We care here only about the case where we have partitions without offsets
|
62
|
+
next unless partitions.is_a?(Array) || partitions.is_a?(Range)
|
63
|
+
|
64
|
+
# When no offsets defined, we just start from zero
|
65
|
+
@mapped_topics[name] = partitions.map { |partition| [partition, 0] }.to_h
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
# If we get exact numeric offsets, we can just start from them without any extra work
|
70
|
+
def resolve_partitions_with_exact_offsets
|
71
|
+
@expanded_topics.each do |name, partitions|
|
72
|
+
next unless partitions.is_a?(Hash)
|
73
|
+
|
74
|
+
partitions.each do |partition, offset|
|
75
|
+
# Skip negative and time based offsets
|
76
|
+
next unless offset.is_a?(Integer) && offset >= 0
|
77
|
+
|
78
|
+
# Exact offsets can be used as they are
|
79
|
+
# No need for extra operations
|
80
|
+
@mapped_topics[name][partition] = offset
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
# If the offsets are negative, it means we want to fetch N last messages and we need to
|
86
|
+
# figure out the appropriate offsets
|
87
|
+
#
|
88
|
+
# We do it by getting the watermark offsets and just calculating it. This means that for
|
89
|
+
# heavily compacted topics, this may return less than the desired number but it is a
|
90
|
+
# limitation that is documented.
|
91
|
+
def resolve_partitions_with_negative_offsets
|
92
|
+
@expanded_topics.each do |name, partitions|
|
93
|
+
next unless partitions.is_a?(Hash)
|
94
|
+
|
95
|
+
partitions.each do |partition, offset|
|
96
|
+
# Care only about negative offsets (last n messages)
|
97
|
+
next unless offset.is_a?(Integer) && offset.negative?
|
98
|
+
|
99
|
+
_, high_watermark_offset = @consumer.query_watermark_offsets(name, partition)
|
100
|
+
# We add because this offset is negative
|
101
|
+
@mapped_topics[name][partition] = high_watermark_offset + offset
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
# For time based offsets we first need to aggregate them and request the proper offsets.
|
107
|
+
# We want to get all times in one go for all tpls defined with times, so we accumulate
|
108
|
+
# them here and we will make one sync request to kafka for all.
|
109
|
+
def resolve_partitions_with_time_offsets
|
110
|
+
time_tpl = Rdkafka::Consumer::TopicPartitionList.new
|
111
|
+
|
112
|
+
# First we need to collect the time based ones
|
113
|
+
@expanded_topics.each do |name, partitions|
|
114
|
+
next unless partitions.is_a?(Hash)
|
115
|
+
|
116
|
+
time_based = {}
|
117
|
+
|
118
|
+
partitions.each do |partition, offset|
|
119
|
+
next unless offset.is_a?(Time)
|
120
|
+
|
121
|
+
time_based[partition] = offset
|
122
|
+
end
|
123
|
+
|
124
|
+
next if time_based.empty?
|
125
|
+
|
126
|
+
time_tpl.add_topic_and_partitions_with_offsets(name, time_based)
|
127
|
+
end
|
128
|
+
|
129
|
+
# If there were no time-based, no need to query Kafka
|
130
|
+
return if time_tpl.empty?
|
131
|
+
|
132
|
+
real_offsets = @consumer.offsets_for_times(time_tpl, TPL_REQUEST_TIMEOUT)
|
133
|
+
|
134
|
+
real_offsets.to_h.each do |name, results|
|
135
|
+
results.each do |result|
|
136
|
+
raise(Errors::InvalidTimeBasedOffsetError) unless result
|
137
|
+
|
138
|
+
@mapped_topics[name][result.partition] = result.offset
|
139
|
+
end
|
140
|
+
end
|
141
|
+
end
|
142
|
+
end
|
143
|
+
end
|
144
|
+
end
|
145
|
+
end
|
data/lib/karafka/pro/iterator.rb
CHANGED
@@ -50,7 +50,7 @@ module Karafka
|
|
50
50
|
settings: { 'auto.offset.reset': 'beginning' },
|
51
51
|
yield_nil: false
|
52
52
|
)
|
53
|
-
@topics_with_partitions =
|
53
|
+
@topics_with_partitions = Expander.new.call(topics)
|
54
54
|
|
55
55
|
@routing_topics = @topics_with_partitions.map do |name, _|
|
56
56
|
[name, ::Karafka::Routing::Router.find_or_initialize_by_name(name)]
|
@@ -71,7 +71,7 @@ module Karafka
|
|
71
71
|
# only eat up resources.
|
72
72
|
def each
|
73
73
|
Admin.with_consumer(@settings) do |consumer|
|
74
|
-
tpl = tpl_with_expanded_offsets(consumer)
|
74
|
+
tpl = TplBuilder.new(consumer, @topics_with_partitions).call
|
75
75
|
consumer.assign(tpl)
|
76
76
|
|
77
77
|
# We need this for self-referenced APIs like pausing
|
@@ -131,43 +131,6 @@ module Karafka
|
|
131
131
|
|
132
132
|
private
|
133
133
|
|
134
|
-
# Expands topics to which we want to subscribe with partitions information in case this
|
135
|
-
# info is not provided. For our convenience we want to support 5 formats of defining
|
136
|
-
# the subscribed topics:
|
137
|
-
#
|
138
|
-
# - 'topic1' - just a string with one topic name
|
139
|
-
# - ['topic1', 'topic2'] - just the names
|
140
|
-
# - { 'topic1' => -100 } - names with negative lookup offset
|
141
|
-
# - { 'topic1' => { 0 => 5 } } - names with exact partitions offsets
|
142
|
-
# - { 'topic1' => { 0 => -5 }, 'topic2' => { 1 => 5 } } - with per partition negative offsets
|
143
|
-
#
|
144
|
-
# @param topics [Array, Hash] topics definitions
|
145
|
-
# @return [Hash] hash with topics containing partitions definitions
|
146
|
-
def expand_topics_with_partitions(topics)
|
147
|
-
# Simplification for the single topic case
|
148
|
-
topics = [topics] if topics.is_a?(String)
|
149
|
-
# If we've got just array with topics, we need to convert that into a representation
|
150
|
-
# that we can expand with offsets
|
151
|
-
topics = topics.map { |name| [name, false] }.to_h if topics.is_a?(Array)
|
152
|
-
|
153
|
-
expanded = Hash.new { |h, k| h[k] = {} }
|
154
|
-
|
155
|
-
topics.map do |topic, details|
|
156
|
-
if details.is_a?(Hash)
|
157
|
-
details.each do |partition, offset|
|
158
|
-
expanded[topic][partition] = offset
|
159
|
-
end
|
160
|
-
else
|
161
|
-
partition_count(topic.to_s).times do |partition|
|
162
|
-
# If no offsets are provided, we just start from zero
|
163
|
-
expanded[topic][partition] = details || 0
|
164
|
-
end
|
165
|
-
end
|
166
|
-
end
|
167
|
-
|
168
|
-
expanded
|
169
|
-
end
|
170
|
-
|
171
134
|
# @param timeout [Integer] timeout in ms
|
172
135
|
# @return [Rdkafka::Consumer::Message, nil] message or nil if nothing to do
|
173
136
|
def poll(timeout)
|
@@ -200,54 +163,6 @@ module Karafka
|
|
200
163
|
def done?
|
201
164
|
@stopped_partitions >= @total_partitions
|
202
165
|
end
|
203
|
-
|
204
|
-
# Builds the tpl representing all the subscriptions we want to run
|
205
|
-
#
|
206
|
-
# Additionally for negative offsets, does the watermark calculation where to start
|
207
|
-
#
|
208
|
-
# @param consumer [Rdkafka::Consumer] consumer we need in case of negative offsets as
|
209
|
-
# negative are going to be used to do "give me last X". We use the already initialized
|
210
|
-
# consumer instance, not to start another one again.
|
211
|
-
# @return [Rdkafka::Consumer::TopicPartitionList]
|
212
|
-
def tpl_with_expanded_offsets(consumer)
|
213
|
-
tpl = Rdkafka::Consumer::TopicPartitionList.new
|
214
|
-
|
215
|
-
@topics_with_partitions.each do |name, partitions|
|
216
|
-
partitions_with_offsets = {}
|
217
|
-
|
218
|
-
# When no offsets defined, we just start from zero
|
219
|
-
if partitions.is_a?(Array) || partitions.is_a?(Range)
|
220
|
-
partitions_with_offsets = partitions.map { |partition| [partition, 0] }.to_h
|
221
|
-
else
|
222
|
-
# When offsets defined, we can either use them if positive or expand and move back
|
223
|
-
# in case of negative (-1000 means last 1000 messages, etc)
|
224
|
-
partitions.each do |partition, offset|
|
225
|
-
if offset.negative?
|
226
|
-
_, high_watermark_offset = consumer.query_watermark_offsets(name, partition)
|
227
|
-
# We add because this offset is negative
|
228
|
-
partitions_with_offsets[partition] = high_watermark_offset + offset
|
229
|
-
else
|
230
|
-
partitions_with_offsets[partition] = offset
|
231
|
-
end
|
232
|
-
end
|
233
|
-
end
|
234
|
-
|
235
|
-
tpl.add_topic_and_partitions_with_offsets(name, partitions_with_offsets)
|
236
|
-
end
|
237
|
-
|
238
|
-
tpl
|
239
|
-
end
|
240
|
-
|
241
|
-
# @param name [String] topic name
|
242
|
-
# @return [Integer] number of partitions of the topic we want to iterate over
|
243
|
-
def partition_count(name)
|
244
|
-
Admin
|
245
|
-
.cluster_info
|
246
|
-
.topics
|
247
|
-
.find { |topic| topic.fetch(:topic_name) == name }
|
248
|
-
.fetch(:partitions)
|
249
|
-
.count
|
250
|
-
end
|
251
166
|
end
|
252
167
|
end
|
253
168
|
end
|
@@ -81,6 +81,7 @@ module Karafka
|
|
81
81
|
# The first message we do need to get next time we poll. We use the minimum not to jump
|
82
82
|
# accidentally over any.
|
83
83
|
# @return [Karafka::Messages::Message, nil] cursor message or nil if none
|
84
|
+
# @note Cursor message can also return the offset in the time format
|
84
85
|
def cursor
|
85
86
|
return nil unless active?
|
86
87
|
|
@@ -44,7 +44,9 @@ module Karafka
|
|
44
44
|
if coordinator.filtered? && !revoked?
|
45
45
|
handle_post_filtering
|
46
46
|
elsif !revoked?
|
47
|
-
seek
|
47
|
+
# no need to check for manual seek because AJ consumer is internal and
|
48
|
+
# fully controlled by us
|
49
|
+
seek(coordinator.seek_offset, false)
|
48
50
|
resume
|
49
51
|
else
|
50
52
|
resume
|
@@ -50,7 +50,9 @@ module Karafka
|
|
50
50
|
if coordinator.filtered? && !revoked?
|
51
51
|
handle_post_filtering
|
52
52
|
elsif !revoked?
|
53
|
-
seek
|
53
|
+
# no need to check for manual seek because AJ consumer is internal and
|
54
|
+
# fully controlled by us
|
55
|
+
seek(coordinator.seek_offset, false)
|
54
56
|
resume
|
55
57
|
else
|
56
58
|
resume
|
@@ -42,7 +42,9 @@ module Karafka
|
|
42
42
|
if coordinator.success?
|
43
43
|
coordinator.pause_tracker.reset
|
44
44
|
|
45
|
-
seek
|
45
|
+
# no need to check for manual seek because AJ consumer is internal and
|
46
|
+
# fully controlled by us
|
47
|
+
seek(coordinator.seek_offset, false) unless revoked?
|
46
48
|
|
47
49
|
resume
|
48
50
|
elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
|
@@ -46,7 +46,9 @@ module Karafka
|
|
46
46
|
# Since we have VP here we do not commit intermediate offsets and need to commit
|
47
47
|
# them here. We do commit in collapsed mode but this is generalized.
|
48
48
|
mark_as_consumed(last_group_message) unless revoked?
|
49
|
-
seek
|
49
|
+
# no need to check for manual seek because AJ consumer is internal and
|
50
|
+
# fully controlled by us
|
51
|
+
seek(coordinator.seek_offset, false) unless revoked?
|
50
52
|
|
51
53
|
resume
|
52
54
|
elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
|
@@ -46,7 +46,9 @@ module Karafka
|
|
46
46
|
if coordinator.filtered? && !revoked?
|
47
47
|
handle_post_filtering
|
48
48
|
elsif !revoked?
|
49
|
-
seek
|
49
|
+
# no need to check for manual seek because AJ consumer is internal and
|
50
|
+
# fully controlled by us
|
51
|
+
seek(coordinator.seek_offset, false)
|
50
52
|
resume
|
51
53
|
else
|
52
54
|
resume
|
@@ -48,7 +48,10 @@ module Karafka
|
|
48
48
|
coordinator.pause_tracker.reset
|
49
49
|
|
50
50
|
mark_as_consumed(last_group_message) unless revoked?
|
51
|
-
|
51
|
+
|
52
|
+
# no need to check for manual seek because AJ consumer is internal and
|
53
|
+
# fully controlled by us
|
54
|
+
seek(coordinator.seek_offset, false) unless revoked?
|
52
55
|
|
53
56
|
resume
|
54
57
|
else
|
@@ -42,8 +42,8 @@ module Karafka
|
|
42
42
|
|
43
43
|
if coordinator.filtered? && !revoked?
|
44
44
|
handle_post_filtering
|
45
|
-
elsif !revoked?
|
46
|
-
seek(last_group_message.offset + 1)
|
45
|
+
elsif !revoked? && !coordinator.manual_seek?
|
46
|
+
seek(last_group_message.offset + 1, false)
|
47
47
|
resume
|
48
48
|
else
|
49
49
|
resume
|
@@ -38,7 +38,8 @@ module Karafka
|
|
38
38
|
return if coordinator.manual_pause?
|
39
39
|
|
40
40
|
mark_as_consumed(last_group_message) unless revoked?
|
41
|
-
|
41
|
+
# We should not overwrite user manual seek request with our seek
|
42
|
+
seek(coordinator.seek_offset, false) unless revoked? || coordinator.manual_seek?
|
42
43
|
|
43
44
|
resume
|
44
45
|
elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
|
@@ -37,7 +37,9 @@ module Karafka
|
|
37
37
|
|
38
38
|
return if coordinator.manual_pause?
|
39
39
|
|
40
|
-
|
40
|
+
unless revoked? || coordinator.manual_seek?
|
41
|
+
seek(last_group_message.offset + 1, false)
|
42
|
+
end
|
41
43
|
|
42
44
|
resume
|
43
45
|
elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
|
@@ -70,6 +70,10 @@ module Karafka
|
|
70
70
|
when :skip
|
71
71
|
nil
|
72
72
|
when :seek
|
73
|
+
# User direct actions take priority over automatic operations
|
74
|
+
# If we've already seeked we can just resume operations, nothing extra needed
|
75
|
+
return resume if coordinator.manual_seek?
|
76
|
+
|
73
77
|
throttle_message = filter.cursor
|
74
78
|
|
75
79
|
Karafka.monitor.instrument(
|
@@ -77,11 +81,14 @@ module Karafka
|
|
77
81
|
caller: self,
|
78
82
|
message: throttle_message
|
79
83
|
) do
|
80
|
-
seek(throttle_message.offset)
|
84
|
+
seek(throttle_message.offset, false)
|
81
85
|
end
|
82
86
|
|
83
87
|
resume
|
84
88
|
when :pause
|
89
|
+
# User direct actions take priority over automatic operations
|
90
|
+
return nil if coordinator.manual_pause?
|
91
|
+
|
85
92
|
throttle_message = filter.cursor
|
86
93
|
|
87
94
|
Karafka.monitor.instrument(
|
@@ -53,7 +53,7 @@ module Karafka
|
|
53
53
|
return if coordinator.manual_pause?
|
54
54
|
|
55
55
|
mark_as_consumed(last_group_message) unless revoked?
|
56
|
-
seek(coordinator.seek_offset) unless revoked?
|
56
|
+
seek(coordinator.seek_offset, false) unless revoked? || coordinator.manual_seek?
|
57
57
|
|
58
58
|
resume
|
59
59
|
else
|
@@ -45,10 +45,10 @@ module Karafka
|
|
45
45
|
# If still not revoked and was throttled, we need to apply throttling logic
|
46
46
|
if coordinator.filtered? && !revoked?
|
47
47
|
handle_post_filtering
|
48
|
-
elsif !revoked?
|
48
|
+
elsif !revoked? && !coordinator.manual_seek?
|
49
49
|
# If not revoked and not throttled, we move to where we were supposed to and
|
50
50
|
# resume
|
51
|
-
seek(coordinator.seek_offset)
|
51
|
+
seek(coordinator.seek_offset, false)
|
52
52
|
resume
|
53
53
|
else
|
54
54
|
resume
|
@@ -43,10 +43,10 @@ module Karafka
|
|
43
43
|
# If still not revoked and was throttled, we need to apply filtering logic
|
44
44
|
if coordinator.filtered? && !revoked?
|
45
45
|
handle_post_filtering
|
46
|
-
elsif !revoked?
|
46
|
+
elsif !revoked? && !coordinator.manual_seek?
|
47
47
|
# If not revoked and not throttled, we move to where we were supposed to and
|
48
48
|
# resume
|
49
|
-
seek(last_group_message.offset + 1)
|
49
|
+
seek(last_group_message.offset + 1, false)
|
50
50
|
resume
|
51
51
|
else
|
52
52
|
resume
|
@@ -118,7 +118,7 @@ module Karafka
|
|
118
118
|
|
119
119
|
# @return [Messages::Seek] markable message for real offset marking
|
120
120
|
def markable
|
121
|
-
raise Errors::
|
121
|
+
raise Errors::InvalidRealOffsetUsageError unless markable?
|
122
122
|
|
123
123
|
Messages::Seek.new(
|
124
124
|
@topic,
|
@@ -23,6 +23,7 @@ module Karafka
|
|
23
23
|
@consumptions = {}
|
24
24
|
@running_jobs = 0
|
25
25
|
@manual_pause = false
|
26
|
+
@manual_seek = false
|
26
27
|
@mutex = Mutex.new
|
27
28
|
@marked = false
|
28
29
|
@failure = false
|
@@ -41,6 +42,9 @@ module Karafka
|
|
41
42
|
# When starting to run, no pause is expected and no manual pause as well
|
42
43
|
@manual_pause = false
|
43
44
|
|
45
|
+
# No user invoked seeks on a new run
|
46
|
+
@manual_seek = false
|
47
|
+
|
44
48
|
# We set it on the first encounter and never again, because then the offset setting
|
45
49
|
# should be up to the consumers logic (our or the end user)
|
46
50
|
# Seek offset needs to be always initialized as for case where manual offset management
|
@@ -148,6 +152,16 @@ module Karafka
|
|
148
152
|
@pause_tracker.paused? && @manual_pause
|
149
153
|
end
|
150
154
|
|
155
|
+
# Marks seek as manual for coordination purposes
|
156
|
+
def manual_seek
|
157
|
+
@manual_seek = true
|
158
|
+
end
|
159
|
+
|
160
|
+
# @return [Boolean] did a user invoke seek in the current operations scope
|
161
|
+
def manual_seek?
|
162
|
+
@manual_seek
|
163
|
+
end
|
164
|
+
|
151
165
|
# Allows to run synchronized (locked) code that can operate in between virtual partitions
|
152
166
|
# @param block [Proc] code we want to run in the synchronized mode
|
153
167
|
def synchronize(&block)
|
data/lib/karafka/railtie.rb
CHANGED
@@ -70,11 +70,11 @@ if Karafka.rails?
|
|
70
70
|
# We can have many listeners, but it does not matter in which we will reload the code
|
71
71
|
# as long as all the consumers will be re-created as Rails reload is thread-safe
|
72
72
|
::Karafka::App.monitor.subscribe('connection.listener.fetch_loop') do
|
73
|
-
# Reload code each time there is a change in the code
|
74
|
-
next unless Rails.application.reloaders.any?(&:updated?)
|
75
73
|
# If consumer persistence is enabled, no reason to reload because we will still keep
|
76
74
|
# old consumer instances in memory.
|
77
75
|
next if Karafka::App.config.consumer_persistence
|
76
|
+
# Reload code each time there is a change in the code
|
77
|
+
next unless Rails.application.reloaders.any?(&:updated?)
|
78
78
|
|
79
79
|
Rails.application.reloader.reload!
|
80
80
|
end
|
data/lib/karafka/version.rb
CHANGED
data.tar.gz.sig
CHANGED
@@ -1,5 +1,2 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
�,S�����C8�<�OqCy?f��S3���m�VɂN�DDZ@���?�����\ *�ek�L��
|
4
|
-
�b35�'���r�j�<>4vI1��(P��)27�|�:��� c�y4���8X9�۟��
|
5
|
-
�0S�@D
|
1
|
+
mf̈́x1��2�'�lj!���.����U��KL1.�fv��$����N��Oi���U/)3lS��ݑ�%}�aE��}/J\�_��1In� N�t+����mV��G�����C��_Y;��X3�Y�8(J,}���=���:��p �(��2��{:�O�K"$��7W�A�5k��|l���]�Bd^�I�4*\���9���PL'������ޝmh�]o�n(�� ��e˱վ
|
2
|
+
PJ�5���
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: karafka
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.5
|
4
|
+
version: 2.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maciej Mensfeld
|
@@ -35,7 +35,7 @@ cert_chain:
|
|
35
35
|
Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
|
36
36
|
MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
|
37
37
|
-----END CERTIFICATE-----
|
38
|
-
date: 2023-06-
|
38
|
+
date: 2023-06-29 00:00:00.000000000 Z
|
39
39
|
dependencies:
|
40
40
|
- !ruby/object:Gem::Dependency
|
41
41
|
name: karafka-core
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
requirements:
|
44
44
|
- - ">="
|
45
45
|
- !ruby/object:Gem::Version
|
46
|
-
version: 2.1.
|
46
|
+
version: 2.1.1
|
47
47
|
- - "<"
|
48
48
|
- !ruby/object:Gem::Version
|
49
49
|
version: 2.2.0
|
@@ -53,7 +53,7 @@ dependencies:
|
|
53
53
|
requirements:
|
54
54
|
- - ">="
|
55
55
|
- !ruby/object:Gem::Version
|
56
|
-
version: 2.1.
|
56
|
+
version: 2.1.1
|
57
57
|
- - "<"
|
58
58
|
- !ruby/object:Gem::Version
|
59
59
|
version: 2.2.0
|
@@ -77,7 +77,7 @@ dependencies:
|
|
77
77
|
requirements:
|
78
78
|
- - ">="
|
79
79
|
- !ruby/object:Gem::Version
|
80
|
-
version: 2.6.
|
80
|
+
version: 2.6.2
|
81
81
|
- - "<"
|
82
82
|
- !ruby/object:Gem::Version
|
83
83
|
version: 3.0.0
|
@@ -87,7 +87,7 @@ dependencies:
|
|
87
87
|
requirements:
|
88
88
|
- - ">="
|
89
89
|
- !ruby/object:Gem::Version
|
90
|
-
version: 2.6.
|
90
|
+
version: 2.6.2
|
91
91
|
- - "<"
|
92
92
|
- !ruby/object:Gem::Version
|
93
93
|
version: 3.0.0
|
@@ -226,6 +226,8 @@ files:
|
|
226
226
|
- lib/karafka/pro/encryption/messages/parser.rb
|
227
227
|
- lib/karafka/pro/encryption/setup/config.rb
|
228
228
|
- lib/karafka/pro/iterator.rb
|
229
|
+
- lib/karafka/pro/iterator/expander.rb
|
230
|
+
- lib/karafka/pro/iterator/tpl_builder.rb
|
229
231
|
- lib/karafka/pro/loader.rb
|
230
232
|
- lib/karafka/pro/performance_tracker.rb
|
231
233
|
- lib/karafka/pro/processing/collapser.rb
|
metadata.gz.sig
CHANGED
Binary file
|