karafka 2.1.4 → 2.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +28 -0
- data/Gemfile.lock +20 -20
- data/karafka.gemspec +2 -2
- data/lib/karafka/admin.rb +37 -4
- data/lib/karafka/base_consumer.rb +21 -5
- data/lib/karafka/connection/client.rb +118 -95
- data/lib/karafka/connection/rebalance_manager.rb +2 -4
- data/lib/karafka/errors.rb +4 -1
- data/lib/karafka/messages/builders/message.rb +0 -3
- data/lib/karafka/messages/seek.rb +3 -0
- data/lib/karafka/patches/rdkafka/bindings.rb +4 -6
- data/lib/karafka/pro/iterator/expander.rb +95 -0
- data/lib/karafka/pro/iterator/tpl_builder.rb +145 -0
- data/lib/karafka/pro/iterator.rb +2 -87
- data/lib/karafka/pro/processing/filters_applier.rb +1 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +3 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +3 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +3 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +3 -1
- data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +3 -1
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +4 -1
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +2 -2
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +2 -2
- data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +2 -1
- data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +3 -1
- data/lib/karafka/pro/processing/strategies/ftr/default.rb +8 -1
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +2 -2
- data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +2 -2
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +3 -1
- data/lib/karafka/pro/processing/virtual_offset_manager.rb +1 -1
- data/lib/karafka/processing/coordinator.rb +14 -0
- data/lib/karafka/processing/strategies/default.rb +27 -11
- data/lib/karafka/railtie.rb +2 -2
- data/lib/karafka/setup/attributes_map.rb +1 -0
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +10 -9
- metadata.gz.sig +0 -0
- data/lib/karafka/patches/rdkafka/consumer.rb +0 -22
data/lib/karafka/patches/rdkafka/bindings.rb
CHANGED
@@ -49,14 +49,13 @@ module Karafka
        #
        # @param code [Integer]
        # @param opaque [Rdkafka::Opaque]
-        # @param consumer [Rdkafka::Consumer]
        # @param tpl [Rdkafka::Consumer::TopicPartitionList]
-        def trigger_callbacks(code, opaque, consumer, tpl)
+        def trigger_callbacks(code, opaque, tpl)
          case code
          when RB::RD_KAFKA_RESP_ERR__ASSIGN_PARTITIONS
-            opaque.call_on_partitions_assigned(consumer, tpl)
+            opaque.call_on_partitions_assigned(tpl)
          when RB::RD_KAFKA_RESP_ERR__REVOKE_PARTITIONS
-            opaque.call_on_partitions_revoked(consumer, tpl)
+            opaque.call_on_partitions_revoked(tpl)
          end
        rescue StandardError => e
          Karafka.monitor.instrument(
@@ -92,9 +91,8 @@ module Karafka
          return unless opaque

          tpl = ::Rdkafka::Consumer::TopicPartitionList.from_native_tpl(partitions_ptr).freeze
-          consumer = ::Rdkafka::Consumer.new(client_ptr)

-          pr.trigger_callbacks(code, opaque, consumer, tpl)
+          pr.trigger_callbacks(code, opaque, tpl)
        end
      end
    end
data/lib/karafka/pro/iterator/expander.rb
ADDED
@@ -0,0 +1,95 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    class Iterator
+      # There are various ways you can provide topics information for iterating.
+      #
+      # This mapper normalizes this data, resolves offsets and maps the time based offsets into
+      # appropriate ones
+      #
+      # Following formats are accepted:
+      #
+      # - 'topic1' - just a string with one topic name
+      # - ['topic1', 'topic2'] - just the names
+      # - { 'topic1' => -100 } - names with negative lookup offset
+      # - { 'topic1' => { 0 => 5 } } - names with exact partitions offsets
+      # - { 'topic1' => { 0 => -5 }, 'topic2' => { 1 => 5 } } - with per partition negative offsets
+      # - { 'topic1' => 100 } - means we run all partitions from the offset 100
+      # - { 'topic1' => Time.now - 60 } - we run all partitions from the message from 60s ago
+      # - { 'topic1' => { 1 => Time.now - 60 } } - partition1 from message 60s ago
+      #
+      class Expander
+        # Expands topics to which we want to subscribe with partitions information in case this
+        # info is not provided.
+        #
+        # @param topics [Array, Hash, String] topics definitions
+        # @return [Hash] expanded and normalized requested topics and partitions data
+        def call(topics)
+          expanded = Hash.new { |h, k| h[k] = {} }
+
+          normalize_format(topics).map do |topic, details|
+            if details.is_a?(Hash)
+              details.each do |partition, offset|
+                expanded[topic][partition] = offset
+              end
+            else
+              partition_count(topic).times do |partition|
+                # If no offsets are provided, we just start from zero
+                expanded[topic][partition] = details || 0
+              end
+            end
+          end
+
+          expanded
+        end
+
+        private
+
+        # Input can be provided in multiple formats. Here we normalize it to one (hash).
+        #
+        # @param topics [Array, Hash, String] requested topics
+        # @return [Hash] normalized hash with topics data
+        def normalize_format(topics)
+          # Simplification for the single topic case
+          topics = [topics] if topics.is_a?(String)
+
+          # If we've got just array with topics, we need to convert that into a representation
+          # that we can expand with offsets
+          topics = topics.map { |name| [name, false] }.to_h if topics.is_a?(Array)
+          # We remap by creating new hash, just in case the hash came as the argument for this
+          # expander. We do not want to modify user provided hash
+          topics.transform_keys(&:to_s)
+        end
+
+        # List of topics with their partition information for expansion
+        # We cache it so we do not have to run consecutive requests to obtain data about multiple
+        # topics
+        def topics
+          @topics ||= Admin.cluster_info.topics
+        end
+
+        # @param name [String] topic name
+        # @return [Integer] number of partitions of the topic we want to iterate over
+        def partition_count(name)
+          topics
+            .find { |topic| topic.fetch(:topic_name) == name }
+            .tap { |topic| topic || raise(Errors::TopicNotFoundError, name) }
+            .fetch(:partitions)
+            .count
+        end
+      end
+    end
+  end
+end
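For orientation, here is a hedged sketch of how the accepted formats above map onto the Pro Iterator API that feeds this Expander; the topic names are illustrative, not taken from this diff:

```ruby
# Each value below is a valid `topics` argument that the Expander normalizes
# into a { topic => { partition => offset } } hash ('events'/'logs' are hypothetical):
Karafka::Pro::Iterator.new('events')                       # all partitions, from offset 0
Karafka::Pro::Iterator.new(%w[events logs])                # several topics, from offset 0
Karafka::Pro::Iterator.new({ 'events' => -100 })           # last 100 messages per partition
Karafka::Pro::Iterator.new({ 'events' => { 0 => 5 } })     # partition 0, from offset 5
Karafka::Pro::Iterator.new({ 'events' => Time.now - 60 })  # all partitions, from ~60s ago
```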
data/lib/karafka/pro/iterator/tpl_builder.rb
ADDED
@@ -0,0 +1,145 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    class Iterator
+      # Max time for a TPL request. We increase it to compensate for remote clusters latency
+      TPL_REQUEST_TIMEOUT = 2_000
+
+      private_constant :TPL_REQUEST_TIMEOUT
+
+      # Because we have various formats in which we can provide the offsets, before we can
+      # subscribe to them, there needs to be a bit of normalization.
+      #
+      # For some of the cases, we need to go to Kafka and get the real offsets or watermarks.
+      #
+      # This builder resolves that and builds a tpl to which we can safely subscribe the way
+      # we want it.
+      class TplBuilder
+        # @param consumer [::Rdkafka::Consumer] consumer instance needed to talk with Kafka
+        # @param expanded_topics [Hash] hash with expanded and normalized topics data
+        def initialize(consumer, expanded_topics)
+          @consumer = consumer
+          @expanded_topics = expanded_topics
+          @mapped_topics = Hash.new { |h, k| h[k] = {} }
+        end
+
+        # @return [Rdkafka::Consumer::TopicPartitionList] final tpl we can use to subscribe
+        def call
+          resolve_partitions_without_offsets
+          resolve_partitions_with_exact_offsets
+          resolve_partitions_with_negative_offsets
+          resolve_partitions_with_time_offsets
+
+          # Final tpl with all the data
+          tpl = Rdkafka::Consumer::TopicPartitionList.new
+
+          @mapped_topics.each do |name, partitions|
+            tpl.add_topic_and_partitions_with_offsets(name, partitions)
+          end
+
+          tpl
+        end
+
+        private
+
+        # First we expand on those partitions that do not have offsets defined.
+        # When we operate in case like this, we just start from beginning
+        def resolve_partitions_without_offsets
+          @expanded_topics.each do |name, partitions|
+            # We care here only about the case where we have partitions without offsets
+            next unless partitions.is_a?(Array) || partitions.is_a?(Range)
+
+            # When no offsets defined, we just start from zero
+            @mapped_topics[name] = partitions.map { |partition| [partition, 0] }.to_h
+          end
+        end
+
+        # If we get exact numeric offsets, we can just start from them without any extra work
+        def resolve_partitions_with_exact_offsets
+          @expanded_topics.each do |name, partitions|
+            next unless partitions.is_a?(Hash)
+
+            partitions.each do |partition, offset|
+              # Skip negative and time based offsets
+              next unless offset.is_a?(Integer) && offset >= 0
+
+              # Exact offsets can be used as they are
+              # No need for extra operations
+              @mapped_topics[name][partition] = offset
+            end
+          end
+        end
+
+        # If the offsets are negative, it means we want to fetch N last messages and we need to
+        # figure out the appropriate offsets
+        #
+        # We do it by getting the watermark offsets and just calculating it. This means that for
+        # heavily compacted topics, this may return less than the desired number but it is a
+        # limitation that is documented.
+        def resolve_partitions_with_negative_offsets
+          @expanded_topics.each do |name, partitions|
+            next unless partitions.is_a?(Hash)
+
+            partitions.each do |partition, offset|
+              # Care only about negative offsets (last n messages)
+              next unless offset.is_a?(Integer) && offset.negative?
+
+              _, high_watermark_offset = @consumer.query_watermark_offsets(name, partition)
+              # We add because this offset is negative
+              @mapped_topics[name][partition] = high_watermark_offset + offset
+            end
+          end
+        end
+
+        # For time based offsets we first need to aggregate them and request the proper offsets.
+        # We want to get all times in one go for all tpls defined with times, so we accumulate
+        # them here and we will make one sync request to Kafka for all.
+        def resolve_partitions_with_time_offsets
+          time_tpl = Rdkafka::Consumer::TopicPartitionList.new
+
+          # First we need to collect the time based ones
+          @expanded_topics.each do |name, partitions|
+            next unless partitions.is_a?(Hash)
+
+            time_based = {}
+
+            partitions.each do |partition, offset|
+              next unless offset.is_a?(Time)
+
+              time_based[partition] = offset
+            end
+
+            next if time_based.empty?
+
+            time_tpl.add_topic_and_partitions_with_offsets(name, time_based)
+          end
+
+          # If there were no time-based, no need to query Kafka
+          return if time_tpl.empty?
+
+          real_offsets = @consumer.offsets_for_times(time_tpl, TPL_REQUEST_TIMEOUT)
+
+          real_offsets.to_h.each do |name, results|
+            results.each do |result|
+              raise(Errors::InvalidTimeBasedOffsetError) unless result
+
+              @mapped_topics[name][result.partition] = result.offset
+            end
+          end
+        end
+      end
+    end
+  end
+end
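To make the negative-offset resolution concrete, a minimal worked example of what `resolve_partitions_with_negative_offsets` computes per partition (the watermark value is made up):

```ruby
# Hypothetical partition state: the high watermark (offset of the next message
# to be written) is 5_000 and the user asked for -100, i.e. "the last 100 messages".
high_watermark_offset = 5_000
requested_offset = -100

# Addition works because the requested offset is negative: 5_000 + (-100) => 4_900
start_offset = high_watermark_offset + requested_offset
puts start_offset # => 4900, iteration starts 100 messages before the partition end
```

As the comment in the code notes, on heavily compacted topics this can yield fewer messages than requested.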
data/lib/karafka/pro/iterator.rb
CHANGED
@@ -50,7 +50,7 @@ module Karafka
         settings: { 'auto.offset.reset': 'beginning' },
         yield_nil: false
       )
-      @topics_with_partitions = expand_topics_with_partitions(topics)
+      @topics_with_partitions = Expander.new.call(topics)

       @routing_topics = @topics_with_partitions.map do |name, _|
         [name, ::Karafka::Routing::Router.find_or_initialize_by_name(name)]
@@ -71,7 +71,7 @@ module Karafka
      # only eat up resources.
      def each
        Admin.with_consumer(@settings) do |consumer|
-          tpl = tpl_with_expanded_offsets(consumer)
+          tpl = TplBuilder.new(consumer, @topics_with_partitions).call
          consumer.assign(tpl)

          # We need this for self-referenced APIs like pausing
@@ -131,43 +131,6 @@ module Karafka

      private

-      # Expands topics to which we want to subscribe with partitions information in case this
-      # info is not provided. For our convenience we want to support 5 formats of defining
-      # the subscribed topics:
-      #
-      # - 'topic1' - just a string with one topic name
-      # - ['topic1', 'topic2'] - just the names
-      # - { 'topic1' => -100 } - names with negative lookup offset
-      # - { 'topic1' => { 0 => 5 } } - names with exact partitions offsets
-      # - { 'topic1' => { 0 => -5 }, 'topic2' => { 1 => 5 } } - with per partition negative offsets
-      #
-      # @param topics [Array, Hash] topics definitions
-      # @return [Hash] hash with topics containing partitions definitions
-      def expand_topics_with_partitions(topics)
-        # Simplification for the single topic case
-        topics = [topics] if topics.is_a?(String)
-        # If we've got just array with topics, we need to convert that into a representation
-        # that we can expand with offsets
-        topics = topics.map { |name| [name, false] }.to_h if topics.is_a?(Array)
-
-        expanded = Hash.new { |h, k| h[k] = {} }
-
-        topics.map do |topic, details|
-          if details.is_a?(Hash)
-            details.each do |partition, offset|
-              expanded[topic][partition] = offset
-            end
-          else
-            partition_count(topic.to_s).times do |partition|
-              # If no offsets are provided, we just start from zero
-              expanded[topic][partition] = details || 0
-            end
-          end
-        end
-
-        expanded
-      end
-
      # @param timeout [Integer] timeout in ms
      # @return [Rdkafka::Consumer::Message, nil] message or nil if nothing to do
      def poll(timeout)
@@ -200,54 +163,6 @@ module Karafka
      def done?
        @stopped_partitions >= @total_partitions
      end
-
-      # Builds the tpl representing all the subscriptions we want to run
-      #
-      # Additionally for negative offsets, does the watermark calculation where to start
-      #
-      # @param consumer [Rdkafka::Consumer] consumer we need in case of negative offsets as
-      #   negative are going to be used to do "give me last X". We use the already initialized
-      #   consumer instance, not to start another one again.
-      # @return [Rdkafka::Consumer::TopicPartitionList]
-      def tpl_with_expanded_offsets(consumer)
-        tpl = Rdkafka::Consumer::TopicPartitionList.new
-
-        @topics_with_partitions.each do |name, partitions|
-          partitions_with_offsets = {}
-
-          # When no offsets defined, we just start from zero
-          if partitions.is_a?(Array) || partitions.is_a?(Range)
-            partitions_with_offsets = partitions.map { |partition| [partition, 0] }.to_h
-          else
-            # When offsets defined, we can either use them if positive or expand and move back
-            # in case of negative (-1000 means last 1000 messages, etc)
-            partitions.each do |partition, offset|
-              if offset.negative?
-                _, high_watermark_offset = consumer.query_watermark_offsets(name, partition)
-                # We add because this offset is negative
-                partitions_with_offsets[partition] = high_watermark_offset + offset
-              else
-                partitions_with_offsets[partition] = offset
-              end
-            end
-          end
-
-          tpl.add_topic_and_partitions_with_offsets(name, partitions_with_offsets)
-        end
-
-        tpl
-      end
-
-      # @param name [String] topic name
-      # @return [Integer] number of partitions of the topic we want to iterate over
-      def partition_count(name)
-        Admin
-          .cluster_info
-          .topics
-          .find { |topic| topic.fetch(:topic_name) == name }
-          .fetch(:partitions)
-          .count
-      end
    end
  end
end
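Putting the refactor together, a hedged usage sketch: `Expander.new.call(topics)` normalizes the requested topics, `TplBuilder` resolves them into a topic partition list, and `each` yields messages ('events' is a hypothetical topic name):

```ruby
# Iterate over the last 10 messages of every partition of 'events'
Karafka::Pro::Iterator.new({ 'events' => -10 }).each do |message|
  puts "#{message.partition}/#{message.offset}: #{message.raw_payload}"
end
```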
data/lib/karafka/pro/processing/filters_applier.rb
CHANGED
@@ -81,6 +81,7 @@ module Karafka
      # The first message we do need to get next time we poll. We use the minimum not to
      # accidentally jump over any.
      # @return [Karafka::Messages::Message, nil] cursor message or nil if none
+      # @note Cursor message can also return the offset in the time format
      def cursor
        return nil unless active?

data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb
CHANGED
@@ -44,7 +44,9 @@ module Karafka
            if coordinator.filtered? && !revoked?
              handle_post_filtering
            elsif !revoked?
-              seek
+              # no need to check for manual seek because AJ consumer is internal and
+              # fully controlled by us
+              seek(coordinator.seek_offset, false)
              resume
            else
              resume
data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb
CHANGED
@@ -50,7 +50,9 @@ module Karafka
            if coordinator.filtered? && !revoked?
              handle_post_filtering
            elsif !revoked?
-              seek
+              # no need to check for manual seek because AJ consumer is internal and
+              # fully controlled by us
+              seek(coordinator.seek_offset, false)
              resume
            else
              resume
data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb
CHANGED
@@ -42,7 +42,9 @@ module Karafka
            if coordinator.success?
              coordinator.pause_tracker.reset

-              seek
+              # no need to check for manual seek because AJ consumer is internal and
+              # fully controlled by us
+              seek(coordinator.seek_offset, false) unless revoked?

              resume
            elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb
CHANGED
@@ -46,7 +46,9 @@ module Karafka
              # Since we have VP here we do not commit intermediate offsets and need to commit
              # them here. We do commit in collapsed mode but this is generalized.
              mark_as_consumed(last_group_message) unless revoked?
-              seek
+              # no need to check for manual seek because AJ consumer is internal and
+              # fully controlled by us
+              seek(coordinator.seek_offset, false) unless revoked?

              resume
            elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb
CHANGED
@@ -46,7 +46,9 @@ module Karafka
            if coordinator.filtered? && !revoked?
              handle_post_filtering
            elsif !revoked?
-              seek
+              # no need to check for manual seek because AJ consumer is internal and
+              # fully controlled by us
+              seek(coordinator.seek_offset, false)
              resume
            else
              resume
data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb
CHANGED
@@ -48,7 +48,10 @@ module Karafka
              coordinator.pause_tracker.reset

              mark_as_consumed(last_group_message) unless revoked?
-
+
+              # no need to check for manual seek because AJ consumer is internal and
+              # fully controlled by us
+              seek(coordinator.seek_offset, false) unless revoked?

              resume
            else
data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb
CHANGED
@@ -42,8 +42,8 @@ module Karafka

            if coordinator.filtered? && !revoked?
              handle_post_filtering
-            elsif !revoked?
-              seek(last_group_message.offset + 1)
+            elsif !revoked? && !coordinator.manual_seek?
+              seek(last_group_message.offset + 1, false)
              resume
            else
              resume
data/lib/karafka/pro/processing/strategies/dlq/lrj.rb
CHANGED
@@ -38,7 +38,8 @@ module Karafka
            return if coordinator.manual_pause?

            mark_as_consumed(last_group_message) unless revoked?
-
+            # We should not overwrite user manual seek request with our seek
+            seek(coordinator.seek_offset, false) unless revoked? || coordinator.manual_seek?

            resume
          elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb
CHANGED
@@ -37,7 +37,9 @@ module Karafka

            return if coordinator.manual_pause?

-
+            unless revoked? || coordinator.manual_seek?
+              seek(last_group_message.offset + 1, false)
+            end

            resume
          elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
data/lib/karafka/pro/processing/strategies/ftr/default.rb
CHANGED
@@ -70,6 +70,10 @@ module Karafka
          when :skip
            nil
          when :seek
+            # User direct actions take priority over automatic operations
+            # If we've already seeked we can just resume operations, nothing extra needed
+            return resume if coordinator.manual_seek?
+
            throttle_message = filter.cursor

            Karafka.monitor.instrument(
@@ -77,11 +81,14 @@ module Karafka
              caller: self,
              message: throttle_message
            ) do
-              seek(throttle_message.offset)
+              seek(throttle_message.offset, false)
            end

            resume
          when :pause
+            # User direct actions take priority over automatic operations
+            return nil if coordinator.manual_pause?
+
            throttle_message = filter.cursor

            Karafka.monitor.instrument(
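The intent of these new guards, sketched under the assumption (suggested by the `base_consumer.rb` changes in this release) that a user-invoked `seek` or `pause` marks the coordinator as manual; `process` and `replay_needed?` below are hypothetical helpers:

```ruby
class EventsConsumer < ApplicationConsumer
  def consume
    messages.each do |message|
      process(message)

      # A direct user seek is recorded as manual on the coordinator, so the
      # post-filtering flow resumes instead of issuing its own competing seek
      seek(message.offset - 10) if replay_needed?(message)
    end
  end
end
```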
data/lib/karafka/pro/processing/strategies/lrj/default.rb
CHANGED
@@ -53,7 +53,7 @@ module Karafka
            return if coordinator.manual_pause?

            mark_as_consumed(last_group_message) unless revoked?
-            seek(coordinator.seek_offset) unless revoked?
+            seek(coordinator.seek_offset, false) unless revoked? || coordinator.manual_seek?

            resume
          else
data/lib/karafka/pro/processing/strategies/lrj/ftr.rb
CHANGED
@@ -45,10 +45,10 @@ module Karafka
            # If still not revoked and was throttled, we need to apply throttling logic
            if coordinator.filtered? && !revoked?
              handle_post_filtering
-            elsif !revoked?
+            elsif !revoked? && !coordinator.manual_seek?
              # If not revoked and not throttled, we move to where we were supposed to and
              # resume
-              seek(coordinator.seek_offset)
+              seek(coordinator.seek_offset, false)
              resume
            else
              resume
data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb
CHANGED
@@ -43,10 +43,10 @@ module Karafka
            # If still not revoked and was throttled, we need to apply filtering logic
            if coordinator.filtered? && !revoked?
              handle_post_filtering
-            elsif !revoked?
+            elsif !revoked? && !coordinator.manual_seek?
              # If not revoked and not throttled, we move to where we were supposed to and
              # resume
-              seek(last_group_message.offset + 1)
+              seek(last_group_message.offset + 1, false)
              resume
            else
              resume
data/lib/karafka/pro/processing/virtual_offset_manager.rb
CHANGED
@@ -118,7 +118,7 @@ module Karafka

      # @return [Messages::Seek] markable message for real offset marking
      def markable
-        raise Errors::InvalidRealOffsetUsage unless markable?
+        raise Errors::InvalidRealOffsetUsageError unless markable?

        Messages::Seek.new(
          @topic,
data/lib/karafka/processing/coordinator.rb
CHANGED
@@ -23,6 +23,7 @@ module Karafka
        @consumptions = {}
        @running_jobs = 0
        @manual_pause = false
+        @manual_seek = false
        @mutex = Mutex.new
        @marked = false
        @failure = false
@@ -41,6 +42,9 @@ module Karafka
        # When starting to run, no pause is expected and no manual pause as well
        @manual_pause = false

+        # No user invoked seeks on a new run
+        @manual_seek = false
+
        # We set it on the first encounter and never again, because then the offset setting
        # should be up to the consumers logic (our or the end user)
        # Seek offset needs to be always initialized as for case where manual offset management
@@ -148,6 +152,16 @@ module Karafka
        @pause_tracker.paused? && @manual_pause
      end

+      # Marks seek as manual for coordination purposes
+      def manual_seek
+        @manual_seek = true
+      end
+
+      # @return [Boolean] did a user invoke seek in the current operations scope
+      def manual_seek?
+        @manual_seek
+      end
+
      # Allows to run synchronized (locked) code that can operate in between virtual partitions
      # @param block [Proc] code we want to run in the synchronized mode
      def synchronize(&block)
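A minimal sketch of the flag's lifecycle as this diff suggests it; the construction below is simplified and the constructor arguments are assumptions, not taken from this diff:

```ruby
# Hypothetical setup: topic, partition and pause_tracker come from routing/runtime
coordinator = Karafka::Processing::Coordinator.new(topic, partition, pause_tracker)

coordinator.manual_seek     # a user-invoked seek latches the flag...
coordinator.manual_seek?    # => true, automatic post-processing seeks back off

coordinator.start(messages) # ...and each new processing run clears it again
coordinator.manual_seek?    # => false
```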
|