karafka 2.0.40 → 2.1.0
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +2 -2
- data/CHANGELOG.md +30 -1
- data/Gemfile +3 -2
- data/Gemfile.lock +13 -1
- data/bin/integrations +17 -2
- data/config/locales/errors.yml +10 -0
- data/config/locales/pro_errors.yml +0 -2
- data/lib/karafka/active_job/consumer.rb +16 -11
- data/lib/karafka/active_job/current_attributes/loading.rb +36 -0
- data/lib/karafka/active_job/current_attributes/persistence.rb +28 -0
- data/lib/karafka/active_job/current_attributes.rb +42 -0
- data/lib/karafka/active_job/dispatcher.rb +8 -2
- data/lib/karafka/admin.rb +17 -13
- data/lib/karafka/connection/client.rb +6 -3
- data/lib/karafka/errors.rb +3 -0
- data/lib/karafka/instrumentation/callbacks/statistics.rb +12 -0
- data/lib/karafka/instrumentation/logger_listener.rb +16 -5
- data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +166 -0
- data/lib/karafka/pro/active_job/consumer.rb +1 -10
- data/lib/karafka/pro/active_job/dispatcher.rb +2 -2
- data/lib/karafka/pro/iterator.rb +253 -0
- data/lib/karafka/pro/processing/coordinator.rb +20 -1
- data/lib/karafka/pro/processing/filters/virtual_limiter.rb +52 -0
- data/lib/karafka/pro/processing/filters_applier.rb +4 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +3 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +2 -2
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +2 -0
- data/lib/karafka/pro/processing/strategies/aj/mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/ftr.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +3 -6
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom_vp.rb +43 -0
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_vp.rb +1 -0
- data/lib/karafka/pro/processing/strategies/dlq/ftr_mom.rb +3 -7
- data/lib/karafka/pro/processing/strategies/dlq/ftr_mom_vp.rb +41 -0
- data/lib/karafka/pro/processing/strategies/dlq/ftr_vp.rb +1 -0
- data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +3 -6
- data/lib/karafka/pro/processing/strategies/dlq/lrj_mom_vp.rb +36 -0
- data/lib/karafka/pro/processing/strategies/dlq/lrj_vp.rb +1 -0
- data/lib/karafka/pro/processing/strategies/dlq/mom.rb +8 -7
- data/lib/karafka/pro/processing/strategies/dlq/mom_vp.rb +37 -0
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +2 -0
- data/lib/karafka/pro/processing/strategies/lrj/ftr_mom_vp.rb +40 -0
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +2 -0
- data/lib/karafka/pro/processing/strategies/lrj/mom_vp.rb +38 -0
- data/lib/karafka/pro/processing/strategies/mom/ftr_vp.rb +37 -0
- data/lib/karafka/pro/{base_consumer.rb → processing/strategies/mom/vp.rb} +17 -7
- data/lib/karafka/pro/processing/strategies/vp/default.rb +51 -0
- data/lib/karafka/pro/processing/virtual_offset_manager.rb +147 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/contract.rb +0 -17
- data/lib/karafka/processing/strategies/default.rb +2 -0
- data/lib/karafka/processing/strategies/dlq_mom.rb +9 -7
- data/lib/karafka/routing/router.rb +15 -0
- data/lib/karafka/setup/config.rb +7 -1
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +5 -0
- data.tar.gz.sig +0 -0
- metadata +17 -4
- metadata.gz.sig +0 -0
- data/lib/karafka/instrumentation/vendors/datadog/listener.rb +0 -16
@@ -0,0 +1,166 @@
+# frozen_string_literal: true
+
+require 'socket'
+
+module Karafka
+  module Instrumentation
+    module Vendors
+      # Namespace for instrumentation related with Kubernetes
+      module Kubernetes
+        # Kubernetes HTTP listener that does not only reply when process is not fully hanging, but
+        # also allows to define max time of processing and looping.
+        #
+        # Processes like Karafka server can hang while still being reachable. For example, in case
+        # something would hang inside of the user code, Karafka could stop polling and no new
+        # data would be processed, but process itself would still be active. This listener allows
+        # for defining of a ttl that gets bumped on each poll loop and before and after processing
+        # of a given messages batch.
+        class LivenessListener
+          include ::Karafka::Core::Helpers::Time
+
+          # @param hostname [String, nil] hostname or nil to bind on all
+          # @param port [Integer] TCP port on which we want to run our HTTP status server
+          # @param consuming_ttl [Integer] time in ms after which we consider consumption hanging.
+          #   It allows us to define max consumption time after which k8s should consider given
+          #   process as hanging
+          # @param polling_ttl [Integer] max time in ms for polling. If polling (any) does not
+          #   happen that often, process should be considered dead.
+          # @note The default TTL matches the default `max.poll.interval.ms`
+          def initialize(
+            hostname: nil,
+            port: 3000,
+            consuming_ttl: 5 * 60 * 1_000,
+            polling_ttl: 5 * 60 * 1_000
+          )
+            @server = TCPServer.new(*[hostname, port].compact)
+            @polling_ttl = polling_ttl
+            @consuming_ttl = consuming_ttl
+            @mutex = Mutex.new
+            @pollings = {}
+            @consumptions = {}
+
+            Thread.new do
+              loop do
+                break unless respond
+              end
+            end
+          end
+
+          # Tick on each fetch
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_connection_listener_fetch_loop(_event)
+            mark_polling_tick
+          end
+
+          # Tick on starting work
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_consume(_event)
+            mark_consumption_tick
+          end
+
+          # Tick on finished work
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_consumed(_event)
+            clear_consumption_tick
+          end
+
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_revoke(_event)
+            mark_consumption_tick
+          end
+
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_revoked(_event)
+            clear_consumption_tick
+          end
+
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_shutting_down(_event)
+            mark_consumption_tick
+          end
+
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_shutdown(_event)
+            clear_consumption_tick
+          end
+
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_error_occurred(_event)
+            clear_consumption_tick
+            clear_polling_tick
+          end
+
+          # Stop the http server when we stop the process
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_app_stopped(_event)
+            @server.close
+          end
+
+          private
+
+          # Wraps the logic with a mutex
+          # @param block [Proc] code we want to run in mutex
+          def synchronize(&block)
+            @mutex.synchronize(&block)
+          end
+
+          # @return [Integer] object id of the current thread
+          def thread_id
+            Thread.current.object_id
+          end
+
+          # Update the polling tick time for current thread
+          def mark_polling_tick
+            synchronize do
+              @pollings[thread_id] = monotonic_now
+            end
+          end
+
+          # Clear current thread polling time tracker
+          def clear_polling_tick
+            synchronize do
+              @pollings.delete(thread_id)
+            end
+          end
+
+          # Update the processing tick time
+          def mark_consumption_tick
+            synchronize do
+              @consumptions[thread_id] = monotonic_now
+            end
+          end
+
+          # Clear current thread consumption time tracker
+          def clear_consumption_tick
+            synchronize do
+              @consumptions.delete(thread_id)
+            end
+          end
+
+          # Responds to a HTTP request with the process liveness status
+          def respond
+            client = @server.accept
+            client.gets
+            client.print "HTTP/1.1 #{status}\r\n"
+            client.close
+
+            true
+          rescue Errno::ECONNRESET, Errno::EPIPE, IOError
+            !@server.closed?
+          end
+
+          # Did we exceed any of the ttls
+          # @return [String] 204 string if ok, 500 otherwise
+          def status
+            time = monotonic_now
+
+            return '500' if @pollings.values.any? { |tick| (time - tick) > @polling_ttl }
+            return '500' if @consumptions.values.any? { |tick| (time - tick) > @consuming_ttl }
+
+            '204'
+          end
+        end
+      end
+    end
+  end
+end
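For context, a listener like this is typically attached to Karafka's instrumentation monitor from the application's `karafka.rb`. The sketch below is illustrative only; the hostname, port, and TTL values are assumptions, not values mandated by this diff.

```ruby
# karafka.rb (illustrative wiring of the new liveness listener; values are examples)
require 'karafka/instrumentation/vendors/kubernetes/liveness_listener'

liveness = ::Karafka::Instrumentation::Vendors::Kubernetes::LivenessListener.new(
  hostname: '0.0.0.0',   # assumption: bind on all interfaces for the k8s HTTP probe
  port: 9090,            # assumption: any free port your livenessProbe targets
  polling_ttl: 300_000,  # 5 minutes, mirroring the listener defaults above
  consuming_ttl: 300_000
)

# Karafka's monitor dispatches events to the matching on_* handlers defined by the listener
Karafka.monitor.subscribe(liveness)
```

A Kubernetes `httpGet` liveness probe pointed at that port then receives `204` while polling and consumption ticks stay within their TTLs, and `500` once either is exceeded (see `status` above).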
@@ -35,18 +35,9 @@ module Karafka
             # double-processing
             break if Karafka::App.stopping? && !topic.virtual_partitions?
 
-            # Break if we already know, that one of virtual partitions has failed and we will
-            # be restarting processing all together after all VPs are done. This will minimize
-            # number of jobs that will be re-processed
-            break if topic.virtual_partitions? && failing?
-
             consume_job(message)
 
-            # We
-            # this could create random markings.
-            # The exception here is the collapsed state where we can move one after another
-            next if topic.virtual_partitions? && !collapsed?
-
+            # We can always mark because of the virtual offset management that we have in VPs
            mark_as_consumed(message)
          end
        end
@@ -39,7 +39,7 @@ module Karafka
           fetch_option(job, :dispatch_method, DEFAULTS),
           dispatch_details(job).merge!(
             topic: job.queue_name,
-            payload: ::ActiveSupport::JSON.encode(job.serialize)
+            payload: ::ActiveSupport::JSON.encode(serialize_job(job))
           )
         )
       end
@@ -54,7 +54,7 @@ module Karafka
 
           dispatches[d_method] << dispatch_details(job).merge!(
             topic: job.queue_name,
-            payload: ::ActiveSupport::JSON.encode(job.serialize)
+            payload: ::ActiveSupport::JSON.encode(serialize_job(job))
           )
         end
 
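The switch from encoding `job.serialize` directly to `serialize_job(job)` is the hook used by the new ActiveJob `CurrentAttributes` files listed above to enrich the dispatched payload. The registration below is a hedged sketch; the exact call is not shown in this excerpt, and `Current` stands for a hypothetical `ActiveSupport::CurrentAttributes` subclass in your application.

```ruby
# Hedged sketch: enabling CurrentAttributes persistence for Karafka's ActiveJob adapter.
# The require path matches the new file in this release; the persist call signature is an
# assumption based on those files, not code shown in this excerpt.
require 'karafka/active_job/current_attributes'

Karafka::ActiveJob::CurrentAttributes.persist('Current')
```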
@@ -0,0 +1,253 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    # Topic iterator allows you to iterate over topic/partition data and perform lookups for
+    # information that you need.
+    #
+    # It supports early stops on finding the requested data and allows for seeking till
+    # the end. It also allows for signaling, when a given message should be last out of certain
+    # partition, but we still want to continue iterating in other messages.
+    #
+    # It does **not** create a consumer group and does not have any offset management.
+    class Iterator
+      # Local partition reference for librdkafka
+      Partition = Struct.new(:partition, :offset)
+
+      private_constant :Partition
+
+      # A simple API allowing to iterate over topic/partition data, without having to subscribe
+      # and deal with rebalances. This API allows for multi-partition streaming and is optimized
+      # for data lookups. It allows for explicit stopping iteration over any partition during
+      # the iteration process, allowing for optimized lookups.
+      #
+      # @param topics [Array<String>, Hash] list of strings if we want to subscribe to multiple
+      #   topics and all of their partitions or a hash where keys are the topics and values are
+      #   hashes with partitions and their initial offsets.
+      # @param settings [Hash] extra settings for the consumer. Please keep in mind, that if
+      #   overwritten, you may want to include `auto.offset.reset` to match your case.
+      # @param yield_nil [Boolean] should we yield also `nil` values when poll returns nothing.
+      #   Useful in particular for long-living iterators.
+      #
+      # @note It is worth keeping in mind, that this API also needs to operate within
+      #   `max.poll.interval.ms` limitations on each iteration
+      #
+      # @note In case of a never-ending iterator, you need to set `enable.partition.eof` to `false`
+      #   so we don't stop polling data even when reaching the end (end on a given moment)
+      def initialize(
+        topics,
+        settings: { 'auto.offset.reset': 'beginning' },
+        yield_nil: false
+      )
+        @topics_with_partitions = expand_topics_with_partitions(topics)
+
+        @routing_topics = @topics_with_partitions.map do |name, _|
+          [name, ::Karafka::Routing::Router.find_or_initialize_by_name(name)]
+        end.to_h
+
+        @total_partitions = @topics_with_partitions.map(&:last).sum(&:count)
+
+        @stopped_partitions = 0
+
+        @settings = settings
+        @yield_nil = yield_nil
+      end
+
+      # Iterates over requested topic partitions and yields the results with the iterator itself
+      # Iterator instance is yielded because one can run `stop_partition` to stop iterating over
+      # part of data. It is useful for scenarios where we are looking for some information in all
+      # the partitions but once we found it, given partition data is no longer needed and would
+      # only eat up resources.
+      def each
+        Admin.with_consumer(@settings) do |consumer|
+          tpl = tpl_with_expanded_offsets(consumer)
+          consumer.assign(tpl)
+
+          # We need this for self-referenced APIs like pausing
+          @current_consumer = consumer
+
+          # Stream data until we reach the end of all the partitions or until the end user
+          # indicates that they are done
+          until done?
+            message = poll(200)
+
+            # Skip nils if not explicitly required
+            next if message.nil? && !@yield_nil
+
+            if message
+              @current_message = build_message(message)
+
+              yield(@current_message, self)
+            else
+              yield(nil, self)
+            end
+          end
+
+          @current_message = nil
+          @current_consumer = nil
+        end
+
+        # Reset so we can use the same iterator again if needed
+        @stopped_partitions = 0
+      end
+
+      # Stops the partition we're currently yielded into
+      def stop_current_partition
+        stop_partition(
+          @current_message.topic,
+          @current_message.partition
+        )
+      end
+
+      # Stops processing of a given partition
+      # We expect the partition to be provided because of a scenario, where there is a
+      # multi-partition iteration and we want to stop a different partition that the one that
+      # is currently yielded.
+      #
+      # We pause it forever and no longer work with it.
+      #
+      # @param name [String] topic name of which partition we want to stop
+      # @param partition [Integer] partition we want to stop processing
+      def stop_partition(name, partition)
+        @stopped_partitions += 1
+
+        @current_consumer.pause(
+          Rdkafka::Consumer::TopicPartitionList.new(
+            name => [Partition.new(partition, 0)]
+          )
+        )
+      end
+
+      private
+
+      # Expands topics to which we want to subscribe with partitions information in case this
+      # info is not provided. For our convenience we want to support 5 formats of defining
+      # the subscribed topics:
+      #
+      # - 'topic1' - just a string with one topic name
+      # - ['topic1', 'topic2'] - just the names
+      # - { 'topic1' => -100 } - names with negative lookup offset
+      # - { 'topic1' => { 0 => 5 } } - names with exact partitions offsets
+      # - { 'topic1' => { 0 => -5 }, 'topic2' => { 1 => 5 } } - with per partition negative offsets
+      #
+      # @param topics [Array, Hash] topics definitions
+      # @return [Hash] hash with topics containing partitions definitions
+      def expand_topics_with_partitions(topics)
+        # Simplification for the single topic case
+        topics = [topics] if topics.is_a?(String)
+        # If we've got just array with topics, we need to convert that into a representation
+        # that we can expand with offsets
+        topics = topics.map { |name| [name, false] }.to_h if topics.is_a?(Array)
+
+        expanded = Hash.new { |h, k| h[k] = {} }
+
+        topics.map do |topic, details|
+          if details.is_a?(Hash)
+            details.each do |partition, offset|
+              expanded[topic][partition] = offset
+            end
+          else
+            partition_count(topic.to_s).times do |partition|
+              # If no offsets are provided, we just start from zero
+              expanded[topic][partition] = details || 0
+            end
+          end
+        end
+
+        expanded
+      end
+
+      # @param timeout [Integer] timeout in ms
+      # @return [Rdkafka::Consumer::Message, nil] message or nil if nothing to do
+      def poll(timeout)
+        @current_consumer.poll(timeout)
+      rescue Rdkafka::RdkafkaError => e
+        # End of partition
+        if e.code == :partition_eof
+          @stopped_partitions += 1
+
+          retry
+        end
+
+        raise e
+      end
+
+      # Converts raw rdkafka message into Karafka message
+      #
+      # @param message [Rdkafka::Consumer::Message] raw rdkafka message
+      # @return [::Karafka::Messages::Message]
+      def build_message(message)
+        Messages::Builders::Message.call(
+          message,
+          @routing_topics.fetch(message.topic),
+          Time.now
+        )
+      end
+
+      # Do we have all the data we wanted or did every topic partition has reached eof.
+      # @return [Boolean]
+      def done?
+        @stopped_partitions >= @total_partitions
+      end
+
+      # Builds the tpl representing all the subscriptions we want to run
+      #
+      # Additionally for negative offsets, does the watermark calculation where to start
+      #
+      # @param consumer [Rdkafka::Consumer] consumer we need in case of negative offsets as
+      #   negative are going to be used to do "give me last X". We use the already initialized
+      #   consumer instance, not to start another one again.
+      # @return [Rdkafka::Consumer::TopicPartitionList]
+      def tpl_with_expanded_offsets(consumer)
+        tpl = Rdkafka::Consumer::TopicPartitionList.new
+
+        @topics_with_partitions.each do |name, partitions|
+          partitions_with_offsets = {}
+
+          # When no offsets defined, we just start from zero
+          if partitions.is_a?(Array) || partitions.is_a?(Range)
+            partitions_with_offsets = partitions.map { |partition| [partition, 0] }.to_h
+          else
+            # When offsets defined, we can either use them if positive or expand and move back
+            # in case of negative (-1000 means last 1000 messages, etc)
+            partitions.each do |partition, offset|
+              if offset.negative?
+                _, high_watermark_offset = consumer.query_watermark_offsets(name, partition)
+                # We add because this offset is negative
+                partitions_with_offsets[partition] = high_watermark_offset + offset
+              else
+                partitions_with_offsets[partition] = offset
+              end
+            end
+          end
+
+          tpl.add_topic_and_partitions_with_offsets(name, partitions_with_offsets)
+        end
+
+        tpl
+      end
+
+      # @param name [String] topic name
+      # @return [Integer] number of partitions of the topic we want to iterate over
+      def partition_count(name)
+        Admin
+          .cluster_info
+          .topics
+          .find { |topic| topic.fetch(:topic_name) == name }
+          .fetch(:partitions)
+          .count
+      end
+    end
+  end
+end
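Given the subscription formats documented in `expand_topics_with_partitions`, a hedged usage sketch could look as follows; the `events` topic name, offsets, and the `needle` lookup are made up for illustration:

```ruby
# Illustrative only: 'events' is a hypothetical topic; { 0 => -10 } means "last 10 messages
# of partition 0" per the negative-offset expansion in tpl_with_expanded_offsets above.
iterator = Karafka::Pro::Iterator.new({ 'events' => { 0 => -10 } })

iterator.each do |message, iter|
  puts "#{message.topic}/#{message.partition}@#{message.offset}: #{message.raw_payload}"

  # Once the data we were looking for is found, stop streaming this partition early
  iter.stop_current_partition if message.raw_payload.include?('needle')
end
```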
@@ -17,7 +17,7 @@ module Karafka
       # Pro coordinator that provides extra orchestration methods useful for parallel processing
       # within the same partition
       class Coordinator < ::Karafka::Processing::Coordinator
-        attr_reader :filter
+        attr_reader :filter, :virtual_offset_manager
 
         # @param args [Object] anything the base coordinator accepts
         def initialize(*args)
@@ -27,6 +27,20 @@ module Karafka
           @flow_lock = Mutex.new
           @collapser = Collapser.new
           @filter = FiltersApplier.new(self)
+
+          return unless topic.virtual_partitions?
+
+          @virtual_offset_manager = VirtualOffsetManager.new(
+            topic.name,
+            partition
+          )
+
+          # We register our own "internal" filter to support filtering of messages that were marked
+          # as consumed virtually
+          @filter.filters << Filters::VirtualLimiter.new(
+            @virtual_offset_manager,
+            @collapser
+          )
         end
 
         # Starts the coordination process
@@ -40,6 +54,11 @@ module Karafka
           @filter.apply!(messages)
 
           @executed.clear
+
+          # We keep the old processed offsets until the collapsing is done and regular processing
+          # with virtualization is restored
+          @virtual_offset_manager.clear if topic.virtual_partitions? && !@collapser.collapsed?
+
           @last_message = messages.last
         end
 
@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      module Filters
+        # Removes messages that are already marked as consumed in the virtual offset manager
+        # This should operate only when using virtual partitions.
+        #
+        # This cleaner prevents us from duplicated processing of messages that were virtually
+        # marked as consumed even if we could not mark them as consumed in Kafka. This allows us
+        # to limit reprocessing when errors occur drastically when operating with virtual
+        # partitions
+        #
+        # @note It should be registered only when VPs are used
+        class VirtualLimiter < Base
+          # @param manager [Processing::VirtualOffsetManager]
+          # @param collapser [Processing::Collapser]
+          def initialize(manager, collapser)
+            @manager = manager
+            @collapser = collapser
+
+            super()
+          end
+
+          # Remove messages that we already marked as virtually consumed. Does nothing if not in
+          # the collapsed mode.
+          #
+          # @param messages [Array<Karafka::Messages::Message>]
+          def apply!(messages)
+            return unless @collapser.collapsed?
+
+            marked = @manager.marked
+
+            messages.delete_if { |message| marked.include?(message.offset) }
+          end
+        end
+      end
+    end
+  end
+end
@@ -21,6 +21,10 @@ module Karafka
       # This means that this is the API we expose as a single filter, allowing us to control
       # the filtering via many filters easily.
       class FiltersApplier
+        # @return [Array] registered filters array. Useful if we want to inject internal context
+        #   aware filters.
+        attr_reader :filters
+
         # @param coordinator [Pro::Coordinator] pro coordinator
         def initialize(coordinator)
           # Builds filters out of their factories
@@ -24,7 +24,9 @@ module Karafka
           # This case is a bit of special. Please see the `AjDlqMom` for explanation on how the
           # offset management works in this case.
           module DlqLrjMom
-            include Strategies::
+            include Strategies::Default
+            include Strategies::Dlq::Default
+            include Strategies::Aj::LrjMom
 
             # Features for this strategy
             FEATURES = %i[
@@ -20,9 +20,9 @@ module Karafka
           # Manual offset management enabled
           # Virtual Partitions enabled
           module DlqMomVp
-            include Strategies::Dlq::Default
-            include Strategies::Vp::Default
             include Strategies::Default
+            include Strategies::Dlq::Vp
+            include Strategies::Vp::Default
 
             # Features for this strategy
             FEATURES = %i[
@@ -55,14 +55,11 @@ module Karafka
 
             return resume if revoked?
 
-            skippable_message,
+            skippable_message, _marked = find_skippable_message
             dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
 
-
-
-            else
-              pause(skippable_message.offset + 1, nil, false)
-            end
+            coordinator.seek_offset = skippable_message.offset + 1
+            pause(coordinator.seek_offset, nil, false)
           end
         end
       end