karafka 2.0.40 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +2 -2
- data/CHANGELOG.md +30 -1
- data/Gemfile +3 -2
- data/Gemfile.lock +13 -1
- data/bin/integrations +17 -2
- data/config/locales/errors.yml +10 -0
- data/config/locales/pro_errors.yml +0 -2
- data/lib/karafka/active_job/consumer.rb +16 -11
- data/lib/karafka/active_job/current_attributes/loading.rb +36 -0
- data/lib/karafka/active_job/current_attributes/persistence.rb +28 -0
- data/lib/karafka/active_job/current_attributes.rb +42 -0
- data/lib/karafka/active_job/dispatcher.rb +8 -2
- data/lib/karafka/admin.rb +17 -13
- data/lib/karafka/connection/client.rb +6 -3
- data/lib/karafka/errors.rb +3 -0
- data/lib/karafka/instrumentation/callbacks/statistics.rb +12 -0
- data/lib/karafka/instrumentation/logger_listener.rb +16 -5
- data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +166 -0
- data/lib/karafka/pro/active_job/consumer.rb +1 -10
- data/lib/karafka/pro/active_job/dispatcher.rb +2 -2
- data/lib/karafka/pro/iterator.rb +253 -0
- data/lib/karafka/pro/processing/coordinator.rb +20 -1
- data/lib/karafka/pro/processing/filters/virtual_limiter.rb +52 -0
- data/lib/karafka/pro/processing/filters_applier.rb +4 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +3 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +2 -2
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +2 -0
- data/lib/karafka/pro/processing/strategies/aj/mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/ftr.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +3 -6
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom_vp.rb +43 -0
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_vp.rb +1 -0
- data/lib/karafka/pro/processing/strategies/dlq/ftr_mom.rb +3 -7
- data/lib/karafka/pro/processing/strategies/dlq/ftr_mom_vp.rb +41 -0
- data/lib/karafka/pro/processing/strategies/dlq/ftr_vp.rb +1 -0
- data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +3 -6
- data/lib/karafka/pro/processing/strategies/dlq/lrj_mom_vp.rb +36 -0
- data/lib/karafka/pro/processing/strategies/dlq/lrj_vp.rb +1 -0
- data/lib/karafka/pro/processing/strategies/dlq/mom.rb +8 -7
- data/lib/karafka/pro/processing/strategies/dlq/mom_vp.rb +37 -0
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +2 -0
- data/lib/karafka/pro/processing/strategies/lrj/ftr_mom_vp.rb +40 -0
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +2 -0
- data/lib/karafka/pro/processing/strategies/lrj/mom_vp.rb +38 -0
- data/lib/karafka/pro/processing/strategies/mom/ftr_vp.rb +37 -0
- data/lib/karafka/pro/{base_consumer.rb → processing/strategies/mom/vp.rb} +17 -7
- data/lib/karafka/pro/processing/strategies/vp/default.rb +51 -0
- data/lib/karafka/pro/processing/virtual_offset_manager.rb +147 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/contract.rb +0 -17
- data/lib/karafka/processing/strategies/default.rb +2 -0
- data/lib/karafka/processing/strategies/dlq_mom.rb +9 -7
- data/lib/karafka/routing/router.rb +15 -0
- data/lib/karafka/setup/config.rb +7 -1
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +5 -0
- data.tar.gz.sig +0 -0
- metadata +17 -4
- metadata.gz.sig +0 -0
- data/lib/karafka/instrumentation/vendors/datadog/listener.rb +0 -16
@@ -0,0 +1,166 @@
+# frozen_string_literal: true
+
+require 'socket'
+
+module Karafka
+  module Instrumentation
+    module Vendors
+      # Namespace for instrumentation related with Kubernetes
+      module Kubernetes
+        # Kubernetes HTTP listener that does not only reply when process is not fully hanging, but
+        # also allows to define max time of processing and looping.
+        #
+        # Processes like Karafka server can hang while still being reachable. For example, in case
+        # something would hang inside of the user code, Karafka could stop polling and no new
+        # data would be processed, but process itself would still be active. This listener allows
+        # for defining of a ttl that gets bumped on each poll loop and before and after processing
+        # of a given messages batch.
+        class LivenessListener
+          include ::Karafka::Core::Helpers::Time
+
+          # @param hostname [String, nil] hostname or nil to bind on all
+          # @param port [Integer] TCP port on which we want to run our HTTP status server
+          # @param consuming_ttl [Integer] time in ms after which we consider consumption hanging.
+          #   It allows us to define max consumption time after which k8s should consider given
+          #   process as hanging
+          # @param polling_ttl [Integer] max time in ms for polling. If polling (any) does not
+          #   happen that often, process should be considered dead.
+          # @note The default TTL matches the default `max.poll.interval.ms`
+          def initialize(
+            hostname: nil,
+            port: 3000,
+            consuming_ttl: 5 * 60 * 1_000,
+            polling_ttl: 5 * 60 * 1_000
+          )
+            @server = TCPServer.new(*[hostname, port].compact)
+            @polling_ttl = polling_ttl
+            @consuming_ttl = consuming_ttl
+            @mutex = Mutex.new
+            @pollings = {}
+            @consumptions = {}
+
+            Thread.new do
+              loop do
+                break unless respond
+              end
+            end
+          end
+
+          # Tick on each fetch
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_connection_listener_fetch_loop(_event)
+            mark_polling_tick
+          end
+
+          # Tick on starting work
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_consume(_event)
+            mark_consumption_tick
+          end
+
+          # Tick on finished work
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_consumed(_event)
+            clear_consumption_tick
+          end
+
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_revoke(_event)
+            mark_consumption_tick
+          end
+
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_revoked(_event)
+            clear_consumption_tick
+          end
+
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_shutting_down(_event)
+            mark_consumption_tick
+          end
+
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_shutdown(_event)
+            clear_consumption_tick
+          end
+
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_error_occurred(_event)
+            clear_consumption_tick
+            clear_polling_tick
+          end
+
+          # Stop the http server when we stop the process
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_app_stopped(_event)
+            @server.close
+          end
+
+          private
+
+          # Wraps the logic with a mutex
+          # @param block [Proc] code we want to run in mutex
+          def synchronize(&block)
+            @mutex.synchronize(&block)
+          end
+
+          # @return [Integer] object id of the current thread
+          def thread_id
+            Thread.current.object_id
+          end
+
+          # Update the polling tick time for current thread
+          def mark_polling_tick
+            synchronize do
+              @pollings[thread_id] = monotonic_now
+            end
+          end
+
+          # Clear current thread polling time tracker
+          def clear_polling_tick
+            synchronize do
+              @pollings.delete(thread_id)
+            end
+          end
+
+          # Update the processing tick time
+          def mark_consumption_tick
+            synchronize do
+              @consumptions[thread_id] = monotonic_now
+            end
+          end
+
+          # Clear current thread consumption time tracker
+          def clear_consumption_tick
+            synchronize do
+              @consumptions.delete(thread_id)
+            end
+          end
+
+          # Responds to a HTTP request with the process liveness status
+          def respond
+            client = @server.accept
+            client.gets
+            client.print "HTTP/1.1 #{status}\r\n"
+            client.close
+
+            true
+          rescue Errno::ECONNRESET, Errno::EPIPE, IOError
+            !@server.closed?
+          end
+
+          # Did we exceed any of the ttls
+          # @return [String] 204 string if ok, 500 otherwise
+          def status
+            time = monotonic_now
+
+            return '500' if @pollings.values.any? { |tick| (time - tick) > @polling_ttl }
+            return '500' if @consumptions.values.any? { |tick| (time - tick) > @consuming_ttl }
+
+            '204'
+          end
+        end
+      end
+    end
+  end
+end
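The new Kubernetes liveness listener above is subscribed like any other Karafka monitor listener. The following is a minimal sketch, not part of the diff: the hostname, port and TTL values are assumptions mirroring the defaults shown above. Once subscribed, the exposed port answers 204 while polling and consumption stay within their TTLs and 500 once either is exceeded, so a Kubernetes livenessProbe can simply probe that port.

# Minimal sketch (illustrative, not from the diff), e.g. in karafka.rb
require 'karafka/instrumentation/vendors/kubernetes/liveness_listener'

listener = ::Karafka::Instrumentation::Vendors::Kubernetes::LivenessListener.new(
  # Bind on all interfaces; values below mirror the listener defaults shown above
  hostname: '0.0.0.0',
  port: 3000,
  # Report 500 if a single batch has been consuming for longer than 5 minutes
  consuming_ttl: 5 * 60 * 1_000,
  # Report 500 if no poll happened within 5 minutes
  polling_ttl: 5 * 60 * 1_000
)

# The listener is a plain object with on_* methods, so it plugs into the monitor directly
Karafka.monitor.subscribe(listener)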
@@ -35,18 +35,9 @@ module Karafka
             # double-processing
             break if Karafka::App.stopping? && !topic.virtual_partitions?
 
-            # Break if we already know, that one of virtual partitions has failed and we will
-            # be restarting processing all together after all VPs are done. This will minimize
-            # number of jobs that will be re-processed
-            break if topic.virtual_partitions? && failing?
-
             consume_job(message)
 
-            # We
-            # this could create random markings.
-            # The exception here is the collapsed state where we can move one after another
-            next if topic.virtual_partitions? && !collapsed?
-
+            # We can always mark because of the virtual offset management that we have in VPs
             mark_as_consumed(message)
           end
         end
@@ -39,7 +39,7 @@ module Karafka
           fetch_option(job, :dispatch_method, DEFAULTS),
           dispatch_details(job).merge!(
             topic: job.queue_name,
-            payload: ::ActiveSupport::JSON.encode(job
+            payload: ::ActiveSupport::JSON.encode(serialize_job(job))
           )
         )
       end
@@ -54,7 +54,7 @@ module Karafka
 
           dispatches[d_method] << dispatch_details(job).merge!(
             topic: job.queue_name,
-            payload: ::ActiveSupport::JSON.encode(job
+            payload: ::ActiveSupport::JSON.encode(serialize_job(job))
           )
         end
 
@@ -0,0 +1,253 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    # Topic iterator allows you to iterate over topic/partition data and perform lookups for
+    # information that you need.
+    #
+    # It supports early stops on finding the requested data and allows for seeking till
+    # the end. It also allows for signaling, when a given message should be last out of certain
+    # partition, but we still want to continue iterating in other messages.
+    #
+    # It does **not** create a consumer group and does not have any offset management.
+    class Iterator
+      # Local partition reference for librdkafka
+      Partition = Struct.new(:partition, :offset)
+
+      private_constant :Partition
+
+      # A simple API allowing to iterate over topic/partition data, without having to subscribe
+      # and deal with rebalances. This API allows for multi-partition streaming and is optimized
+      # for data lookups. It allows for explicit stopping iteration over any partition during
+      # the iteration process, allowing for optimized lookups.
+      #
+      # @param topics [Array<String>, Hash] list of strings if we want to subscribe to multiple
+      #   topics and all of their partitions or a hash where keys are the topics and values are
+      #   hashes with partitions and their initial offsets.
+      # @param settings [Hash] extra settings for the consumer. Please keep in mind, that if
+      #   overwritten, you may want to include `auto.offset.reset` to match your case.
+      # @param yield_nil [Boolean] should we yield also `nil` values when poll returns nothing.
+      #   Useful in particular for long-living iterators.
+      #
+      # @note It is worth keeping in mind, that this API also needs to operate within
+      #   `max.poll.interval.ms` limitations on each iteration
+      #
+      # @note In case of a never-ending iterator, you need to set `enable.partition.eof` to `false`
+      #   so we don't stop polling data even when reaching the end (end on a given moment)
+      def initialize(
+        topics,
+        settings: { 'auto.offset.reset': 'beginning' },
+        yield_nil: false
+      )
+        @topics_with_partitions = expand_topics_with_partitions(topics)
+
+        @routing_topics = @topics_with_partitions.map do |name, _|
+          [name, ::Karafka::Routing::Router.find_or_initialize_by_name(name)]
+        end.to_h
+
+        @total_partitions = @topics_with_partitions.map(&:last).sum(&:count)
+
+        @stopped_partitions = 0
+
+        @settings = settings
+        @yield_nil = yield_nil
+      end
+
+      # Iterates over requested topic partitions and yields the results with the iterator itself
+      # Iterator instance is yielded because one can run `stop_partition` to stop iterating over
+      # part of data. It is useful for scenarios where we are looking for some information in all
+      # the partitions but once we found it, given partition data is no longer needed and would
+      # only eat up resources.
+      def each
+        Admin.with_consumer(@settings) do |consumer|
+          tpl = tpl_with_expanded_offsets(consumer)
+          consumer.assign(tpl)
+
+          # We need this for self-referenced APIs like pausing
+          @current_consumer = consumer
+
+          # Stream data until we reach the end of all the partitions or until the end user
+          # indicates that they are done
+          until done?
+            message = poll(200)
+
+            # Skip nils if not explicitly required
+            next if message.nil? && !@yield_nil
+
+            if message
+              @current_message = build_message(message)
+
+              yield(@current_message, self)
+            else
+              yield(nil, self)
+            end
+          end
+
+          @current_message = nil
+          @current_consumer = nil
+        end
+
+        # Reset so we can use the same iterator again if needed
+        @stopped_partitions = 0
+      end
+
+      # Stops the partition we're currently yielded into
+      def stop_current_partition
+        stop_partition(
+          @current_message.topic,
+          @current_message.partition
+        )
+      end
+
+      # Stops processing of a given partition
+      # We expect the partition to be provided because of a scenario, where there is a
+      # multi-partition iteration and we want to stop a different partition that the one that
+      # is currently yielded.
+      #
+      # We pause it forever and no longer work with it.
+      #
+      # @param name [String] topic name of which partition we want to stop
+      # @param partition [Integer] partition we want to stop processing
+      def stop_partition(name, partition)
+        @stopped_partitions += 1
+
+        @current_consumer.pause(
+          Rdkafka::Consumer::TopicPartitionList.new(
+            name => [Partition.new(partition, 0)]
+          )
+        )
+      end
+
+      private
+
+      # Expands topics to which we want to subscribe with partitions information in case this
+      # info is not provided. For our convenience we want to support 5 formats of defining
+      # the subscribed topics:
+      #
+      # - 'topic1' - just a string with one topic name
+      # - ['topic1', 'topic2'] - just the names
+      # - { 'topic1' => -100 } - names with negative lookup offset
+      # - { 'topic1' => { 0 => 5 } } - names with exact partitions offsets
+      # - { 'topic1' => { 0 => -5 }, 'topic2' => { 1 => 5 } } - with per partition negative offsets
+      #
+      # @param topics [Array, Hash] topics definitions
+      # @return [Hash] hash with topics containing partitions definitions
+      def expand_topics_with_partitions(topics)
+        # Simplification for the single topic case
+        topics = [topics] if topics.is_a?(String)
+        # If we've got just array with topics, we need to convert that into a representation
+        # that we can expand with offsets
+        topics = topics.map { |name| [name, false] }.to_h if topics.is_a?(Array)
+
+        expanded = Hash.new { |h, k| h[k] = {} }
+
+        topics.map do |topic, details|
+          if details.is_a?(Hash)
+            details.each do |partition, offset|
+              expanded[topic][partition] = offset
+            end
+          else
+            partition_count(topic.to_s).times do |partition|
+              # If no offsets are provided, we just start from zero
+              expanded[topic][partition] = details || 0
+            end
+          end
+        end
+
+        expanded
+      end
+
+      # @param timeout [Integer] timeout in ms
+      # @return [Rdkafka::Consumer::Message, nil] message or nil if nothing to do
+      def poll(timeout)
+        @current_consumer.poll(timeout)
+      rescue Rdkafka::RdkafkaError => e
+        # End of partition
+        if e.code == :partition_eof
+          @stopped_partitions += 1
+
+          retry
+        end
+
+        raise e
+      end
+
+      # Converts raw rdkafka message into Karafka message
+      #
+      # @param message [Rdkafka::Consumer::Message] raw rdkafka message
+      # @return [::Karafka::Messages::Message]
+      def build_message(message)
+        Messages::Builders::Message.call(
+          message,
+          @routing_topics.fetch(message.topic),
+          Time.now
+        )
+      end
+
+      # Do we have all the data we wanted or did every topic partition has reached eof.
+      # @return [Boolean]
+      def done?
+        @stopped_partitions >= @total_partitions
+      end
+
+      # Builds the tpl representing all the subscriptions we want to run
+      #
+      # Additionally for negative offsets, does the watermark calculation where to start
+      #
+      # @param consumer [Rdkafka::Consumer] consumer we need in case of negative offsets as
+      #   negative are going to be used to do "give me last X". We use the already initialized
+      #   consumer instance, not to start another one again.
+      # @return [Rdkafka::Consumer::TopicPartitionList]
+      def tpl_with_expanded_offsets(consumer)
+        tpl = Rdkafka::Consumer::TopicPartitionList.new
+
+        @topics_with_partitions.each do |name, partitions|
+          partitions_with_offsets = {}
+
+          # When no offsets defined, we just start from zero
+          if partitions.is_a?(Array) || partitions.is_a?(Range)
+            partitions_with_offsets = partitions.map { |partition| [partition, 0] }.to_h
+          else
+            # When offsets defined, we can either use them if positive or expand and move back
+            # in case of negative (-1000 means last 1000 messages, etc)
+            partitions.each do |partition, offset|
+              if offset.negative?
+                _, high_watermark_offset = consumer.query_watermark_offsets(name, partition)
+                # We add because this offset is negative
+                partitions_with_offsets[partition] = high_watermark_offset + offset
+              else
+                partitions_with_offsets[partition] = offset
+              end
+            end
+          end
+
+          tpl.add_topic_and_partitions_with_offsets(name, partitions_with_offsets)
+        end
+
+        tpl
+      end
+
+      # @param name [String] topic name
+      # @return [Integer] number of partitions of the topic we want to iterate over
+      def partition_count(name)
+        Admin
+          .cluster_info
+          .topics
+          .find { |topic| topic.fetch(:topic_name) == name }
+          .fetch(:partitions)
+          .count
+      end
+    end
+  end
+end
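The new Pro Iterator above can be used to scan topic data without creating a consumer group or managing offsets. Below is a minimal sketch, not part of the diff: the topic name, the negative starting offset and the key being searched for are made up for illustration.

# Minimal sketch (illustrative, not from the diff)
# Start from the last 1_000 messages of each partition of a hypothetical topic
iterator = Karafka::Pro::Iterator.new({ 'orders_states' => -1_000 })

iterator.each do |message, iter|
  next unless message.key == 'order-42'

  puts message.payload
  # Once the lookup succeeded, this partition's data is no longer needed
  iter.stop_current_partition
end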
@@ -17,7 +17,7 @@ module Karafka
       # Pro coordinator that provides extra orchestration methods useful for parallel processing
       # within the same partition
       class Coordinator < ::Karafka::Processing::Coordinator
-        attr_reader :filter
+        attr_reader :filter, :virtual_offset_manager
 
         # @param args [Object] anything the base coordinator accepts
         def initialize(*args)
@@ -27,6 +27,20 @@ module Karafka
          @flow_lock = Mutex.new
          @collapser = Collapser.new
          @filter = FiltersApplier.new(self)
+
+          return unless topic.virtual_partitions?
+
+          @virtual_offset_manager = VirtualOffsetManager.new(
+            topic.name,
+            partition
+          )
+
+          # We register our own "internal" filter to support filtering of messages that were marked
+          # as consumed virtually
+          @filter.filters << Filters::VirtualLimiter.new(
+            @virtual_offset_manager,
+            @collapser
+          )
        end
 
        # Starts the coordination process
@@ -40,6 +54,11 @@ module Karafka
          @filter.apply!(messages)
 
          @executed.clear
+
+          # We keep the old processed offsets until the collapsing is done and regular processing
+          # with virtualization is restored
+          @virtual_offset_manager.clear if topic.virtual_partitions? && !@collapser.collapsed?
+
          @last_message = messages.last
        end
 
@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      module Filters
+        # Removes messages that are already marked as consumed in the virtual offset manager
+        # This should operate only when using virtual partitions.
+        #
+        # This cleaner prevents us from duplicated processing of messages that were virtually
+        # marked as consumed even if we could not mark them as consumed in Kafka. This allows us
+        # to limit reprocessing when errors occur drastically when operating with virtual
+        # partitions
+        #
+        # @note It should be registered only when VPs are used
+        class VirtualLimiter < Base
+          # @param manager [Processing::VirtualOffsetManager]
+          # @param collapser [Processing::Collapser]
+          def initialize(manager, collapser)
+            @manager = manager
+            @collapser = collapser
+
+            super()
+          end
+
+          # Remove messages that we already marked as virtually consumed. Does nothing if not in
+          # the collapsed mode.
+          #
+          # @param messages [Array<Karafka::Messages::Message>]
+          def apply!(messages)
+            return unless @collapser.collapsed?
+
+            marked = @manager.marked
+
+            messages.delete_if { |message| marked.include?(message.offset) }
+          end
+        end
+      end
+    end
+  end
+end
@@ -21,6 +21,10 @@ module Karafka
      # This means that this is the API we expose as a single filter, allowing us to control
      # the filtering via many filters easily.
      class FiltersApplier
+        # @return [Array] registered filters array. Useful if we want to inject internal context
+        #   aware filters.
+        attr_reader :filters
+
        # @param coordinator [Pro::Coordinator] pro coordinator
        def initialize(coordinator)
          # Builds filters out of their factories
@@ -24,7 +24,9 @@ module Karafka
          # This case is a bit of special. Please see the `AjDlqMom` for explanation on how the
          # offset management works in this case.
          module DlqLrjMom
-            include Strategies::
+            include Strategies::Default
+            include Strategies::Dlq::Default
+            include Strategies::Aj::LrjMom
 
            # Features for this strategy
            FEATURES = %i[
@@ -20,9 +20,9 @@ module Karafka
          # Manual offset management enabled
          # Virtual Partitions enabled
          module DlqMomVp
-            include Strategies::Dlq::Default
-            include Strategies::Vp::Default
            include Strategies::Default
+            include Strategies::Dlq::Vp
+            include Strategies::Vp::Default
 
            # Features for this strategy
            FEATURES = %i[
@@ -55,14 +55,11 @@ module Karafka
 
                return resume if revoked?
 
-                skippable_message,
+                skippable_message, _marked = find_skippable_message
                dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
 
-
-
-                else
-                  pause(skippable_message.offset + 1, nil, false)
-                end
+                coordinator.seek_offset = skippable_message.offset + 1
+                pause(coordinator.seek_offset, nil, false)
              end
            end
          end