karafka 2.0.40 → 2.1.0

Files changed (62)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +2 -2
  4. data/CHANGELOG.md +30 -1
  5. data/Gemfile +3 -2
  6. data/Gemfile.lock +13 -1
  7. data/bin/integrations +17 -2
  8. data/config/locales/errors.yml +10 -0
  9. data/config/locales/pro_errors.yml +0 -2
  10. data/lib/karafka/active_job/consumer.rb +16 -11
  11. data/lib/karafka/active_job/current_attributes/loading.rb +36 -0
  12. data/lib/karafka/active_job/current_attributes/persistence.rb +28 -0
  13. data/lib/karafka/active_job/current_attributes.rb +42 -0
  14. data/lib/karafka/active_job/dispatcher.rb +8 -2
  15. data/lib/karafka/admin.rb +17 -13
  16. data/lib/karafka/connection/client.rb +6 -3
  17. data/lib/karafka/errors.rb +3 -0
  18. data/lib/karafka/instrumentation/callbacks/statistics.rb +12 -0
  19. data/lib/karafka/instrumentation/logger_listener.rb +16 -5
  20. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +166 -0
  21. data/lib/karafka/pro/active_job/consumer.rb +1 -10
  22. data/lib/karafka/pro/active_job/dispatcher.rb +2 -2
  23. data/lib/karafka/pro/iterator.rb +253 -0
  24. data/lib/karafka/pro/processing/coordinator.rb +20 -1
  25. data/lib/karafka/pro/processing/filters/virtual_limiter.rb +52 -0
  26. data/lib/karafka/pro/processing/filters_applier.rb +4 -0
  27. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +1 -1
  28. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +3 -1
  29. data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +2 -2
  30. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +2 -0
  31. data/lib/karafka/pro/processing/strategies/aj/mom_vp.rb +1 -1
  32. data/lib/karafka/pro/processing/strategies/dlq/ftr.rb +1 -1
  33. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +3 -6
  34. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom_vp.rb +43 -0
  35. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_vp.rb +1 -0
  36. data/lib/karafka/pro/processing/strategies/dlq/ftr_mom.rb +3 -7
  37. data/lib/karafka/pro/processing/strategies/dlq/ftr_mom_vp.rb +41 -0
  38. data/lib/karafka/pro/processing/strategies/dlq/ftr_vp.rb +1 -0
  39. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +3 -6
  40. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom_vp.rb +36 -0
  41. data/lib/karafka/pro/processing/strategies/dlq/lrj_vp.rb +1 -0
  42. data/lib/karafka/pro/processing/strategies/dlq/mom.rb +8 -7
  43. data/lib/karafka/pro/processing/strategies/dlq/mom_vp.rb +37 -0
  44. data/lib/karafka/pro/processing/strategies/lrj/default.rb +2 -0
  45. data/lib/karafka/pro/processing/strategies/lrj/ftr_mom_vp.rb +40 -0
  46. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +2 -0
  47. data/lib/karafka/pro/processing/strategies/lrj/mom_vp.rb +38 -0
  48. data/lib/karafka/pro/processing/strategies/mom/ftr_vp.rb +37 -0
  49. data/lib/karafka/pro/{base_consumer.rb → processing/strategies/mom/vp.rb} +17 -7
  50. data/lib/karafka/pro/processing/strategies/vp/default.rb +51 -0
  51. data/lib/karafka/pro/processing/virtual_offset_manager.rb +147 -0
  52. data/lib/karafka/pro/routing/features/virtual_partitions/contract.rb +0 -17
  53. data/lib/karafka/processing/strategies/default.rb +2 -0
  54. data/lib/karafka/processing/strategies/dlq_mom.rb +9 -7
  55. data/lib/karafka/routing/router.rb +15 -0
  56. data/lib/karafka/setup/config.rb +7 -1
  57. data/lib/karafka/version.rb +1 -1
  58. data/lib/karafka.rb +5 -0
  59. data.tar.gz.sig +0 -0
  60. metadata +17 -4
  61. metadata.gz.sig +0 -0
  62. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +0 -16
data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb
@@ -0,0 +1,166 @@
+# frozen_string_literal: true
+
+require 'socket'
+
+module Karafka
+  module Instrumentation
+    module Vendors
+      # Namespace for instrumentation related with Kubernetes
+      module Kubernetes
+        # Kubernetes HTTP listener that not only replies while the process is not fully
+        # hanging, but also allows defining a max time of processing and polling.
+        #
+        # Processes like the Karafka server can hang while still being reachable. For example,
+        # if something hangs inside the user code, Karafka could stop polling and no new data
+        # would be processed, but the process itself would still be active. This listener
+        # allows for defining a TTL that gets bumped on each poll loop and before and after
+        # processing of a given messages batch.
+        class LivenessListener
+          include ::Karafka::Core::Helpers::Time
+
+          # @param hostname [String, nil] hostname or nil to bind on all
+          # @param port [Integer] TCP port on which we want to run our HTTP status server
+          # @param consuming_ttl [Integer] time in ms after which we consider consumption
+          #   hanging. It allows us to define a max consumption time after which k8s should
+          #   consider the given process as hanging
+          # @param polling_ttl [Integer] max time in ms for polling. If polling (any) does not
+          #   happen that often, the process should be considered dead.
+          # @note The default TTL matches the default `max.poll.interval.ms`
+          def initialize(
+            hostname: nil,
+            port: 3000,
+            consuming_ttl: 5 * 60 * 1_000,
+            polling_ttl: 5 * 60 * 1_000
+          )
+            @server = TCPServer.new(*[hostname, port].compact)
+            @polling_ttl = polling_ttl
+            @consuming_ttl = consuming_ttl
+            @mutex = Mutex.new
+            @pollings = {}
+            @consumptions = {}
+
+            Thread.new do
+              loop do
+                break unless respond
+              end
+            end
+          end
+
+          # Tick on each fetch
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_connection_listener_fetch_loop(_event)
+            mark_polling_tick
+          end
+
+          # Tick on starting work
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_consume(_event)
+            mark_consumption_tick
+          end
+
+          # Tick on finished work
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_consumed(_event)
+            clear_consumption_tick
+          end
+
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_revoke(_event)
+            mark_consumption_tick
+          end
+
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_revoked(_event)
+            clear_consumption_tick
+          end
+
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_shutting_down(_event)
+            mark_consumption_tick
+          end
+
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_shutdown(_event)
+            clear_consumption_tick
+          end
+
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_error_occurred(_event)
+            clear_consumption_tick
+            clear_polling_tick
+          end
+
+          # Stop the http server when we stop the process
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_app_stopped(_event)
+            @server.close
+          end
+
+          private
+
+          # Wraps the logic with a mutex
+          # @param block [Proc] code we want to run in mutex
+          def synchronize(&block)
+            @mutex.synchronize(&block)
+          end
+
+          # @return [Integer] object id of the current thread
+          def thread_id
+            Thread.current.object_id
+          end
+
+          # Update the polling tick time for the current thread
+          def mark_polling_tick
+            synchronize do
+              @pollings[thread_id] = monotonic_now
+            end
+          end
+
+          # Clear the current thread polling time tracker
+          def clear_polling_tick
+            synchronize do
+              @pollings.delete(thread_id)
+            end
+          end
+
+          # Update the processing tick time
+          def mark_consumption_tick
+            synchronize do
+              @consumptions[thread_id] = monotonic_now
+            end
+          end
+
+          # Clear the current thread consumption time tracker
+          def clear_consumption_tick
+            synchronize do
+              @consumptions.delete(thread_id)
+            end
+          end
+
+          # Responds to an HTTP request with the process liveness status
+          def respond
+            client = @server.accept
+            client.gets
+            client.print "HTTP/1.1 #{status}\r\n"
+            client.close
+
+            true
+          rescue Errno::ECONNRESET, Errno::EPIPE, IOError
+            !@server.closed?
+          end
+
+          # Did we exceed any of the TTLs?
+          # @return [String] '204' if ok, '500' otherwise
+          def status
+            time = monotonic_now
+
+            return '500' if @pollings.values.any? { |tick| (time - tick) > @polling_ttl }
+            return '500' if @consumptions.values.any? { |tick| (time - tick) > @consuming_ttl }
+
+            '204'
+          end
+        end
+      end
+    end
+  end
+end
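The listener is wired in by subscribing an instance to Karafka's monitor, which then routes the instrumentation events above to its `on_*` methods. A minimal wiring sketch for `karafka.rb` follows; the TTL values shown are illustrative, not recommendations:

require 'karafka/instrumentation/vendors/kubernetes/liveness_listener'

listener = ::Karafka::Instrumentation::Vendors::Kubernetes::LivenessListener.new(
  hostname: '0.0.0.0',
  port: 3000,
  # Report 500 when no poll happened within 5 minutes
  polling_ttl: 5 * 60 * 1_000,
  # Report 500 when a batch has been processing for over 5 minutes
  consuming_ttl: 5 * 60 * 1_000
)

Karafka.monitor.subscribe(listener)

Since the server answers 204 when healthy and 500 once a TTL is exceeded, a plain httpGet livenessProbe against the chosen port is enough on the Kubernetes side.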
data/lib/karafka/pro/active_job/consumer.rb
@@ -35,18 +35,9 @@ module Karafka
     # double-processing
     break if Karafka::App.stopping? && !topic.virtual_partitions?
 
-    # Break if we already know, that one of virtual partitions has failed and we will
-    # be restarting processing all together after all VPs are done. This will minimize
-    # number of jobs that will be re-processed
-    break if topic.virtual_partitions? && failing?
-
     consume_job(message)
 
-    # We cannot mark jobs as done after each if there are virtual partitions. Otherwise
-    # this could create random markings.
-    # The exception here is the collapsed state where we can move one after another
-    next if topic.virtual_partitions? && !collapsed?
-
+    # We can always mark because of the virtual offset management that we have in VPs
     mark_as_consumed(message)
   end
 end
data/lib/karafka/pro/active_job/dispatcher.rb
@@ -39,7 +39,7 @@ module Karafka
   fetch_option(job, :dispatch_method, DEFAULTS),
   dispatch_details(job).merge!(
     topic: job.queue_name,
-    payload: ::ActiveSupport::JSON.encode(job.serialize)
+    payload: ::ActiveSupport::JSON.encode(serialize_job(job))
   )
 )
 end
data/lib/karafka/pro/active_job/dispatcher.rb
@@ -54,7 +54,7 @@ module Karafka
 
   dispatches[d_method] << dispatch_details(job).merge!(
     topic: job.queue_name,
-    payload: ::ActiveSupport::JSON.encode(job.serialize)
+    payload: ::ActiveSupport::JSON.encode(serialize_job(job))
   )
 end
 
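Both dispatch paths now route the payload through `serialize_job` instead of calling `job.serialize` directly. That indirection is what the new `CurrentAttributes` files in this release (`loading.rb`, `persistence.rb`) hook into, so that `ActiveSupport::CurrentAttributes` values set at dispatch time can be restored around consumption. A sketch of opting in, assuming the `persist` API added in data/lib/karafka/active_job/current_attributes.rb; the `Current` class below is a hypothetical application-level attributes holder:

require 'karafka/active_job/current_attributes'

# Hypothetical per-request attributes we want to travel with the job
class Current < ActiveSupport::CurrentAttributes
  attribute :user_id
end

# Persist Current's attributes into dispatched job payloads and load
# them back before the job is executed by the consumer
Karafka::ActiveJob::CurrentAttributes.persist('Current')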
data/lib/karafka/pro/iterator.rb
@@ -0,0 +1,253 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka also has a commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    # Topic iterator allows you to iterate over topic/partition data and perform lookups for
+    # information that you need.
+    #
+    # It supports early stops on finding the requested data and allows for seeking till the
+    # end. It also allows for signaling when a given message should be the last out of a
+    # certain partition, while we still continue iterating over the other messages.
+    #
+    # It does **not** create a consumer group and does not have any offset management.
+    class Iterator
+      # Local partition reference for librdkafka
+      Partition = Struct.new(:partition, :offset)
+
+      private_constant :Partition
+
+      # A simple API allowing to iterate over topic/partition data, without having to subscribe
+      # and deal with rebalances. This API allows for multi-partition streaming and is optimized
+      # for data lookups. It allows for explicitly stopping iteration over any partition during
+      # the iteration process, allowing for optimized lookups.
+      #
+      # @param topics [Array<String>, Hash] list of strings if we want to subscribe to multiple
+      #   topics and all of their partitions, or a hash where keys are the topics and values
+      #   are hashes with partitions and their initial offsets.
+      # @param settings [Hash] extra settings for the consumer. Please keep in mind that, if
+      #   overwritten, you may want to include `auto.offset.reset` to match your case.
+      # @param yield_nil [Boolean] should we also yield `nil` values when polling returns
+      #   nothing. Useful in particular for long-living iterators.
+      #
+      # @note It is worth keeping in mind that this API also needs to operate within
+      #   `max.poll.interval.ms` limitations on each iteration
+      #
+      # @note In case of a never-ending iterator, you need to set `enable.partition.eof` to
+      #   `false` so we don't stop polling data even when reaching the end (end at a given
+      #   moment)
+      def initialize(
+        topics,
+        settings: { 'auto.offset.reset': 'beginning' },
+        yield_nil: false
+      )
+        @topics_with_partitions = expand_topics_with_partitions(topics)
+
+        @routing_topics = @topics_with_partitions.map do |name, _|
+          [name, ::Karafka::Routing::Router.find_or_initialize_by_name(name)]
+        end.to_h
+
+        @total_partitions = @topics_with_partitions.map(&:last).sum(&:count)
+
+        @stopped_partitions = 0
+
+        @settings = settings
+        @yield_nil = yield_nil
+      end
+
+      # Iterates over requested topic partitions and yields the results with the iterator
+      # itself. The Iterator instance is yielded because one can run `stop_partition` to stop
+      # iterating over part of the data. It is useful for scenarios where we are looking for
+      # some information in all the partitions, but once we have found it, the given partition
+      # data is no longer needed and would only eat up resources.
+      def each
+        Admin.with_consumer(@settings) do |consumer|
+          tpl = tpl_with_expanded_offsets(consumer)
+          consumer.assign(tpl)
+
+          # We need this for self-referenced APIs like pausing
+          @current_consumer = consumer
+
+          # Stream data until we reach the end of all the partitions or until the end user
+          # indicates that they are done
+          until done?
+            message = poll(200)
+
+            # Skip nils if not explicitly required
+            next if message.nil? && !@yield_nil
+
+            if message
+              @current_message = build_message(message)
+
+              yield(@current_message, self)
+            else
+              yield(nil, self)
+            end
+          end
+
+          @current_message = nil
+          @current_consumer = nil
+        end
+
+        # Reset so we can use the same iterator again if needed
+        @stopped_partitions = 0
+      end
+
+      # Stops the partition we're currently yielded into
+      def stop_current_partition
+        stop_partition(
+          @current_message.topic,
+          @current_message.partition
+        )
+      end
+
+      # Stops processing of a given partition
+      # We expect the partition to be provided because of a scenario where there is a
+      # multi-partition iteration and we want to stop a different partition than the one that
+      # is currently yielded.
+      #
+      # We pause it forever and no longer work with it.
+      #
+      # @param name [String] topic name of which partition we want to stop
+      # @param partition [Integer] partition we want to stop processing
+      def stop_partition(name, partition)
+        @stopped_partitions += 1
+
+        @current_consumer.pause(
+          Rdkafka::Consumer::TopicPartitionList.new(
+            name => [Partition.new(partition, 0)]
+          )
+        )
+      end
+
+      private
+
+      # Expands topics to which we want to subscribe with partitions information in case this
+      # info is not provided. For our convenience we want to support 5 formats of defining
+      # the subscribed topics:
+      #
+      # - 'topic1' - just a string with one topic name
+      # - ['topic1', 'topic2'] - just the names
+      # - { 'topic1' => -100 } - names with negative lookup offset
+      # - { 'topic1' => { 0 => 5 } } - names with exact partitions offsets
+      # - { 'topic1' => { 0 => -5 }, 'topic2' => { 1 => 5 } } - with per partition negative
+      #   offsets
+      #
+      # @param topics [Array, Hash] topics definitions
+      # @return [Hash] hash with topics containing partitions definitions
+      def expand_topics_with_partitions(topics)
+        # Simplification for the single topic case
+        topics = [topics] if topics.is_a?(String)
+        # If we've got just an array with topics, we need to convert that into a representation
+        # that we can expand with offsets
+        topics = topics.map { |name| [name, false] }.to_h if topics.is_a?(Array)
+
+        expanded = Hash.new { |h, k| h[k] = {} }
+
+        topics.map do |topic, details|
+          if details.is_a?(Hash)
+            details.each do |partition, offset|
+              expanded[topic][partition] = offset
+            end
+          else
+            partition_count(topic.to_s).times do |partition|
+              # If no offsets are provided, we just start from zero
+              expanded[topic][partition] = details || 0
+            end
+          end
+        end
+
+        expanded
+      end
+
+      # @param timeout [Integer] timeout in ms
+      # @return [Rdkafka::Consumer::Message, nil] message or nil if nothing to do
+      def poll(timeout)
+        @current_consumer.poll(timeout)
+      rescue Rdkafka::RdkafkaError => e
+        # End of partition
+        if e.code == :partition_eof
+          @stopped_partitions += 1
+
+          retry
+        end
+
+        raise e
+      end
+
+      # Converts a raw rdkafka message into a Karafka message
+      #
+      # @param message [Rdkafka::Consumer::Message] raw rdkafka message
+      # @return [::Karafka::Messages::Message]
+      def build_message(message)
+        Messages::Builders::Message.call(
+          message,
+          @routing_topics.fetch(message.topic),
+          Time.now
+        )
+      end
+
+      # Do we have all the data we wanted or has every topic partition reached EOF?
+      # @return [Boolean]
+      def done?
+        @stopped_partitions >= @total_partitions
+      end
+
+      # Builds the tpl representing all the subscriptions we want to run
+      #
+      # Additionally, for negative offsets, does the watermark calculation of where to start
+      #
+      # @param consumer [Rdkafka::Consumer] consumer we need in case of negative offsets, as
+      #   negative ones are going to be used to do "give me last X". We use the already
+      #   initialized consumer instance, so as not to start another one.
+      # @return [Rdkafka::Consumer::TopicPartitionList]
+      def tpl_with_expanded_offsets(consumer)
+        tpl = Rdkafka::Consumer::TopicPartitionList.new
+
+        @topics_with_partitions.each do |name, partitions|
+          partitions_with_offsets = {}
+
+          # When no offsets defined, we just start from zero
+          if partitions.is_a?(Array) || partitions.is_a?(Range)
+            partitions_with_offsets = partitions.map { |partition| [partition, 0] }.to_h
+          else
+            # When offsets defined, we can either use them if positive or expand and move back
+            # in case of negative (-1000 means last 1000 messages, etc)
+            partitions.each do |partition, offset|
+              if offset.negative?
+                _, high_watermark_offset = consumer.query_watermark_offsets(name, partition)
+                # We add because this offset is negative
+                partitions_with_offsets[partition] = high_watermark_offset + offset
+              else
+                partitions_with_offsets[partition] = offset
+              end
+            end
+          end
+
+          tpl.add_topic_and_partitions_with_offsets(name, partitions_with_offsets)
+        end
+
+        tpl
+      end
+
+      # @param name [String] topic name
+      # @return [Integer] number of partitions of the topic we want to iterate over
+      def partition_count(name)
+        Admin
+          .cluster_info
+          .topics
+          .find { |topic| topic.fetch(:topic_name) == name }
+          .fetch(:partitions)
+          .count
+      end
+    end
+  end
+end
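A usage sketch of the iterator API above; the topic name, offsets and lookup condition are illustrative:

require 'karafka'

# Stream all partitions of a topic from the beginning, stopping a given
# partition early once the data we were looking for has been found
Karafka::Pro::Iterator.new('events').each do |message, iterator|
  puts "#{message.partition}/#{message.offset}: #{message.raw_payload}"

  iterator.stop_current_partition if message.key == 'needle'
end

# Negative offsets resolve against the high watermark, so this reads only
# the last 10 messages of partition 0
Karafka::Pro::Iterator.new('events' => { 0 => -10 }).each do |message, _iterator|
  puts message.offset
end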
data/lib/karafka/pro/processing/coordinator.rb
@@ -17,7 +17,7 @@ module Karafka
   # Pro coordinator that provides extra orchestration methods useful for parallel processing
   # within the same partition
   class Coordinator < ::Karafka::Processing::Coordinator
-    attr_reader :filter
+    attr_reader :filter, :virtual_offset_manager
 
     # @param args [Object] anything the base coordinator accepts
     def initialize(*args)
@@ -27,6 +27,20 @@ module Karafka
       @flow_lock = Mutex.new
       @collapser = Collapser.new
       @filter = FiltersApplier.new(self)
+
+      return unless topic.virtual_partitions?
+
+      @virtual_offset_manager = VirtualOffsetManager.new(
+        topic.name,
+        partition
+      )
+
+      # We register our own "internal" filter to support filtering of messages that were marked
+      # as consumed virtually
+      @filter.filters << Filters::VirtualLimiter.new(
+        @virtual_offset_manager,
+        @collapser
+      )
     end
 
     # Starts the coordination process
@@ -40,6 +54,11 @@ module Karafka
       @filter.apply!(messages)
 
       @executed.clear
+
+      # We keep the old processed offsets until the collapsing is done and regular processing
+      # with virtualization is restored
+      @virtual_offset_manager.clear if topic.virtual_partitions? && !@collapser.collapsed?
+
       @last_message = messages.last
     end
 
data/lib/karafka/pro/processing/filters/virtual_limiter.rb
@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka also has a commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      module Filters
+        # Removes messages that are already marked as consumed in the virtual offset manager.
+        # This should operate only when using virtual partitions.
+        #
+        # This cleaner prevents us from duplicated processing of messages that were virtually
+        # marked as consumed even if we could not mark them as consumed in Kafka. This allows
+        # us to drastically limit reprocessing when errors occur while operating with virtual
+        # partitions.
+        #
+        # @note It should be registered only when VPs are used
+        class VirtualLimiter < Base
+          # @param manager [Processing::VirtualOffsetManager]
+          # @param collapser [Processing::Collapser]
+          def initialize(manager, collapser)
+            @manager = manager
+            @collapser = collapser
+
+            super()
+          end
+
+          # Remove messages that we already marked as virtually consumed. Does nothing if not
+          # in the collapsed mode.
+          #
+          # @param messages [Array<Karafka::Messages::Message>]
+          def apply!(messages)
+            return unless @collapser.collapsed?
+
+            marked = @manager.marked
+
+            messages.delete_if { |message| marked.include?(message.offset) }
+          end
+        end
+      end
+    end
+  end
+end
data/lib/karafka/pro/processing/filters_applier.rb
@@ -21,6 +21,10 @@ module Karafka
   # This means that this is the API we expose as a single filter, allowing us to control
   # the filtering via many filters easily.
   class FiltersApplier
+    # @return [Array] registered filters array. Useful if we want to inject internal
+    #   context-aware filters.
+    attr_reader :filters
+
     # @param coordinator [Pro::Coordinator] pro coordinator
     def initialize(coordinator)
       # Builds filters out of their factories
data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb
@@ -22,7 +22,7 @@ module Karafka
   # - Mom
   # - VP
   module DlqFtrMomVp
-    include Strategies::Vp::Default
+    include Strategies::Aj::DlqMomVp
     include Strategies::Aj::DlqFtrMom
 
     # Features for this strategy
data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb
@@ -24,7 +24,9 @@ module Karafka
   # This case is a bit special. Please see the `AjDlqMom` for explanation on how the
   # offset management works in this case.
   module DlqLrjMom
-    include Strategies::Aj::DlqLrjMomVp
+    include Strategies::Default
+    include Strategies::Dlq::Default
+    include Strategies::Aj::LrjMom
 
     # Features for this strategy
     FEATURES = %i[
data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb
@@ -20,9 +20,9 @@ module Karafka
   # Manual offset management enabled
   # Virtual Partitions enabled
   module DlqMomVp
-    include Strategies::Dlq::Default
-    include Strategies::Vp::Default
     include Strategies::Default
+    include Strategies::Dlq::Vp
+    include Strategies::Vp::Default
 
     # Features for this strategy
     FEATURES = %i[
data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb
@@ -34,6 +34,8 @@ module Karafka
 
   # No actions needed for the standard flow here
   def handle_before_enqueue
+    super
+
     coordinator.on_enqueued do
       pause(coordinator.seek_offset, Strategies::Lrj::Default::MAX_PAUSE_TIME, false)
     end
data/lib/karafka/pro/processing/strategies/aj/mom_vp.rb
@@ -20,8 +20,8 @@ module Karafka
   # Manual offset management enabled
   # Virtual Partitions enabled
   module MomVp
-    include Strategies::Vp::Default
     include Strategies::Default
+    include Strategies::Vp::Default
 
     # Features for this strategy
     FEATURES = %i[
data/lib/karafka/pro/processing/strategies/dlq/ftr.rb
@@ -20,7 +20,7 @@ module Karafka
   # - Ftr
   module Ftr
     include Strategies::Ftr::Default
-    include Strategies::Dlq::Vp
+    include Strategies::Dlq::Default
 
     # Features for this strategy
     FEATURES = %i[
@@ -55,14 +55,11 @@ module Karafka
 
       return resume if revoked?
 
-      skippable_message, marked = find_skippable_message
+      skippable_message, _marked = find_skippable_message
       dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
 
-      if marked
-        pause(coordinator.seek_offset, nil, false)
-      else
-        pause(skippable_message.offset + 1, nil, false)
-      end
+      coordinator.seek_offset = skippable_message.offset + 1
+      pause(coordinator.seek_offset, nil, false)
     end
   end
 end