karafka 2.3.0 → 2.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.rspec +2 -0
  4. data/CHANGELOG.md +15 -0
  5. data/Gemfile +1 -1
  6. data/Gemfile.lock +22 -22
  7. data/README.md +2 -2
  8. data/bin/integrations +2 -1
  9. data/bin/rspecs +6 -2
  10. data/config/locales/errors.yml +30 -8
  11. data/config/locales/pro_errors.yml +2 -0
  12. data/docker-compose.yml +1 -1
  13. data/lib/karafka/app.rb +14 -0
  14. data/lib/karafka/cli/base.rb +19 -0
  15. data/lib/karafka/cli/server.rb +62 -76
  16. data/lib/karafka/cli/swarm.rb +30 -0
  17. data/lib/karafka/constraints.rb +3 -3
  18. data/lib/karafka/contracts/config.rb +19 -0
  19. data/lib/karafka/errors.rb +12 -0
  20. data/lib/karafka/helpers/async.rb +13 -3
  21. data/lib/karafka/helpers/config_importer.rb +30 -0
  22. data/lib/karafka/instrumentation/logger_listener.rb +31 -0
  23. data/lib/karafka/instrumentation/notifications.rb +9 -0
  24. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +2 -0
  25. data/lib/karafka/instrumentation/vendors/kubernetes/base_listener.rb +72 -0
  26. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +11 -40
  27. data/lib/karafka/instrumentation/vendors/kubernetes/swarm_liveness_listener.rb +54 -0
  28. data/lib/karafka/pro/active_job/job_options_contract.rb +1 -1
  29. data/lib/karafka/pro/base_consumer.rb +16 -0
  30. data/lib/karafka/pro/connection/manager.rb +6 -1
  31. data/lib/karafka/pro/processing/coordinator.rb +13 -3
  32. data/lib/karafka/pro/processing/coordinators/errors_tracker.rb +74 -0
  33. data/lib/karafka/pro/processing/coordinators/filters_applier.rb +107 -0
  34. data/lib/karafka/pro/processing/coordinators/virtual_offset_manager.rb +180 -0
  35. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +5 -7
  36. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +5 -7
  37. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom.rb +8 -10
  38. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +8 -16
  39. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +5 -7
  40. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +5 -7
  41. data/lib/karafka/pro/processing/strategies/aj/dlq_mom.rb +8 -10
  42. data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +7 -9
  43. data/lib/karafka/pro/processing/strategies/dlq/default.rb +36 -10
  44. data/lib/karafka/pro/processing/strategies/dlq/ftr.rb +3 -7
  45. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +4 -8
  46. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +6 -9
  47. data/lib/karafka/pro/processing/strategies/dlq/ftr_mom.rb +5 -15
  48. data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +4 -8
  49. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +6 -9
  50. data/lib/karafka/pro/processing/strategies/dlq/mom.rb +10 -20
  51. data/lib/karafka/pro/processing/strategies/vp/default.rb +7 -0
  52. data/lib/karafka/pro/routing/features/dead_letter_queue/contracts/topic.rb +6 -0
  53. data/lib/karafka/pro/routing/features/dead_letter_queue/topic.rb +39 -0
  54. data/lib/karafka/pro/swarm/liveness_listener.rb +171 -0
  55. data/lib/karafka/process.rb +27 -1
  56. data/lib/karafka/routing/features/dead_letter_queue/config.rb +2 -0
  57. data/lib/karafka/routing/subscription_group.rb +31 -9
  58. data/lib/karafka/runner.rb +4 -0
  59. data/lib/karafka/server.rb +13 -16
  60. data/lib/karafka/setup/config.rb +41 -2
  61. data/lib/karafka/status.rb +4 -2
  62. data/lib/karafka/swarm/liveness_listener.rb +55 -0
  63. data/lib/karafka/swarm/manager.rb +217 -0
  64. data/lib/karafka/swarm/node.rb +179 -0
  65. data/lib/karafka/swarm/pidfd.rb +131 -0
  66. data/lib/karafka/swarm/supervisor.rb +184 -0
  67. data/lib/karafka/swarm.rb +27 -0
  68. data/lib/karafka/templates/karafka.rb.erb +0 -2
  69. data/lib/karafka/version.rb +1 -1
  70. data/lib/karafka.rb +1 -1
  71. data.tar.gz.sig +0 -0
  72. metadata +17 -4
  73. metadata.gz.sig +0 -0
  74. data/lib/karafka/pro/processing/filters_applier.rb +0 -105
  75. data/lib/karafka/pro/processing/virtual_offset_manager.rb +0 -177
@@ -226,6 +226,34 @@ module Karafka
   MSG
   end
 
+ # @param event [Karafka::Core::Monitoring::Event] event details including payload
+ def on_swarm_manager_stopping(event)
+ node = event[:node]
+ error "Swarm manager detected unhealthy node #{node.pid}. Sending TERM signal..."
+ end
+
+ # @param event [Karafka::Core::Monitoring::Event] event details including payload
+ def on_swarm_manager_terminating(event)
+ node = event[:node]
+ error "Swarm manager detected unresponsive node #{node.pid}. Sending KILL signal..."
+ end
+
+ # @param event [Karafka::Core::Monitoring::Event] event details including payload
+ def on_swarm_manager_before_fork(event)
+ debug "Swarm manager starting node with id: #{event[:node].id}"
+ end
+
+ # @param _event [Karafka::Core::Monitoring::Event] event details including payload
+ def on_swarm_node_after_fork(_event)
+ info "Swarm node #{::Process.pid} forked from #{::Process.ppid}"
+ end
+
+ # @param event [Karafka::Core::Monitoring::Event] event details including payload
+ def on_swarm_manager_control(event)
+ pids = event[:caller].nodes.map(&:pid).join(', ')
+ debug "Swarm manager checking nodes: #{pids}"
+ end
+
   # There are many types of errors that can occur in many places, but we provide a single
   # handler for all of them to simplify error instrumentation.
   # @param event [Karafka::Core::Monitoring::Event] event details including payload
@@ -259,6 +287,9 @@ module Karafka
   when 'connection.listener.fetch_loop.error'
   error "Listener fetch loop error: #{error}"
   error details
+ when 'swarm.supervisor.error'
+ fatal "Swarm supervisor crashed due to an error: #{error}"
+ fatal details
   when 'runner.call.error'
   fatal "Runner crashed due to an error: #{error}"
   fatal details
@@ -22,6 +22,8 @@ module Karafka
 
   app.initializing
   app.initialized
+ app.before_warmup
+ app.supervising
   app.running
   app.quieting
   app.quiet
@@ -73,6 +75,13 @@ module Karafka
 
   statistics.emitted
 
+ swarm.node.after_fork
+ swarm.manager.before_fork
+ swarm.manager.after_fork
+ swarm.manager.control
+ swarm.manager.stopping
+ swarm.manager.terminating
+
   worker.process
   worker.processed
   worker.completed
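
The new swarm.* events above can be consumed with the regular Karafka monitor API. A minimal sketch (not part of this diff; the handler body is illustrative) that logs node PIDs on every swarm control cycle, reusing the event[:caller].nodes payload visible in the logger listener changes earlier:

    # karafka.rb (assumed placement)
    Karafka.monitor.subscribe('swarm.manager.control') do |event|
      pids = event[:caller].nodes.map(&:pid).join(', ')
      Karafka.logger.debug("Swarm nodes under control: #{pids}")
    end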
@@ -129,6 +129,8 @@ module Karafka
   fatal "Runner crashed due to an error: #{error}"
   when 'app.stopping.error'
   error 'Forceful Karafka server stop'
+ when 'swarm.supervisor.error'
+ fatal "Swarm supervisor crashed due to an error: #{error}"
   when 'librdkafka.error'
   error "librdkafka internal error occurred: #{error}"
   # Those will only occur when retries in the client fail and when they did not stop
@@ -0,0 +1,72 @@
+ # frozen_string_literal: true
+
+ require 'socket'
+
+ module Karafka
+ module Instrumentation
+ module Vendors
+ # Namespace for instrumentation related with Kubernetes
+ module Kubernetes
+ # Base Kubernetes Listener providing basic HTTP server capabilities to respond with health
+ class BaseListener
+ include ::Karafka::Core::Helpers::Time
+
+ # All good with Karafka
+ OK_CODE = '204 No Content'
+
+ # Some timeouts, fail
+ FAIL_CODE = '500 Internal Server Error'
+
+ private_constant :OK_CODE, :FAIL_CODE
+
+ # @param hostname [String, nil] hostname or nil to bind on all
+ # @param port [Integer] TCP port on which we want to run our HTTP status server
+ def initialize(
+ hostname: nil,
+ port: 3000
+ )
+ @hostname = hostname
+ @port = port
+ end
+
+ private
+
+ # @return [Boolean] true if all good, false if we should tell k8s to kill this process
+ def healthy?
+ raise NotImplementedError, 'Implement in a subclass'
+ end
+
+ # Responds to a HTTP request with the process liveness status
+ def respond
+ client = @server.accept
+ client.gets
+ client.print "HTTP/1.1 #{healthy? ? OK_CODE : FAIL_CODE}\r\n"
+ client.print "Content-Type: text/plain\r\n"
+ client.print "\r\n"
+ client.close
+
+ true
+ rescue Errno::ECONNRESET, Errno::EPIPE, IOError
+ !@server.closed?
+ end
+
+ # Starts background thread with micro-http monitoring
+ def start
+ @server = TCPServer.new(*[@hostname, @port].compact)
+
+ Thread.new do
+ loop do
+ break unless respond
+ end
+ end
+ end
+
+ # Stops the server
+ def stop
+ @server.close
+ end
+ end
+ end
+ end
+ end
+ end
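
The new BaseListener exposes start, stop and respond, and leaves only the private healthy? hook to subclasses. A hypothetical subclass (not part of this release) could look like the sketch below, assuming it is subscribed as a regular listener so the on_app_* handlers fire:

    class CustomLivenessListener < Karafka::Instrumentation::Vendors::Kubernetes::BaseListener
      # Boot the tiny HTTP status server once the app is running
      def on_app_running(_event)
        start
      end

      # Shut the HTTP server down together with the process
      def on_app_stopped(_event)
        stop
      end

      private

      # Replace with a real check; true renders 204 No Content, false renders 500
      def healthy?
        true
      end
    end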
@@ -1,6 +1,6 @@
   # frozen_string_literal: true
 
- require 'socket'
+ require 'karafka/instrumentation/vendors/kubernetes/base_listener'
 
   module Karafka
   module Instrumentation
@@ -23,17 +23,9 @@ module Karafka
   #
   # @note In case of usage within an embedding with Puma, you need to select different port
   # then the one used by Puma itself.
- class LivenessListener
- include ::Karafka::Core::Helpers::Time
-
- # All good with Karafka
- OK_CODE = '204 No Content'
-
- # Some timeouts, fail
- FAIL_CODE = '500 Internal Server Error'
-
- private_constant :OK_CODE, :FAIL_CODE
-
+ #
+ # @note Please use `Kubernetes::SwarmLivenessListener` when operating in the swarm mode
+ class LivenessListener < BaseListener
   # @param hostname [String, nil] hostname or nil to bind on all
   # @param port [Integer] TCP port on which we want to run our HTTP status server
   # @param consuming_ttl [Integer] time in ms after which we consider consumption hanging.
@@ -48,30 +40,23 @@ module Karafka
   consuming_ttl: 5 * 60 * 1_000,
   polling_ttl: 5 * 60 * 1_000
   )
- @hostname = hostname
- @port = port
   @polling_ttl = polling_ttl
   @consuming_ttl = consuming_ttl
   @mutex = Mutex.new
   @pollings = {}
   @consumptions = {}
+ super(hostname: hostname, port: port)
   end
 
   # @param _event [Karafka::Core::Monitoring::Event]
   def on_app_running(_event)
- @server = TCPServer.new(*[@hostname, @port].compact)
-
- Thread.new do
- loop do
- break unless respond
- end
- end
+ start
   end
 
   # Stop the http server when we stop the process
   # @param _event [Karafka::Core::Monitoring::Event]
   def on_app_stopped(_event)
- @server.close
+ stop
   end
 
   # Tick on each fetch
@@ -148,29 +133,15 @@ module Karafka
   end
   end
 
- # Responds to a HTTP request with the process liveness status
- def respond
- client = @server.accept
- client.gets
- client.print "HTTP/1.1 #{status}\r\n"
- client.print "Content-Type: text/plain\r\n"
- client.print "\r\n"
- client.close
-
- true
- rescue Errno::ECONNRESET, Errno::EPIPE, IOError
- !@server.closed?
- end
-
   # Did we exceed any of the ttls
   # @return [String] 204 string if ok, 500 otherwise
- def status
+ def healthy?
   time = monotonic_now
 
- return FAIL_CODE if @pollings.values.any? { |tick| (time - tick) > @polling_ttl }
- return FAIL_CODE if @consumptions.values.any? { |tick| (time - tick) > @consuming_ttl }
+ return false if @pollings.values.any? { |tick| (time - tick) > @polling_ttl }
+ return false if @consumptions.values.any? { |tick| (time - tick) > @consuming_ttl }
 
- OK_CODE
+ true
   end
   end
   end
@@ -0,0 +1,54 @@
+ # frozen_string_literal: true
+
+ require 'karafka/instrumentation/vendors/kubernetes/base_listener'
+
+ module Karafka
+ module Instrumentation
+ module Vendors
+ module Kubernetes
+ # Kubernetes HTTP listener designed to operate with Karafka running in the swarm mode
+ # In the Swarm mode we supervise only the supervisor as other nodes are suppose to be
+ # managed by the swarm supervisor
+ class SwarmLivenessListener < BaseListener
+ # @param hostname [String, nil] hostname or nil to bind on all
+ # @param port [Integer] TCP port on which we want to run our HTTP status server
+ # @param controlling_ttl [Integer] time in ms after which we consider the supervising
+ # thread dead because it is not controlling nodes. When configuring this, please take
+ # into consideration, that during shutdown of the swarm, there is no controlling
+ # happening.
+ def initialize(
+ hostname: nil,
+ port: 3000,
+ controlling_ttl: 60 * 1_000
+ )
+ @hostname = hostname
+ @port = port
+ @controlling_ttl = controlling_ttl
+ @controlling = monotonic_now
+ super(port: port, hostname: hostname)
+ end
+
+ # Starts reporting in the supervisor only when it runs
+ # @param _event [Karafka::Core::Monitoring::Event]
+ def on_app_supervising(_event)
+ start
+ end
+
+ # Tick on each control
+ # @param _event [Karafka::Core::Monitoring::Event]
+ def on_swarm_manager_control(_event)
+ @controlling = monotonic_now
+ end
+
+ private
+
+ # Did we exceed any of the ttls
+ # @return [String] 204 string if ok, 500 otherwise
+ def healthy?
+ (monotonic_now - @controlling) < @controlling_ttl
+ end
+ end
+ end
+ end
+ end
+ end
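
Wiring this swarm listener follows the same pattern as the existing Kubernetes liveness listener; note that it only starts its HTTP server on app.supervising, i.e. in the supervisor process. A sketch with illustrative values (not part of this diff):

    # karafka.rb (assumed placement)
    require 'karafka/instrumentation/vendors/kubernetes/swarm_liveness_listener'

    Karafka.monitor.subscribe(
      Karafka::Instrumentation::Vendors::Kubernetes::SwarmLivenessListener.new(
        hostname: nil,           # bind on all interfaces
        port: 9010,              # illustrative; must not clash with other HTTP servers
        controlling_ttl: 60_000  # report unhealthy if no control cycle within 60s
      )
    )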
@@ -26,7 +26,7 @@ module Karafka
   end
 
   optional(:partitioner) { |val| val.respond_to?(:call) }
- optional(:partition_key_type) { |val| %i[key partition_key].include?(val) }
+ optional(:partition_key_type) { |val| %i[key partition_key partition].include?(val) }
   optional(:dispatch_method) do |val|
   %i[
   produce_async
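
With :partition now accepted for partition_key_type, an ActiveJob class could presumably pin its dispatches to a concrete partition number. A hypothetical example (job class, partitioner and modulus are illustrative), assuming the Pro karafka_options ActiveJob API:

    class ReportsJob < ActiveJob::Base
      queue_as :reports

      karafka_options(
        dispatch_method: :produce_async,
        partition_key_type: :partition,
        # With :partition the partitioner is expected to return a partition number
        partitioner: ->(job) { job.arguments.first.to_i % 5 }
      )
    end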
@@ -42,6 +42,22 @@ module Karafka
 
   # By default we do nothing when ticking
   def tick; end
+
+ # @return [Karafka::Pro::Processing::Coordinators::ErrorsTracker] tracker for errors that
+ # occurred during processing until another successful processing
+ #
+ # @note This will always contain **only** details of errors that occurred during `#consume`
+ # because only those are retryable.
+ #
+ # @note This may contain more than one error because:
+ # - this can collect various errors that might have happened during virtual partitions
+ # execution
+ # - errors can pile up during retries and until a clean run, they will be collected with
+ # a limit of last 100. We do not store more because a consumer with an endless error loop
+ # would cause memory leaks without such a limit.
+ def errors_tracker
+ coordinator.errors_tracker
+ end
   end
   end
   end
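
A hypothetical Pro consumer flow built on top of the new #errors_tracker accessor (class, method and error names are illustrative); on a retry the tracker still holds the errors from the failed attempts, since it is only cleared when a fresh, non-retried run starts:

    class OrdersConsumer < ApplicationConsumer
      def consume
        if errors_tracker.empty?
          messages.each { |message| process(message) }
        else
          # We are in a retry; branch based on what failed previously
          messages.each { |message| process_cautiously(message) }
        end
      end
    end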
@@ -114,10 +114,15 @@ module Karafka
   once(:quiet!) { active_listeners.each(&:quiet!) }
 
   # If we are in the process of moving to quiet state, we need to check it.
- if Karafka::App.quieting? && active_listeners.all?(&:quiet?)
+ if Karafka::App.quieting?
+ # If we are quieting but not all active listeners are quiet we need to wait for all of
+ # them to reach the quiet state
+ return unless active_listeners.all?(&:quiet?)
+
   once(:quieted!) { Karafka::App.quieted! }
   end
 
+ # Do nothing if we moved to quiet state and want to be in it
   return if Karafka::App.quiet?
 
   # Since separate subscription groups are subscribed to different topics, there is no risk
@@ -21,13 +21,14 @@ module Karafka
 
   def_delegators :@collapser, :collapsed?, :collapse_until!
 
- attr_reader :filter, :virtual_offset_manager, :shared_mutex
+ attr_reader :filter, :virtual_offset_manager, :shared_mutex, :errors_tracker
 
   # @param args [Object] anything the base coordinator accepts
   def initialize(*args)
   super
 
   @executed = []
+ @errors_tracker = Coordinators::ErrorsTracker.new
   @flow_mutex = Mutex.new
   # Lock for user code synchronization
   # We do not want to mix coordinator lock with the user lock not to create cases where
@@ -36,11 +37,11 @@ module Karafka
   # framework and can be used for user-facing locking
   @shared_mutex = Mutex.new
   @collapser = Collapser.new
- @filter = FiltersApplier.new(self)
+ @filter = Coordinators::FiltersApplier.new(self)
 
   return unless topic.virtual_partitions?
 
- @virtual_offset_manager = VirtualOffsetManager.new(
+ @virtual_offset_manager = Coordinators::VirtualOffsetManager.new(
   topic.name,
   partition,
   topic.virtual_partitions.offset_metadata_strategy
@@ -64,6 +65,14 @@ module Karafka
 
   @filter.apply!(messages)
 
+ # Do not clear coordinator errors storage when we are retrying, so we can reference the
+ # errors that have happened during recovery. This can be useful for implementing custom
+ # flows. There can be more errors than one when running with virtual partitions so we
+ # need to make sure we collect them all. Under collapse when we reference a given
+ # consumer we should be able to get all the errors and not just first/last.
+ #
+ # @note We use zero as the attempt mark because we are not "yet" in the attempt 1
+ @errors_tracker.clear if attempt.zero?
   @executed.clear
 
   # We keep the old processed offsets until the collapsing is done and regular processing
@@ -79,6 +88,7 @@ module Karafka
   # @param error [StandardError] error from the failure
   def failure!(consumer, error)
   super
+ @errors_tracker << error
   collapse_until!(@last_message.offset + 1)
   end
 
@@ -0,0 +1,74 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+ module Pro
+ module Processing
+ # Namespace for Pro coordinator related sub-components
+ module Coordinators
+ # Object used to track errors in between executions to be able to build error-type based
+ # recovery flows.
+ class ErrorsTracker
+ include Enumerable
+
+ # Max errors we keep in memory.
+ # We do not want to keep more because for DLQ-less this would cause memory-leaks.
+ STORAGE_LIMIT = 100
+
+ private_constant :STORAGE_LIMIT
+
+ def initialize
+ @errors = []
+ end
+
+ # Clears all the errors
+ def clear
+ @errors.clear
+ end
+
+ # @param error [StandardError] adds the error to the tracker
+ def <<(error)
+ @errors.shift if @errors.size >= STORAGE_LIMIT
+ @errors << error
+ end
+
+ # @return [Boolean] is the error tracker empty
+ def empty?
+ @errors.empty?
+ end
+
+ # @return [Integer] number of elements
+ def size
+ count
+ end
+
+ # @return [StandardError, nil] last error that occurred or nil if no errors
+ def last
+ @errors.last
+ end
+
+ # Iterates over errors
+ # @param block [Proc] code we want to run on each error
+ def each(&block)
+ @errors.each(&block)
+ end
+
+ # @return [Array<StandardError>] array with all the errors that occurred
+ def all
+ @errors
+ end
+ end
+ end
+ end
+ end
+ end
@@ -0,0 +1,107 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+ module Pro
+ module Processing
+ module Coordinators
+ # Applier for all filters we want to have. Whether related to limiting messages based
+ # on the payload or any other things.
+ #
+ # From the outside world perspective, this encapsulates all the filters.
+ # This means that this is the API we expose as a single filter, allowing us to control
+ # the filtering via many filters easily.
+ class FiltersApplier
+ # @return [Array] registered filters array. Useful if we want to inject internal context
+ # aware filters.
+ attr_reader :filters
+
+ # @param coordinator [Pro::Coordinator] pro coordinator
+ def initialize(coordinator)
+ # Builds filters out of their factories
+ # We build it that way (providing topic and partition) because there may be a case
+ # where someone wants to have a specific logic that is per topic or partition. Like for
+ # example a case where there is a cache bypassing revocations for topic partition.
+ #
+ # We provide full Karafka routing topic here and not the name only, in case the filter
+ # would be customized based on other topic settings (like VPs, etc)
+ #
+ # This setup allows for biggest flexibility also because topic object holds the
+ # reference to the subscription group and consumer group
+ @filters = coordinator.topic.filtering.factories.map do |factory|
+ factory.call(coordinator.topic, coordinator.partition)
+ end
+ end
+
+ # @param messages [Array<Karafka::Messages::Message>] array with messages from the
+ # partition
+ def apply!(messages)
+ return unless active?
+
+ @filters.each { |filter| filter.apply!(messages) }
+ end
+
+ # @return [Boolean] did we filter out any messages during filtering run
+ def applied?
+ return false unless active?
+
+ !applied.empty?
+ end
+
+ # @return [Symbol] consumer post-filtering action that should be taken
+ def action
+ return :skip unless applied?
+
+ # The highest priority is on a potential backoff from any of the filters because it is
+ # the less risky (delay and continue later)
+ return :pause if applied.any? { |filter| filter.action == :pause }
+
+ # If none of the filters wanted to pause, we can check for any that would want to seek
+ # and if there is any, we can go with this strategy
+ return :seek if applied.any? { |filter| filter.action == :seek }
+
+ :skip
+ end
+
+ # @return [Integer] minimum timeout we need to pause. This is the minimum for all the
+ # filters to satisfy all of them.
+ def timeout
+ applied.map(&:timeout).compact.min || 0
+ end
+
+ # The first message we do need to get next time we poll. We use the minimum not to jump
+ # accidentally by over any.
+ # @return [Karafka::Messages::Message, nil] cursor message or nil if none
+ # @note Cursor message can also return the offset in the time format
+ def cursor
+ return nil unless active?
+
+ applied.map(&:cursor).compact.min_by(&:offset)
+ end
+
+ private
+
+ # @return [Boolean] is filtering active
+ def active?
+ !@filters.empty?
+ end
+
+ # @return [Array<Object>] filters that applied any sort of messages limiting
+ def applied
+ @filters.select(&:applied?)
+ end
+ end
+ end
+ end
+ end
+ end
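
The applier builds its filters from coordinator.topic.filtering.factories, so a custom filter registered through the Pro routing filter API would presumably need to satisfy the interface used above (apply!, applied?, action, timeout, cursor). A hypothetical sketch, assuming the Pro Filters::Base helper provides defaults for everything except apply!:

    # Drops oversized payloads before they reach the consumer (illustrative threshold)
    class DropLargePayloads < Karafka::Pro::Processing::Filters::Base
      def apply!(messages)
        @applied = false

        messages.delete_if do |message|
          next false unless message.raw_payload.bytesize > 1_000_000

          @applied = true
        end
      end
    end

    # Routing registration: the factory receives the routing topic and partition,
    # exactly as invoked by FiltersApplier#initialize above
    class KarafkaApp < Karafka::App
      routes.draw do
        topic :events do
          consumer EventsConsumer
          filter ->(_topic, _partition) { DropLargePayloads.new }
        end
      end
    end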