karafka 2.3.1 → 2.3.2

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.rspec +2 -0
  4. data/CHANGELOG.md +12 -0
  5. data/Gemfile.lock +6 -6
  6. data/bin/integrations +2 -1
  7. data/bin/rspecs +6 -2
  8. data/config/locales/errors.yml +30 -8
  9. data/config/locales/pro_errors.yml +2 -0
  10. data/docker-compose.yml +1 -1
  11. data/lib/karafka/app.rb +14 -0
  12. data/lib/karafka/cli/base.rb +19 -0
  13. data/lib/karafka/cli/server.rb +62 -76
  14. data/lib/karafka/cli/swarm.rb +30 -0
  15. data/lib/karafka/constraints.rb +3 -3
  16. data/lib/karafka/contracts/config.rb +19 -0
  17. data/lib/karafka/errors.rb +12 -0
  18. data/lib/karafka/helpers/config_importer.rb +30 -0
  19. data/lib/karafka/instrumentation/logger_listener.rb +31 -0
  20. data/lib/karafka/instrumentation/notifications.rb +9 -0
  21. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +2 -0
  22. data/lib/karafka/instrumentation/vendors/kubernetes/base_listener.rb +72 -0
  23. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +11 -40
  24. data/lib/karafka/instrumentation/vendors/kubernetes/swarm_liveness_listener.rb +54 -0
  25. data/lib/karafka/pro/active_job/job_options_contract.rb +1 -1
  26. data/lib/karafka/pro/base_consumer.rb +16 -0
  27. data/lib/karafka/pro/connection/manager.rb +6 -1
  28. data/lib/karafka/pro/processing/coordinator.rb +13 -3
  29. data/lib/karafka/pro/processing/coordinators/errors_tracker.rb +74 -0
  30. data/lib/karafka/pro/processing/coordinators/filters_applier.rb +107 -0
  31. data/lib/karafka/pro/processing/coordinators/virtual_offset_manager.rb +180 -0
  32. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +5 -7
  33. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +5 -7
  34. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom.rb +8 -10
  35. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +8 -16
  36. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +5 -7
  37. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +5 -7
  38. data/lib/karafka/pro/processing/strategies/aj/dlq_mom.rb +8 -10
  39. data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +7 -9
  40. data/lib/karafka/pro/processing/strategies/dlq/default.rb +36 -10
  41. data/lib/karafka/pro/processing/strategies/dlq/ftr.rb +3 -7
  42. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +4 -8
  43. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +6 -9
  44. data/lib/karafka/pro/processing/strategies/dlq/ftr_mom.rb +5 -15
  45. data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +4 -8
  46. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +6 -9
  47. data/lib/karafka/pro/processing/strategies/dlq/mom.rb +10 -20
  48. data/lib/karafka/pro/processing/strategies/vp/default.rb +7 -0
  49. data/lib/karafka/pro/routing/features/dead_letter_queue/contracts/topic.rb +6 -0
  50. data/lib/karafka/pro/routing/features/dead_letter_queue/topic.rb +39 -0
  51. data/lib/karafka/pro/swarm/liveness_listener.rb +171 -0
  52. data/lib/karafka/process.rb +27 -1
  53. data/lib/karafka/routing/features/dead_letter_queue/config.rb +2 -0
  54. data/lib/karafka/routing/subscription_group.rb +31 -9
  55. data/lib/karafka/server.rb +11 -13
  56. data/lib/karafka/setup/config.rb +41 -2
  57. data/lib/karafka/status.rb +4 -2
  58. data/lib/karafka/swarm/liveness_listener.rb +55 -0
  59. data/lib/karafka/swarm/manager.rb +217 -0
  60. data/lib/karafka/swarm/node.rb +179 -0
  61. data/lib/karafka/swarm/pidfd.rb +131 -0
  62. data/lib/karafka/swarm/supervisor.rb +184 -0
  63. data/lib/karafka/swarm.rb +27 -0
  64. data/lib/karafka/version.rb +1 -1
  65. data/lib/karafka.rb +1 -1
  66. data.tar.gz.sig +0 -0
  67. metadata +17 -4
  68. metadata.gz.sig +0 -0
  69. data/lib/karafka/pro/processing/filters_applier.rb +0 -105
  70. data/lib/karafka/pro/processing/virtual_offset_manager.rb +0 -177
data/lib/karafka/instrumentation/vendors/kubernetes/base_listener.rb
@@ -0,0 +1,72 @@
+ # frozen_string_literal: true
+
+ require 'socket'
+
+ module Karafka
+   module Instrumentation
+     module Vendors
+       # Namespace for instrumentation related with Kubernetes
+       module Kubernetes
+         # Base Kubernetes Listener providing basic HTTP server capabilities to respond with health
+         class BaseListener
+           include ::Karafka::Core::Helpers::Time
+
+           # All good with Karafka
+           OK_CODE = '204 No Content'
+
+           # Some timeouts, fail
+           FAIL_CODE = '500 Internal Server Error'
+
+           private_constant :OK_CODE, :FAIL_CODE
+
+           # @param hostname [String, nil] hostname or nil to bind on all
+           # @param port [Integer] TCP port on which we want to run our HTTP status server
+           def initialize(
+             hostname: nil,
+             port: 3000
+           )
+             @hostname = hostname
+             @port = port
+           end
+
+           private
+
+           # @return [Boolean] true if all good, false if we should tell k8s to kill this process
+           def healthy?
+             raise NotImplementedError, 'Implement in a subclass'
+           end
+
+           # Responds to a HTTP request with the process liveness status
+           def respond
+             client = @server.accept
+             client.gets
+             client.print "HTTP/1.1 #{healthy? ? OK_CODE : FAIL_CODE}\r\n"
+             client.print "Content-Type: text/plain\r\n"
+             client.print "\r\n"
+             client.close
+
+             true
+           rescue Errno::ECONNRESET, Errno::EPIPE, IOError
+             !@server.closed?
+           end
+
+           # Starts background thread with micro-http monitoring
+           def start
+             @server = TCPServer.new(*[@hostname, @port].compact)
+
+             Thread.new do
+               loop do
+                 break unless respond
+               end
+             end
+           end
+
+           # Stops the server
+           def stop
+             @server.close
+           end
+         end
+       end
+     end
+   end
+ end
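The new BaseListener only provides the HTTP plumbing; concrete listeners subclass it, implement `healthy?` and decide when to `start` and `stop` the server. A minimal sketch of a custom subclass (the ReadinessListener name and the running-state check are illustrative, not part of the gem):

    # Hypothetical listener built on top of the new BaseListener
    class ReadinessListener < Karafka::Instrumentation::Vendors::Kubernetes::BaseListener
      # Boot the TCP status server once the process reports it is running
      def on_app_running(_event)
        start
      end

      # Close the server together with the process
      def on_app_stopped(_event)
        stop
      end

      private

      # Respond with 204 only while Karafka reports the running state
      def healthy?
        Karafka::App.running?
      end
    end

    # Listeners are attached through the regular instrumentation monitor
    Karafka.monitor.subscribe(ReadinessListener.new(port: 9292))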
data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb
@@ -1,6 +1,6 @@
  # frozen_string_literal: true

- require 'socket'
+ require 'karafka/instrumentation/vendors/kubernetes/base_listener'

  module Karafka
    module Instrumentation
@@ -23,17 +23,9 @@ module Karafka
        #
        # @note In case of usage within an embedding with Puma, you need to select different port
        # then the one used by Puma itself.
-       class LivenessListener
-         include ::Karafka::Core::Helpers::Time
-
-         # All good with Karafka
-         OK_CODE = '204 No Content'
-
-         # Some timeouts, fail
-         FAIL_CODE = '500 Internal Server Error'
-
-         private_constant :OK_CODE, :FAIL_CODE
-
+       #
+       # @note Please use `Kubernetes::SwarmLivenessListener` when operating in the swarm mode
+       class LivenessListener < BaseListener
          # @param hostname [String, nil] hostname or nil to bind on all
          # @param port [Integer] TCP port on which we want to run our HTTP status server
          # @param consuming_ttl [Integer] time in ms after which we consider consumption hanging.
@@ -48,30 +40,23 @@ module Karafka
            consuming_ttl: 5 * 60 * 1_000,
            polling_ttl: 5 * 60 * 1_000
          )
-           @hostname = hostname
-           @port = port
            @polling_ttl = polling_ttl
            @consuming_ttl = consuming_ttl
            @mutex = Mutex.new
            @pollings = {}
            @consumptions = {}
+           super(hostname: hostname, port: port)
          end

          # @param _event [Karafka::Core::Monitoring::Event]
          def on_app_running(_event)
-           @server = TCPServer.new(*[@hostname, @port].compact)
-
-           Thread.new do
-             loop do
-               break unless respond
-             end
-           end
+           start
          end

          # Stop the http server when we stop the process
          # @param _event [Karafka::Core::Monitoring::Event]
          def on_app_stopped(_event)
-           @server.close
+           stop
          end

          # Tick on each fetch
@@ -148,29 +133,15 @@ module Karafka
            end
          end

-         # Responds to a HTTP request with the process liveness status
-         def respond
-           client = @server.accept
-           client.gets
-           client.print "HTTP/1.1 #{status}\r\n"
-           client.print "Content-Type: text/plain\r\n"
-           client.print "\r\n"
-           client.close
-
-           true
-         rescue Errno::ECONNRESET, Errno::EPIPE, IOError
-           !@server.closed?
-         end
-
          # Did we exceed any of the ttls
          # @return [String] 204 string if ok, 500 otherwise
-         def status
+         def healthy?
            time = monotonic_now

-           return FAIL_CODE if @pollings.values.any? { |tick| (time - tick) > @polling_ttl }
-           return FAIL_CODE if @consumptions.values.any? { |tick| (time - tick) > @consuming_ttl }
+           return false if @pollings.values.any? { |tick| (time - tick) > @polling_ttl }
+           return false if @consumptions.values.any? { |tick| (time - tick) > @consuming_ttl }

-           OK_CODE
+           true
          end
        end
      end
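The refactor moves the HTTP handling into BaseListener without changing how the listener is used: it is still instantiated and subscribed in `karafka.rb`, roughly as below (hostname, port and TTLs are example values, not defaults you must use):

    require 'karafka/instrumentation/vendors/kubernetes/liveness_listener'

    # Example values - tune the port and TTLs to your probe configuration
    listener = ::Karafka::Instrumentation::Vendors::Kubernetes::LivenessListener.new(
      hostname: '0.0.0.0',
      port: 9000,
      # Report failure if nothing was consumed for 5 minutes
      consuming_ttl: 5 * 60 * 1_000,
      # or if polling did not happen for 5 minutes
      polling_ttl: 5 * 60 * 1_000
    )

    Karafka.monitor.subscribe(listener)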
data/lib/karafka/instrumentation/vendors/kubernetes/swarm_liveness_listener.rb
@@ -0,0 +1,54 @@
+ # frozen_string_literal: true
+
+ require 'karafka/instrumentation/vendors/kubernetes/base_listener'
+
+ module Karafka
+   module Instrumentation
+     module Vendors
+       module Kubernetes
+         # Kubernetes HTTP listener designed to operate with Karafka running in the swarm mode
+         # In the Swarm mode we supervise only the supervisor as other nodes are suppose to be
+         # managed by the swarm supervisor
+         class SwarmLivenessListener < BaseListener
+           # @param hostname [String, nil] hostname or nil to bind on all
+           # @param port [Integer] TCP port on which we want to run our HTTP status server
+           # @param controlling_ttl [Integer] time in ms after which we consider the supervising
+           #   thread dead because it is not controlling nodes. When configuring this, please take
+           #   into consideration, that during shutdown of the swarm, there is no controlling
+           #   happening.
+           def initialize(
+             hostname: nil,
+             port: 3000,
+             controlling_ttl: 60 * 1_000
+           )
+             @hostname = hostname
+             @port = port
+             @controlling_ttl = controlling_ttl
+             @controlling = monotonic_now
+             super(port: port, hostname: hostname)
+           end
+
+           # Starts reporting in the supervisor only when it runs
+           # @param _event [Karafka::Core::Monitoring::Event]
+           def on_app_supervising(_event)
+             start
+           end
+
+           # Tick on each control
+           # @param _event [Karafka::Core::Monitoring::Event]
+           def on_swarm_manager_control(_event)
+             @controlling = monotonic_now
+           end
+
+           private
+
+           # Did we exceed any of the ttls
+           # @return [String] 204 string if ok, 500 otherwise
+           def healthy?
+             (monotonic_now - @controlling) < @controlling_ttl
+           end
+         end
+       end
+     end
+   end
+ end
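In swarm mode only the supervisor process is meant to be probed this way, since the worker nodes are watched by the supervisor itself. A sketch of wiring it up in `karafka.rb` (port and TTL values are illustrative):

    require 'karafka/instrumentation/vendors/kubernetes/swarm_liveness_listener'

    # The probed process is the swarm supervisor; nodes are supervised internally
    listener = ::Karafka::Instrumentation::Vendors::Kubernetes::SwarmLivenessListener.new(
      port: 9000,
      # Fail the probe when the supervisor has not run its control loop for 60 seconds
      controlling_ttl: 60 * 1_000
    )

    Karafka.monitor.subscribe(listener)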
data/lib/karafka/pro/active_job/job_options_contract.rb
@@ -26,7 +26,7 @@ module Karafka
        end

        optional(:partitioner) { |val| val.respond_to?(:call) }
-       optional(:partition_key_type) { |val| %i[key partition_key].include?(val) }
+       optional(:partition_key_type) { |val| %i[key partition_key partition].include?(val) }
        optional(:dispatch_method) do |val|
          %i[
            produce_async
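The contract now also accepts `:partition` as a `partition_key_type` for ActiveJob jobs, on top of the existing `:key` and `:partition_key`. A hedged illustration of how this could be combined with `karafka_options` (the job class and the partitioner logic are made up; with `:partition` the partitioner is expected to return a concrete partition number rather than a hashing key):

    class EventsJob < ActiveJob::Base
      queue_as :events

      karafka_options(
        dispatch_method: :produce_async,
        # Route each job to an explicit partition instead of hashing a key
        partition_key_type: :partition,
        partitioner: ->(job) { job.arguments.first % 6 }
      )
    end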
data/lib/karafka/pro/base_consumer.rb
@@ -42,6 +42,22 @@ module Karafka

      # By default we do nothing when ticking
      def tick; end
+
+     # @return [Karafka::Pro::Processing::Coordinators::ErrorsTracker] tracker for errors that
+     #   occurred during processing until another successful processing
+     #
+     # @note This will always contain **only** details of errors that occurred during `#consume`
+     #   because only those are retryable.
+     #
+     # @note This may contain more than one error because:
+     #   - this can collect various errors that might have happened during virtual partitions
+     #     execution
+     #   - errors can pile up during retries and until a clean run, they will be collected with
+     #     a limit of last 100. We do not store more because a consumer with an endless error loop
+     #     would cause memory leaks without such a limit.
+     def errors_tracker
+       coordinator.errors_tracker
+     end
    end
  end
end
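With this accessor, Pro consumers can inspect what failed on previous attempts and branch their recovery logic accordingly. A rough sketch of such a flow (the consumer class, the error type and the recovery action are illustrative, not prescribed by the gem):

    class OrdersConsumer < ApplicationConsumer
      def consume
        # On a retry we can look at the errors collected by the coordinator
        if retrying? && errors_tracker.last.is_a?(Timeout::Error)
          # e.g. back off a little or switch to a degraded code path
          sleep(1)
        end

        messages.each { |message| persist(message.payload) }
      end
    end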
data/lib/karafka/pro/connection/manager.rb
@@ -114,10 +114,15 @@ module Karafka
            once(:quiet!) { active_listeners.each(&:quiet!) }

            # If we are in the process of moving to quiet state, we need to check it.
-           if Karafka::App.quieting? && active_listeners.all?(&:quiet?)
+           if Karafka::App.quieting?
+             # If we are quieting but not all active listeners are quiet we need to wait for all of
+             # them to reach the quiet state
+             return unless active_listeners.all?(&:quiet?)
+
              once(:quieted!) { Karafka::App.quieted! }
            end

+           # Do nothing if we moved to quiet state and want to be in it
            return if Karafka::App.quiet?

            # Since separate subscription groups are subscribed to different topics, there is no risk
data/lib/karafka/pro/processing/coordinator.rb
@@ -21,13 +21,14 @@ module Karafka

        def_delegators :@collapser, :collapsed?, :collapse_until!

-       attr_reader :filter, :virtual_offset_manager, :shared_mutex
+       attr_reader :filter, :virtual_offset_manager, :shared_mutex, :errors_tracker

        # @param args [Object] anything the base coordinator accepts
        def initialize(*args)
          super

          @executed = []
+         @errors_tracker = Coordinators::ErrorsTracker.new
          @flow_mutex = Mutex.new
          # Lock for user code synchronization
          # We do not want to mix coordinator lock with the user lock not to create cases where
@@ -36,11 +37,11 @@ module Karafka
          # framework and can be used for user-facing locking
          @shared_mutex = Mutex.new
          @collapser = Collapser.new
-         @filter = FiltersApplier.new(self)
+         @filter = Coordinators::FiltersApplier.new(self)

          return unless topic.virtual_partitions?

-         @virtual_offset_manager = VirtualOffsetManager.new(
+         @virtual_offset_manager = Coordinators::VirtualOffsetManager.new(
            topic.name,
            partition,
            topic.virtual_partitions.offset_metadata_strategy
@@ -64,6 +65,14 @@ module Karafka

          @filter.apply!(messages)

+         # Do not clear coordinator errors storage when we are retrying, so we can reference the
+         # errors that have happened during recovery. This can be useful for implementing custom
+         # flows. There can be more errors than one when running with virtual partitions so we
+         # need to make sure we collect them all. Under collapse when we reference a given
+         # consumer we should be able to get all the errors and not just first/last.
+         #
+         # @note We use zero as the attempt mark because we are not "yet" in the attempt 1
+         @errors_tracker.clear if attempt.zero?
          @executed.clear

          # We keep the old processed offsets until the collapsing is done and regular processing
@@ -79,6 +88,7 @@ module Karafka
        # @param error [StandardError] error from the failure
        def failure!(consumer, error)
          super
+         @errors_tracker << error
          collapse_until!(@last_message.offset + 1)
        end
      end
data/lib/karafka/pro/processing/coordinators/errors_tracker.rb
@@ -0,0 +1,74 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Processing
+       # Namespace for Pro coordinator related sub-components
+       module Coordinators
+         # Object used to track errors in between executions to be able to build error-type based
+         # recovery flows.
+         class ErrorsTracker
+           include Enumerable
+
+           # Max errors we keep in memory.
+           # We do not want to keep more because for DLQ-less this would cause memory-leaks.
+           STORAGE_LIMIT = 100
+
+           private_constant :STORAGE_LIMIT
+
+           def initialize
+             @errors = []
+           end
+
+           # Clears all the errors
+           def clear
+             @errors.clear
+           end
+
+           # @param error [StandardError] adds the error to the tracker
+           def <<(error)
+             @errors.shift if @errors.size >= STORAGE_LIMIT
+             @errors << error
+           end
+
+           # @return [Boolean] is the error tracker empty
+           def empty?
+             @errors.empty?
+           end
+
+           # @return [Integer] number of elements
+           def size
+             count
+           end
+
+           # @return [StandardError, nil] last error that occurred or nil if no errors
+           def last
+             @errors.last
+           end
+
+           # Iterates over errors
+           # @param block [Proc] code we want to run on each error
+           def each(&block)
+             @errors.each(&block)
+           end
+
+           # @return [Array<StandardError>] array with all the errors that occurred
+           def all
+             @errors
+           end
+         end
+       end
+     end
+   end
+ end
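The tracker is a plain bounded collection, so its behaviour can be illustrated in isolation (values are only for demonstration):

    tracker = Karafka::Pro::Processing::Coordinators::ErrorsTracker.new

    tracker << StandardError.new('first failure')
    tracker << ArgumentError.new('second failure')

    tracker.size             # => 2
    tracker.last.message     # => "second failure"
    tracker.map(&:class)     # => [StandardError, ArgumentError] (Enumerable via #each)

    tracker.clear
    tracker.empty?           # => true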
data/lib/karafka/pro/processing/coordinators/filters_applier.rb
@@ -0,0 +1,107 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Processing
+       module Coordinators
+         # Applier for all filters we want to have. Whether related to limiting messages based
+         # on the payload or any other things.
+         #
+         # From the outside world perspective, this encapsulates all the filters.
+         # This means that this is the API we expose as a single filter, allowing us to control
+         # the filtering via many filters easily.
+         class FiltersApplier
+           # @return [Array] registered filters array. Useful if we want to inject internal context
+           #   aware filters.
+           attr_reader :filters
+
+           # @param coordinator [Pro::Coordinator] pro coordinator
+           def initialize(coordinator)
+             # Builds filters out of their factories
+             # We build it that way (providing topic and partition) because there may be a case
+             # where someone wants to have a specific logic that is per topic or partition. Like for
+             # example a case where there is a cache bypassing revocations for topic partition.
+             #
+             # We provide full Karafka routing topic here and not the name only, in case the filter
+             # would be customized based on other topic settings (like VPs, etc)
+             #
+             # This setup allows for biggest flexibility also because topic object holds the
+             # reference to the subscription group and consumer group
+             @filters = coordinator.topic.filtering.factories.map do |factory|
+               factory.call(coordinator.topic, coordinator.partition)
+             end
+           end
+
+           # @param messages [Array<Karafka::Messages::Message>] array with messages from the
+           #   partition
+           def apply!(messages)
+             return unless active?
+
+             @filters.each { |filter| filter.apply!(messages) }
+           end
+
+           # @return [Boolean] did we filter out any messages during filtering run
+           def applied?
+             return false unless active?
+
+             !applied.empty?
+           end
+
+           # @return [Symbol] consumer post-filtering action that should be taken
+           def action
+             return :skip unless applied?
+
+             # The highest priority is on a potential backoff from any of the filters because it is
+             # the less risky (delay and continue later)
+             return :pause if applied.any? { |filter| filter.action == :pause }
+
+             # If none of the filters wanted to pause, we can check for any that would want to seek
+             # and if there is any, we can go with this strategy
+             return :seek if applied.any? { |filter| filter.action == :seek }
+
+             :skip
+           end
+
+           # @return [Integer] minimum timeout we need to pause. This is the minimum for all the
+           #   filters to satisfy all of them.
+           def timeout
+             applied.map(&:timeout).compact.min || 0
+           end
+
+           # The first message we do need to get next time we poll. We use the minimum not to jump
+           # accidentally by over any.
+           # @return [Karafka::Messages::Message, nil] cursor message or nil if none
+           # @note Cursor message can also return the offset in the time format
+           def cursor
+             return nil unless active?
+
+             applied.map(&:cursor).compact.min_by(&:offset)
+           end
+
+           private
+
+           # @return [Boolean] is filtering active
+           def active?
+             !@filters.empty?
+           end
+
+           # @return [Array<Object>] filters that applied any sort of messages limiting
+           def applied
+             @filters.select(&:applied?)
+           end
+         end
+       end
+     end
+   end
+ end
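The applier consumes the filter factories registered on a topic via the Pro Filtering API: each factory is called with the routing topic and the partition and is expected to return an object responding to `apply!`, `applied?`, `action`, `timeout` and `cursor`. A rough sketch of registering such a factory in routing (the MaxPayloadFilter class is hypothetical and would typically be built on the Pro filters base class):

    class KarafkaApp < Karafka::App
      routes.draw do
        topic :events do
          consumer EventsConsumer

          # Factory receives the routing topic and the partition,
          # mirroring how FiltersApplier above invokes it
          filter ->(_topic, _partition) { MaxPayloadFilter.new(limit: 10_240) }
        end
      end
    end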
data/lib/karafka/pro/processing/coordinators/virtual_offset_manager.rb
@@ -0,0 +1,180 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Processing
+       module Coordinators
+         # Manager that keeps track of our offsets with the virtualization layer that are local
+         # to given partition assignment. It allows for easier offset management for virtual
+         # virtual partition cases as it provides us ability to mark as consumed and move the
+         # real offset behind as expected.
+         #
+         # @note We still use the regular coordinator "real" offset management as we want to have
+         #   them as separated as possible because the real seek offset management is also used for
+         #   pausing, filtering and others and should not be impacted by the virtual one
+         #
+         # @note This manager is **not** thread-safe by itself. It should operate from coordinator
+         #   locked locations.
+         class VirtualOffsetManager
+           attr_reader :groups
+
+           # @param topic [String]
+           # @param partition [Integer]
+           # @param offset_metadata_strategy [Symbol] what metadata should we select. That is,
+           #   should we use the most recent or one picked from the offset that is going to be
+           #   committed
+           #
+           # @note We need topic and partition because we use a seek message (virtual) for real
+           #   offset management. We could keep real message reference but this can be memory
+           #   consuming and not worth it.
+           def initialize(topic, partition, offset_metadata_strategy)
+             @topic = topic
+             @partition = partition
+             @groups = []
+             @marked = {}
+             @offsets_metadata = {}
+             @real_offset = -1
+             @offset_metadata_strategy = offset_metadata_strategy
+             @current_offset_metadata = nil
+           end
+
+           # Clears the manager for a next collective operation
+           def clear
+             @groups.clear
+             @offsets_metadata.clear
+             @current_offset_metadata = nil
+             @marked.clear
+             @real_offset = -1
+           end
+
+           # Registers an offset group coming from one virtual consumer. In order to move the real
+           # underlying offset accordingly, we need to make sure to track the virtual consumers
+           # offsets groups independently and only materialize the end result.
+           #
+           # @param offsets_group [Array<Integer>] offsets from one virtual consumer
+           def register(offsets_group)
+             @groups << offsets_group
+
+             offsets_group.each { |offset| @marked[offset] = false }
+           end
+
+           # Marks given message as marked (virtually consumed).
+           # We mark given message offset and other earlier offsets from the same group as done
+           # and we can refresh our real offset representation based on that as it might have
+           # changed to a newer real offset.
+           # @param message [Karafka::Messages::Message] message coming from VP we want to mark
+           # @param offset_metadata [String, nil] offset metadata. `nil` if none
+           def mark(message, offset_metadata)
+             offset = message.offset
+
+             # Store metadata when we materialize the most stable offset
+             @offsets_metadata[offset] = offset_metadata
+             @current_offset_metadata = offset_metadata
+
+             group = @groups.find { |reg_group| reg_group.include?(offset) }
+
+             # This case can happen when someone uses MoM and wants to mark message from a previous
+             # batch as consumed. We can add it, since the real offset refresh will point to it
+             unless group
+               group = [offset]
+               @groups << group
+             end
+
+             position = group.index(offset)
+
+             # Mark all previous messages from the same group also as virtually consumed
+             group[0..position].each do |markable_offset|
+               # Set previous messages metadata offset as the offset of higher one for overwrites
+               # unless a different metadata were set explicitely
+               @offsets_metadata[markable_offset] ||= offset_metadata
+               @marked[markable_offset] = true
+             end
+
+             # Recompute the real offset representation
+             materialize_real_offset
+           end
+
+           # Mark all from all groups including the `message`.
+           # Useful when operating in a collapsed state for marking
+           # @param message [Karafka::Messages::Message]
+           # @param offset_metadata [String, nil]
+           def mark_until(message, offset_metadata)
+             mark(message, offset_metadata)
+
+             @groups.each do |group|
+               group.each do |offset|
+                 next if offset > message.offset
+
+                 @offsets_metadata[offset] = offset_metadata
+                 @marked[offset] = true
+               end
+             end
+
+             materialize_real_offset
+           end
+
+           # @return [Array<Integer>] Offsets of messages already marked as consumed virtually
+           def marked
+             @marked.select { |_, status| status }.map(&:first).sort
+           end
+
+           # Is there a real offset we can mark as consumed
+           # @return [Boolean]
+           def markable?
+             !@real_offset.negative?
+           end
+
+           # @return [Array<Messages::Seek, String>] markable message for real offset marking and
+           #   its associated metadata
+           def markable
+             raise Errors::InvalidRealOffsetUsageError unless markable?
+
+             offset_metadata = case @offset_metadata_strategy
+                               when :exact
+                                 @offsets_metadata.fetch(@real_offset)
+                               when :current
+                                 @current_offset_metadata
+                               else
+                                 raise Errors::UnsupportedCaseError, @offset_metadata_strategy
+                               end
+
+             [
+               Messages::Seek.new(
+                 @topic,
+                 @partition,
+                 @real_offset
+               ),
+               offset_metadata
+             ]
+           end
+
+           private
+
+           # Recomputes the biggest possible real offset we can have.
+           # It picks the the biggest offset that has uninterrupted stream of virtually marked as
+           # consumed because this will be the collective offset.
+           def materialize_real_offset
+             @marked.to_a.sort_by(&:first).each do |offset, marked|
+               break unless marked
+
+               @real_offset = offset
+             end
+
+             @real_offset = (@marked.keys.min - 1) if @real_offset.negative?
+           end
+         end
+       end
+     end
+   end
+ end
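The class is internal to the Pro coordinator, but its materialization rule is easiest to see in isolation: the committable "real" offset only advances over an uninterrupted run of virtually marked offsets. A rough sketch of that behaviour (the Struct stands in for Karafka::Messages::Message, since only `#offset` is read by `#mark` here, and the topic, partition and offsets are made up):

    FakeMessage = Struct.new(:offset) # stand-in; #mark only reads #offset

    manager = Karafka::Pro::Processing::Coordinators::VirtualOffsetManager.new('events', 0, :current)

    # Two virtual consumers received interleaved slices of the same batch
    manager.register([0, 2, 4])
    manager.register([1, 3, 5])

    manager.mark(FakeMessage.new(4), nil)   # virtually marks 0, 2 and 4
    manager.markable?                       # => true
    manager.markable.first.offset           # => 0 (offset 1 from the other group is still pending)

    manager.mark(FakeMessage.new(1), nil)   # now 0..2 form an uninterrupted marked run
    manager.markable.first.offset           # => 2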