karafka 2.2.13 → 2.3.0.alpha1

Files changed (125)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +38 -12
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +161 -125
  6. data/Gemfile.lock +12 -12
  7. data/README.md +0 -2
  8. data/SECURITY.md +23 -0
  9. data/config/locales/errors.yml +7 -1
  10. data/config/locales/pro_errors.yml +22 -0
  11. data/docker-compose.yml +3 -1
  12. data/karafka.gemspec +2 -2
  13. data/lib/karafka/admin/acl.rb +287 -0
  14. data/lib/karafka/admin.rb +118 -16
  15. data/lib/karafka/app.rb +12 -3
  16. data/lib/karafka/base_consumer.rb +32 -31
  17. data/lib/karafka/cli/base.rb +1 -1
  18. data/lib/karafka/connection/client.rb +94 -84
  19. data/lib/karafka/connection/conductor.rb +28 -0
  20. data/lib/karafka/connection/listener.rb +165 -46
  21. data/lib/karafka/connection/listeners_batch.rb +5 -11
  22. data/lib/karafka/connection/manager.rb +72 -0
  23. data/lib/karafka/connection/messages_buffer.rb +12 -0
  24. data/lib/karafka/connection/proxy.rb +17 -0
  25. data/lib/karafka/connection/status.rb +75 -0
  26. data/lib/karafka/contracts/config.rb +14 -10
  27. data/lib/karafka/contracts/consumer_group.rb +9 -1
  28. data/lib/karafka/contracts/topic.rb +3 -1
  29. data/lib/karafka/errors.rb +13 -0
  30. data/lib/karafka/instrumentation/assignments_tracker.rb +96 -0
  31. data/lib/karafka/instrumentation/callbacks/rebalance.rb +10 -7
  32. data/lib/karafka/instrumentation/logger_listener.rb +3 -9
  33. data/lib/karafka/instrumentation/notifications.rb +19 -9
  34. data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +31 -28
  35. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +22 -3
  36. data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +15 -12
  37. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +39 -36
  38. data/lib/karafka/pro/base_consumer.rb +47 -0
  39. data/lib/karafka/pro/connection/manager.rb +300 -0
  40. data/lib/karafka/pro/connection/multiplexing/listener.rb +40 -0
  41. data/lib/karafka/pro/instrumentation/performance_tracker.rb +85 -0
  42. data/lib/karafka/pro/iterator/tpl_builder.rb +1 -1
  43. data/lib/karafka/pro/iterator.rb +1 -6
  44. data/lib/karafka/pro/loader.rb +16 -2
  45. data/lib/karafka/pro/processing/coordinator.rb +2 -1
  46. data/lib/karafka/pro/processing/executor.rb +37 -0
  47. data/lib/karafka/pro/processing/expansions_selector.rb +32 -0
  48. data/lib/karafka/pro/processing/jobs/periodic.rb +41 -0
  49. data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb +32 -0
  50. data/lib/karafka/pro/processing/jobs_builder.rb +14 -3
  51. data/lib/karafka/pro/processing/offset_metadata/consumer.rb +44 -0
  52. data/lib/karafka/pro/processing/offset_metadata/fetcher.rb +131 -0
  53. data/lib/karafka/pro/processing/offset_metadata/listener.rb +46 -0
  54. data/lib/karafka/pro/processing/schedulers/base.rb +143 -0
  55. data/lib/karafka/pro/processing/schedulers/default.rb +107 -0
  56. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
  57. data/lib/karafka/pro/processing/strategies/default.rb +136 -3
  58. data/lib/karafka/pro/processing/strategies/dlq/default.rb +35 -0
  59. data/lib/karafka/pro/processing/strategies/lrj/default.rb +1 -1
  60. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
  61. data/lib/karafka/pro/processing/strategies/vp/default.rb +60 -26
  62. data/lib/karafka/pro/processing/virtual_offset_manager.rb +41 -11
  63. data/lib/karafka/pro/routing/features/long_running_job/topic.rb +2 -0
  64. data/lib/karafka/pro/routing/features/multiplexing/config.rb +38 -0
  65. data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +114 -0
  66. data/lib/karafka/pro/routing/features/multiplexing/patches/contracts/consumer_group.rb +42 -0
  67. data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +38 -0
  68. data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +42 -0
  69. data/lib/karafka/pro/routing/features/multiplexing/subscription_groups_builder.rb +40 -0
  70. data/lib/karafka/pro/routing/features/multiplexing.rb +59 -0
  71. data/lib/karafka/pro/routing/features/non_blocking_job/topic.rb +32 -0
  72. data/lib/karafka/pro/routing/features/non_blocking_job.rb +37 -0
  73. data/lib/karafka/pro/routing/features/offset_metadata/config.rb +33 -0
  74. data/lib/karafka/pro/routing/features/offset_metadata/contracts/topic.rb +42 -0
  75. data/lib/karafka/pro/routing/features/offset_metadata/topic.rb +65 -0
  76. data/lib/karafka/pro/routing/features/offset_metadata.rb +40 -0
  77. data/lib/karafka/pro/routing/features/patterns/contracts/consumer_group.rb +4 -0
  78. data/lib/karafka/pro/routing/features/patterns/detector.rb +18 -10
  79. data/lib/karafka/pro/routing/features/periodic_job/config.rb +37 -0
  80. data/lib/karafka/pro/routing/features/periodic_job/contracts/topic.rb +44 -0
  81. data/lib/karafka/pro/routing/features/periodic_job/topic.rb +94 -0
  82. data/lib/karafka/pro/routing/features/periodic_job.rb +27 -0
  83. data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +1 -0
  84. data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
  85. data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +7 -2
  86. data/lib/karafka/process.rb +5 -3
  87. data/lib/karafka/processing/coordinator.rb +5 -1
  88. data/lib/karafka/processing/executor.rb +43 -13
  89. data/lib/karafka/processing/executors_buffer.rb +22 -7
  90. data/lib/karafka/processing/jobs/base.rb +19 -2
  91. data/lib/karafka/processing/jobs/consume.rb +3 -3
  92. data/lib/karafka/processing/jobs/idle.rb +5 -0
  93. data/lib/karafka/processing/jobs/revoked.rb +5 -0
  94. data/lib/karafka/processing/jobs/shutdown.rb +5 -0
  95. data/lib/karafka/processing/jobs_queue.rb +19 -8
  96. data/lib/karafka/processing/schedulers/default.rb +42 -0
  97. data/lib/karafka/processing/strategies/base.rb +13 -4
  98. data/lib/karafka/processing/strategies/default.rb +23 -7
  99. data/lib/karafka/processing/strategies/dlq.rb +36 -0
  100. data/lib/karafka/processing/worker.rb +4 -1
  101. data/lib/karafka/routing/builder.rb +12 -2
  102. data/lib/karafka/routing/consumer_group.rb +5 -5
  103. data/lib/karafka/routing/features/base.rb +44 -8
  104. data/lib/karafka/routing/features/dead_letter_queue/config.rb +6 -1
  105. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
  106. data/lib/karafka/routing/features/dead_letter_queue/topic.rb +9 -2
  107. data/lib/karafka/routing/proxy.rb +4 -3
  108. data/lib/karafka/routing/subscription_group.rb +2 -2
  109. data/lib/karafka/routing/subscription_groups_builder.rb +11 -2
  110. data/lib/karafka/routing/topic.rb +8 -10
  111. data/lib/karafka/routing/topics.rb +1 -1
  112. data/lib/karafka/runner.rb +13 -3
  113. data/lib/karafka/server.rb +5 -9
  114. data/lib/karafka/setup/config.rb +21 -1
  115. data/lib/karafka/status.rb +23 -14
  116. data/lib/karafka/templates/karafka.rb.erb +7 -0
  117. data/lib/karafka/time_trackers/partition_usage.rb +56 -0
  118. data/lib/karafka/version.rb +1 -1
  119. data.tar.gz.sig +0 -0
  120. metadata +47 -13
  121. metadata.gz.sig +0 -0
  122. data/lib/karafka/connection/consumer_group_coordinator.rb +0 -48
  123. data/lib/karafka/pro/performance_tracker.rb +0 -84
  124. data/lib/karafka/pro/processing/scheduler.rb +0 -74
  125. data/lib/karafka/processing/scheduler.rb +0 -38
data/lib/karafka/processing/executor.rb

@@ -11,6 +11,15 @@ module Karafka
   #
   # @note Executors are not removed after partition is revoked. They are not that big and will
   #   be re-used in case of a re-claim
+  #
+  # @note Since given consumer can run various operations, executor manages that and its
+  #   lifecycle. There are following types of operations with appropriate before/after, etc:
+  #
+  #   - consume - primary operation related to running user consumption code
+  #   - idle - cleanup job that runs on idle runs where no messages would be passed to the end
+  #     user. This is used for complex flows with filters, etc
+  #   - revoked - runs after the partition was revoked
+  #   - shutdown - runs when process is going to shutdown
   class Executor
     extend Forwardable

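
Each operation type pairs a "before schedule" hook (run in the listener thread) with the operation itself (run later in a worker). A toy sketch of that pairing; the class and method bodies below are illustrative stand-ins, not Karafka's actual executor API:

    # Toy model of the executor's operation lifecycle
    class ToyExecutor
      OPERATIONS = %i[consume idle revoked shutdown].freeze

      attr_reader :log

      def initialize
        @log = []
      end

      OPERATIONS.each do |action|
        # The before_schedule_* hook would run in the listener thread
        define_method(:"before_schedule_#{action}") { @log << "before_schedule_#{action}" }
        # The operation itself would run later, in a worker thread
        define_method(action) { @log << action.to_s }
      end
    end

    executor = ToyExecutor.new
    executor.before_schedule_consume
    executor.consume
    executor.log # => ["before_schedule_consume", "consume"]
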
@@ -39,11 +48,11 @@ module Karafka
   end

   # Allows us to prepare the consumer in the listener thread prior to the job being send to
-  #   the queue. It also allows to run some code that is time sensitive and cannot wait in the
+  #   be scheduled. It also allows to run some code that is time sensitive and cannot wait in the
   #   queue as it could cause starvation.
   #
   # @param messages [Array<Karafka::Messages::Message>]
-  def before_enqueue(messages)
+  def before_schedule_consume(messages)
     # Recreate consumer with each batch if persistence is not enabled
     # We reload the consumers with each batch instead of relying on some external signals
     # when needed for consistency. That way devs may have it on or off and not in this
@@ -60,7 +69,7 @@ module Karafka
       Time.now
     )

-    consumer.on_before_enqueue
+    consumer.on_before_schedule_consume
   end

   # Runs setup and warm-up code in the worker prior to running the consumption
@@ -79,23 +88,23 @@ module Karafka
     consumer.on_after_consume
   end

+  # Runs the code needed before idle work is scheduled
+  def before_schedule_idle
+    consumer.on_before_schedule_idle
+  end
+
   # Runs consumer idle operations
   # This may include house-keeping or other state management changes that can occur but that
   # not mean there are any new messages available for the end user to process
   def idle
-    # Initializes the messages set in case idle operation would happen before any processing
-    # This prevents us from having no messages object at all as the messages object and
-    # its metadata may be used for statistics
-    consumer.messages ||= Messages::Builders::Messages.call(
-      [],
-      topic,
-      partition,
-      Time.now
-    )
-
     consumer.on_idle
   end

+  # Runs code needed before revoked job is scheduled
+  def before_schedule_revoked
+    consumer.on_before_schedule_revoked if @consumer
+  end
+
   # Runs the controller `#revoked` method that should be triggered when a given consumer is
   # no longer needed due to partitions reassignment.
   #
@@ -112,6 +121,11 @@ module Karafka
     consumer.on_revoked if @consumer
   end

+  # Runs code needed before shutdown job is scheduled
+  def before_schedule_shutdown
+    consumer.on_before_schedule_shutdown if @consumer
+  end
+
   # Runs the controller `#shutdown` method that should be triggered when a given consumer is
   # no longer needed as we're closing the process.
   #
@@ -146,10 +160,26 @@ module Karafka
       consumer.client = @client
       consumer.producer = ::Karafka::App.producer
       consumer.coordinator = @coordinator
+      # Since we have some message-less flows (idle, etc), we initialize consumer with empty
+      # messages set. In production we have persistent consumers, so this is not a performance
+      # overhead as this will happen only once per consumer lifetime
+      consumer.messages = empty_messages

       consumer
     end
   end
+
+  # Initializes the messages set in case given operation would happen before any processing
+  # This prevents us from having no messages object at all as the messages object and
+  # its metadata may be used for statistics
+  def empty_messages
+    Messages::Builders::Messages.call(
+      [],
+      topic,
+      partition,
+      Time.now
+    )
+  end
   end
 end
 end
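
Initializing every consumer with an empty batch means metadata-driven code (statistics, instrumentation) never needs a nil check before the first poll. A rough sketch of the idea with a stand-in batch object, not Karafka's Messages builder:

    # Stand-in batch: even with zero messages it still carries usable metadata
    EmptyBatch = Struct.new(:messages, :topic, :partition, :created_at)

    batch = EmptyBatch.new([], 'events', 0, Time.now)

    # Metadata consumers can rely on the object existing before any processing
    puts batch.messages.size # => 0
    puts batch.topic         # => events
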
data/lib/karafka/processing/executors_buffer.rb

@@ -13,6 +13,7 @@ module Karafka
     @client = client
     # We need two layers here to keep track of topics, partitions and processing groups
     @buffer = Hash.new { |h, k| h[k] = Hash.new { |h2, k2| h2[k2] = {} } }
+    @executor_class = Karafka::App.config.internal.processing.executor_class
   end

   # Finds or creates an executor based on the provided details
@@ -21,15 +22,28 @@ module Karafka
   # @param partition [Integer] partition number
   # @param parallel_key [String] parallel group key
   # @param coordinator [Karafka::Processing::Coordinator]
-  # @return [Executor] consumer executor
+  # @return [Executor, Pro::Processing::Executor] consumer executor
   def find_or_create(topic, partition, parallel_key, coordinator)
-    @buffer[topic][partition][parallel_key] ||= Executor.new(
+    @buffer[topic][partition][parallel_key] ||= @executor_class.new(
       @subscription_group.id,
       @client,
       coordinator
     )
   end

+  # Finds all existing executors for given topic partition or creates one for it
+  # @param topic [String] topic name
+  # @param partition [Integer] partition number
+  # @param coordinator [Karafka::Processing::Coordinator]
+  # @return [Array<Executor, Pro::Processing::Executor>]
+  def find_all_or_create(topic, partition, coordinator)
+    existing = find_all(topic, partition)
+
+    return existing unless existing.empty?
+
+    [find_or_create(topic, partition, 0, coordinator)]
+  end
+
   # Revokes executors of a given topic partition, so they won't be used anymore for incoming
   # messages
   #
@@ -43,7 +57,8 @@ module Karafka
   #
   # @param topic [String] topic name
   # @param partition [Integer] partition number
-  # @return [Array<Executor>] executors in use for this topic + partition
+  # @return [Array<Executor, Pro::Processing::Executor>] executors in use for this
+  #   topic + partition
   def find_all(topic, partition)
     @buffer[topic][partition].values
   end
@@ -52,11 +67,11 @@ module Karafka
   #   info
   # @yieldparam [Routing::Topic] karafka routing topic object
   # @yieldparam [Integer] partition number
-  # @yieldparam [Executor] given executor
+  # @yieldparam [Executor, Pro::Processing::Executor] given executor
   def each
-    @buffer.each do |_, partitions|
-      partitions.each do |_, executors|
-        executors.each do |_, executor|
+    @buffer.each_value do |partitions|
+      partitions.each_value do |executors|
+        executors.each_value do |executor|
           yield(executor)
         end
       end
data/lib/karafka/processing/jobs/base.rb

@@ -20,11 +20,14 @@ module Karafka
     # All jobs are blocking by default and they can release the lock when blocking operations
     # are done (if needed)
     @non_blocking = false
+    @status = :pending
   end

-  # When redefined can run any code prior to the job being enqueued
+  # When redefined can run any code prior to the job being scheduled
   # @note This will run in the listener thread and not in the worker
-  def before_enqueue; end
+  def before_schedule
+    raise NotImplementedError, 'Please implement in a subclass'
+  end

   # When redefined can run any code that should run before executing the proper code
   def before_call; end
@@ -49,6 +52,20 @@ module Karafka
   def non_blocking?
     @non_blocking
   end
+
+  # @return [Boolean] was this job finished.
+  def finished?
+    @status == :finished
+  end
+
+  # Marks the job as finished. Used by the worker to indicate, that this job is done.
+  #
+  # @note Since the scheduler knows exactly when it schedules jobs and when it keeps them
+  #   pending, we do not need advanced state tracking and the only information from the
+  #   "outside" is whether it was finished or not after it was scheduled for execution.
+  def finish!
+    @status = :finished
+  end
 end
 end
 end
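
The two-state status keeps coordination cheap: the scheduler already knows what it has scheduled, so the only external fact it needs is completion. The contract condensed into a minimal sketch:

    class SketchJob
      def initialize
        @status = :pending
      end

      def finished?
        @status == :finished
      end

      # Called by the worker once the job body ran, regardless of its result
      def finish!
        @status = :finished
      end
    end

    job = SketchJob.new
    job.finished? # => false
    job.finish!
    job.finished? # => true
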
data/lib/karafka/processing/jobs/consume.rb

@@ -20,9 +20,9 @@ module Karafka
   end

   # Runs all the preparation code on the executor that needs to happen before the job is
-  #   enqueued.
-  def before_enqueue
-    executor.before_enqueue(@messages)
+  #   scheduled.
+  def before_schedule
+    executor.before_schedule_consume(@messages)
   end

   # Runs the before consumption preparations on the executor
data/lib/karafka/processing/jobs/idle.rb

@@ -14,6 +14,11 @@ module Karafka
     super()
   end

+  # Runs code prior to scheduling this idle job
+  def before_schedule
+    executor.before_schedule_idle
+  end
+
   # Run the idle work via the executor
   def call
     executor.idle
data/lib/karafka/processing/jobs/revoked.rb

@@ -12,6 +12,11 @@ module Karafka
     super()
   end

+  # Runs code prior to scheduling this revoked job
+  def before_schedule
+    executor.before_schedule_revoked
+  end
+
   # Runs the revoking job via an executor.
   def call
     executor.revoked
data/lib/karafka/processing/jobs/shutdown.rb

@@ -13,6 +13,11 @@ module Karafka
     super()
   end

+  # Runs code prior to scheduling this shutdown job
+  def before_schedule
+    executor.before_schedule_shutdown
+  end
+
   # Runs the shutdown job via an executor.
   def call
     executor.shutdown
data/lib/karafka/processing/jobs_queue.rb

@@ -23,12 +23,7 @@ module Karafka
     #   scheduled by Ruby hundreds of thousands of times per group.
     # We cannot use a single semaphore as it could potentially block in listeners that should
     #   process with their data and also could unlock when a given group needs to remain locked
-    @semaphores = Concurrent::Map.new do |h, k|
-      # Ruby prior to 3.2 did not have queue with a timeout on `#pop`, that is why for those
-      # versions we use our custom queue wrapper
-      h.compute_if_absent(k) { RUBY_VERSION < '3.2' ? TimedQueue.new : Queue.new }
-    end
-
+    @semaphores = {}
     @concurrency = Karafka::App.config.concurrency
     @tick_interval = ::Karafka::App.config.internal.tick_interval
     @in_processing = Hash.new { |h, k| h[k] = [] }
@@ -37,6 +32,22 @@ module Karafka
     @mutex = Mutex.new
   end

+  # Registers given subscription group id in the queue. It is needed so we do not dynamically
+  # create semaphore, hence avoiding potential race conditions
+  #
+  # @param group_id [String]
+  def register(group_id)
+    @mutex.synchronize do
+      # Ruby prior to 3.2 did not have queue with a timeout on `#pop`, that is why for those
+      # versions we use our custom queue wrapper
+      #
+      # Initializes this semaphore from the mutex, so it is never auto-created
+      # Since we always schedule a job before waiting using semaphores, there won't be any
+      # concurrency problems
+      @semaphores[group_id] = RUBY_VERSION < '3.2' ? TimedQueue.new : Queue.new
+    end
+  end
+
   # Adds the job to the internal main queue, scheduling it for execution in a worker and marks
   # this job as in processing pipeline.
   #
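
Pre-registering one semaphore per subscription group under the mutex replaces lazy creation on first access, which is where two threads could otherwise race to create the same entry; `#fetch` then makes any unregistered lookup fail loudly. The pattern in isolation, with stand-in names:

    semaphores = {}
    mutex = Mutex.new

    # Registration happens once, up front, before any ticking or waiting
    def register(semaphores, mutex, group_id)
      mutex.synchronize { semaphores[group_id] = Queue.new }
    end

    register(semaphores, mutex, 'group-a')

    semaphores.fetch('group-a') << true
    puts semaphores.fetch('group-a').pop # => true
    # semaphores.fetch('group-b')        # => KeyError, not a silently created queue
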
@@ -79,7 +90,7 @@ module Karafka
   # @param group_id [String] id of the group we want to unlock for one tick
   # @note This does not release the wait lock. It just causes a conditions recheck
   def tick(group_id)
-    @semaphores[group_id] << true
+    @semaphores.fetch(group_id) << true
   end

   # Marks a given job from a given group as completed. When there are no more jobs from a given
@@ -149,7 +160,7 @@ module Karafka
     while wait?(group_id)
       yield if block_given?

-      @semaphores[group_id].pop(timeout: @tick_interval / 1_000.0)
+      @semaphores.fetch(group_id).pop(timeout: @tick_interval / 1_000.0)
     end
   end

data/lib/karafka/processing/schedulers/default.rb (new file)

@@ -0,0 +1,42 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Processing
+    # Namespace for Karafka OSS schedulers
+    module Schedulers
+      # FIFO scheduler for messages coming from various topics and partitions
+      class Default
+        # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
+        def initialize(queue)
+          @queue = queue
+        end
+
+        # Schedules jobs in the fifo order
+        #
+        # @param jobs_array [Array<Karafka::Processing::Jobs::Consume>] jobs we want to schedule
+        def on_schedule_consumption(jobs_array)
+          jobs_array.each do |job|
+            @queue << job
+          end
+        end
+
+        # Revocation, shutdown and idle jobs can also run in fifo by default
+        alias on_schedule_revocation on_schedule_consumption
+        alias on_schedule_shutdown on_schedule_consumption
+        alias on_schedule_idle on_schedule_consumption
+
+        # This scheduler does not have anything to manage as it is a pass through and has no state
+        def on_manage
+          nil
+        end
+
+        # This scheduler does not need to be cleared because it is stateless
+        #
+        # @param _group_id [String] Subscription group id
+        def on_clear(_group_id)
+          nil
+        end
+      end
+    end
+  end
+end
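
A usage sketch for the new FIFO scheduler, assuming karafka 2.3 is installed and loadable; a plain Queue and symbols stand in for the real JobsQueue and job objects:

    require 'karafka'

    queue = Queue.new
    scheduler = Karafka::Processing::Schedulers::Default.new(queue)

    # Symbols as stand-ins; real jobs are Jobs::Consume, Jobs::Revoked, etc.
    scheduler.on_schedule_consumption(%i[job_a job_b job_c])

    3.times { print queue.pop, ' ' } # => job_a job_b job_c (strict FIFO)
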
data/lib/karafka/processing/strategies/base.rb

@@ -11,10 +11,19 @@ module Karafka
   module Strategies
     # Base strategy that should be included in each strategy, just to ensure the API
     module Base
-      # What should happen before jobs are enqueued
-      # @note This runs from the listener thread, not recommended to put anything slow here
-      def handle_before_enqueue
-        raise NotImplementedError, 'Implement in a subclass'
+      # Defines all the before schedule handlers for appropriate actions
+      %i[
+        consume
+        idle
+        revoked
+        shutdown
+      ].each do |action|
+        class_eval <<~RUBY, __FILE__, __LINE__ + 1
+          def handle_before_schedule_#{action}
+            # What should happen before scheduling this work
+            raise NotImplementedError, 'Implement in a subclass'
+          end
+        RUBY
       end

       # What should happen before we kick in the processing
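
The `class_eval <<~RUBY` loop is plain Ruby metaprogramming: it stamps out one `handle_before_schedule_*` method per action, and `__FILE__, __LINE__ + 1` keeps backtraces pointing at the real source location. The technique reduced to a few lines:

    class Handlers
      %i[consume idle].each do |action|
        class_eval <<~RUBY, __FILE__, __LINE__ + 1
          def handle_before_schedule_#{action}
            "before scheduling #{action}"
          end
        RUBY
      end
    end

    puts Handlers.new.handle_before_schedule_consume # => before scheduling consume
    puts Handlers.new.handle_before_schedule_idle    # => before scheduling idle
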
data/lib/karafka/processing/strategies/default.rb

@@ -13,6 +13,23 @@ module Karafka
   # Apply strategy for a non-feature based flow
   FEATURES = %i[].freeze

+  # By default on all "before schedule" we just run instrumentation, nothing more
+  %i[
+    consume
+    idle
+    revoked
+    shutdown
+  ].each do |action|
+    class_eval <<~RUBY, __FILE__, __LINE__ + 1
+      # No actions needed for the standard flow here
+      def handle_before_schedule_#{action}
+        Karafka.monitor.instrument('consumer.before_schedule_#{action}', caller: self)
+
+        nil
+      end
+    RUBY
+  end
+
   # Marks message as consumed in an async way.
   #
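
Each generated handler emits a `consumer.before_schedule_<action>` notification, so the new hooks can be observed like other lifecycle events. A subscription sketch, assuming these events are registered with the default notifications bus of a booted Karafka app:

    # karafka.rb, after App setup
    Karafka.monitor.subscribe('consumer.before_schedule_consume') do |event|
      Karafka.logger.info "Scheduling consumption for #{event[:caller].class}"
    end
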
@@ -24,6 +41,9 @@ module Karafka
   #   already processed but rather at the next one. This applies to both sync and async
   #   versions of this method.
   def mark_as_consumed(message)
+    # seek offset can be nil only in case `#seek` was invoked with offset reset request
+    # In case like this we ignore marking
+    return true if coordinator.seek_offset.nil?
     # Ignore earlier offsets than the one we already committed
     return true if coordinator.seek_offset > message.offset
     return false if revoked?
@@ -40,6 +60,9 @@ module Karafka
   # @return [Boolean] true if we were able to mark the offset, false otherwise.
   #   False indicates that we were not able and that we have lost the partition.
   def mark_as_consumed!(message)
+    # seek offset can be nil only in case `#seek` was invoked with offset reset request
+    # In case like this we ignore marking
+    return true if coordinator.seek_offset.nil?
     # Ignore earlier offsets than the one we already committed
     return true if coordinator.seek_offset > message.offset
     return false if revoked?
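
The nil guard orders the short-circuits: a nil seek offset (possible after `#seek` with an offset reset request) turns marking into a successful no-op before the earlier-offset and revocation checks run. The decision chain distilled into a hypothetical helper:

    def marking_result(seek_offset, message_offset, revoked)
      return :noop_ok if seek_offset.nil?             # seek requested an offset reset
      return :noop_ok if seek_offset > message_offset # already committed past it
      return :lost_partition if revoked

      :mark
    end

    puts marking_result(nil, 10, false) # => noop_ok
    puts marking_result(15, 10, false)  # => noop_ok
    puts marking_result(5, 10, true)    # => lost_partition
    puts marking_result(5, 10, false)   # => mark
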
@@ -76,13 +99,6 @@ module Karafka
     commit_offsets(async: false)
   end

-  # No actions needed for the standard flow here
-  def handle_before_enqueue
-    Karafka.monitor.instrument('consumer.before_enqueue', caller: self)
-
-    nil
-  end
-
   # Increment number of attempts
   def handle_before_consume
     coordinator.pause_tracker.increment
data/lib/karafka/processing/strategies/dlq.rb

@@ -14,6 +14,42 @@ module Karafka
     dead_letter_queue
   ].freeze

+  # Override of the standard `#mark_as_consumed` in order to handle the pause tracker
+  # reset in case DLQ is marked as fully independent. When DLQ is marked independent,
+  # any offset marking causes the pause count tracker to reset. This is useful when
+  # the error is not due to the collective batch operations state but due to intermediate
+  # "crawling" errors that move with it
+  #
+  # @see `Strategies::Default#mark_as_consumed` for more details
+  # @param message [Messages::Message]
+  def mark_as_consumed(message)
+    # If we are not retrying pause count is already 0, no need to try to reset the state
+    return super unless retrying?
+    # If we do not use independent marking on DLQ, we just mark as consumed
+    return super unless topic.dead_letter_queue.independent?
+    # If we were not able to mark no need to reset
+    return false unless super
+
+    coordinator.pause_tracker.reset
+
+    true
+  end
+
+  # Override of the standard `#mark_as_consumed!`. Resets the pause tracker count in case
+  # DLQ was configured with the `independent` flag.
+  #
+  # @see `Strategies::Default#mark_as_consumed!` for more details
+  # @param message [Messages::Message]
+  def mark_as_consumed!(message)
+    return super unless retrying?
+    return super unless topic.dead_letter_queue.independent?
+    return false unless super
+
+    coordinator.pause_tracker.reset
+
+    true
+  end
+
   # When manual offset management is on, we do not mark anything as consumed automatically
   # and we rely on the user to figure things out
   def handle_after_consume
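
The `independent` flag is driven from routing. A configuration sketch; the topic names, the consumer class, the broker address, and the retry count are illustrative:

    class OrdersConsumer < Karafka::BaseConsumer
      def consume
        messages.each { |message| puts message.payload }
      end
    end

    class KarafkaApp < Karafka::App
      setup do |config|
        config.kafka = { 'bootstrap.servers': '127.0.0.1:9092' }
      end

      routes.draw do
        topic :orders do
          consumer OrdersConsumer
          dead_letter_queue(
            topic: 'orders_dlq',
            max_retries: 3,
            # Reset the pause/retry counter after each successful marking instead
            # of treating the whole batch as one collective retry state
            independent: true
          )
        end
      end
    end
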
data/lib/karafka/processing/worker.rb

@@ -83,7 +83,10 @@ module Karafka
       )
     ensure
       # job can be nil when the queue is being closed
-      @jobs_queue.complete(job) if job
+      if job
+        @jobs_queue.complete(job)
+        job.finish!
+      end

       # Always publish info, that we completed all the work despite its result
       Karafka.monitor.instrument('worker.completed', instrument_details)
data/lib/karafka/routing/builder.rb

@@ -109,10 +109,20 @@ module Karafka
   #   subscription group customization
   # @param subscription_group_name [String, Symbol] subscription group id. When not provided,
   #   a random uuid will be used
+  # @param args [Array] any extra arguments accepted by the subscription group builder
   # @param block [Proc] further topics definitions
-  def subscription_group(subscription_group_name = SubscriptionGroup.id, &block)
+  def subscription_group(
+    subscription_group_name = SubscriptionGroup.id,
+    **args,
+    &block
+  )
     consumer_group('app') do
-      target.public_send(:subscription_group=, subscription_group_name.to_s, &block)
+      target.public_send(
+        :subscription_group=,
+        subscription_group_name.to_s,
+        **args,
+        &block
+      )
     end
   end

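
With `**args` accepted, a named subscription group can forward extra options to the subscription groups builder; this is how Pro features such as multiplexing hook in. A routing sketch with illustrative names:

    class PaymentsConsumer < Karafka::BaseConsumer
      def consume; end
    end

    class KarafkaApp < Karafka::App
      routes.draw do
        # Extra keyword arguments given alongside the name are forwarded to the
        # subscription groups builder
        subscription_group 'payments' do
          topic :payments do
            consumer PaymentsConsumer
          end
        end
      end
    end
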
data/lib/karafka/routing/consumer_group.rb

@@ -14,7 +14,7 @@ module Karafka
   # It allows us to store the "current" subscription group defined in the routing
   # This subscription group id is then injected into topics, so we can compute the subscription
   #   groups
-  attr_accessor :current_subscription_group_name
+  attr_accessor :current_subscription_group_details

   # @param name [String, Symbol] raw name of this consumer group. Raw means, that it does not
   #   yet have an application client_id namespace, this will be added here by default.
@@ -26,7 +26,7 @@ module Karafka
     @topics = Topics.new([])
     # Initialize the subscription group so there's always a value for it, since even if not
     # defined directly, a subscription group will be created
-    @current_subscription_group_name = SubscriptionGroup.id
+    @current_subscription_group_details = { name: SubscriptionGroup.id }
   end

   # @return [Boolean] true if this consumer group should be active in our current process
@@ -48,7 +48,7 @@ module Karafka
     built_topic = @topics.last
     # We overwrite it conditionally in case it was not set by the user inline in the topic
     # block definition
-    built_topic.subscription_group_name ||= current_subscription_group_name
+    built_topic.subscription_group_details ||= current_subscription_group_details
     built_topic
   end

@@ -59,13 +59,13 @@ module Karafka
   def subscription_group=(name = SubscriptionGroup.id, &block)
     # We cast it here, so the routing supports symbol based but that's anyhow later on
     # validated as a string
-    @current_subscription_group_name = name.to_s
+    @current_subscription_group_details = { name: name.to_s }

     Proxy.new(self, &block)

     # We need to reset the current subscription group after it is used, so it won't leak
     # outside to other topics that would be defined without a defined subscription group
-    @current_subscription_group_name = SubscriptionGroup.id
+    @current_subscription_group_details = { name: SubscriptionGroup.id }
   end

   # @return [Array<Routing::SubscriptionGroup>] all the subscription groups build based on
data/lib/karafka/routing/features/base.rb

@@ -13,17 +13,46 @@ module Karafka
   class << self
     # Extends topic and builder with given feature API
     def activate
-      Topic.prepend(self::Topic) if const_defined?('Topic', false)
-      Topics.prepend(self::Topics) if const_defined?('Topics', false)
-      ConsumerGroup.prepend(self::ConsumerGroup) if const_defined?('ConsumerGroup', false)
-      Proxy.prepend(self::Proxy) if const_defined?('Proxy', false)
-      Builder.prepend(self::Builder) if const_defined?('Builder', false)
-      Builder.prepend(Base::Expander.new(self)) if const_defined?('Contracts', false)
+      if const_defined?('Topic', false)
+        Topic.prepend(self::Topic)
+      end
+
+      if const_defined?('Topics', false)
+        Topics.prepend(self::Topics)
+      end
+
+      if const_defined?('ConsumerGroup', false)
+        ConsumerGroup.prepend(self::ConsumerGroup)
+      end
+
+      if const_defined?('Proxy', false)
+        Proxy.prepend(self::Proxy)
+      end
+
+      if const_defined?('Builder', false)
+        Builder.prepend(self::Builder)
+      end
+
+      if const_defined?('Contracts', false)
+        Builder.prepend(Base::Expander.new(self))
+      end
+
+      if const_defined?('SubscriptionGroup', false)
+        SubscriptionGroup.prepend(self::SubscriptionGroup)
+      end
+
+      if const_defined?('SubscriptionGroupsBuilder', false)
+        SubscriptionGroupsBuilder.prepend(self::SubscriptionGroupsBuilder)
+      end
     end

-    # Loads all the features and activates them
+    # Loads all the features and activates them once
     def load_all
+      return if @loaded
+
       features.each(&:activate)
+
+      @loaded = true
     end

     # @param config [Karafka::Core::Configurable::Node] app config that we can alter with
@@ -41,11 +70,18 @@ module Karafka

     private

-    # @return [Array<Class>] all available routing features
+    # @return [Array<Class>] all available routing features that are direct descendants of
+    #   the features base. Approach with using `#superclass` prevents us from accidentally
+    #   loading Pro components
     def features
       ObjectSpace
         .each_object(Class)
         .select { |klass| klass < self }
+        # Ensures, that Pro components are only loaded when we operate in Pro mode. Since
+        # outside of specs Zeitwerk does not require them at all, they will not be loaded
+        # anyhow, but for specs this needs to be done as RSpec requires all files to be
+        # present
+        .reject { |klass| Karafka.pro? ? false : klass.superclass != self }
         .sort_by(&:to_s)
     end

data/lib/karafka/routing/features/dead_letter_queue/config.rb

@@ -11,8 +11,13 @@ module Karafka
     :max_retries,
     # To what topic the skipped messages should be moved
     :topic,
+    # Should retries be handled collectively on a batch or independently per message
+    :independent,
     keyword_init: true
-  ) { alias_method :active?, :active }
+  ) do
+    alias_method :active?, :active
+    alias_method :independent?, :independent
+  end
 end
 end
 end
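
The config object stays a keyword-initialized Struct, now with two predicate aliases so `independent?` reads naturally at call sites. The same Struct idiom in isolation:

    Config = Struct.new(:active, :independent, keyword_init: true) do
      alias_method :active?, :active
      alias_method :independent?, :independent
    end

    cfg = Config.new(active: true, independent: false)
    puts cfg.active?      # => true
    puts cfg.independent? # => false
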
data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb

@@ -18,6 +18,7 @@ module Karafka

   nested :dead_letter_queue do
     required(:active) { |val| [true, false].include?(val) }
+    required(:independent) { |val| [true, false].include?(val) }
     required(:max_retries) { |val| val.is_a?(Integer) && val >= 0 }
   end
 end