karafka 2.2.13 → 2.3.0.alpha1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +38 -12
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +161 -125
  6. data/Gemfile.lock +12 -12
  7. data/README.md +0 -2
  8. data/SECURITY.md +23 -0
  9. data/config/locales/errors.yml +7 -1
  10. data/config/locales/pro_errors.yml +22 -0
  11. data/docker-compose.yml +3 -1
  12. data/karafka.gemspec +2 -2
  13. data/lib/karafka/admin/acl.rb +287 -0
  14. data/lib/karafka/admin.rb +118 -16
  15. data/lib/karafka/app.rb +12 -3
  16. data/lib/karafka/base_consumer.rb +32 -31
  17. data/lib/karafka/cli/base.rb +1 -1
  18. data/lib/karafka/connection/client.rb +94 -84
  19. data/lib/karafka/connection/conductor.rb +28 -0
  20. data/lib/karafka/connection/listener.rb +165 -46
  21. data/lib/karafka/connection/listeners_batch.rb +5 -11
  22. data/lib/karafka/connection/manager.rb +72 -0
  23. data/lib/karafka/connection/messages_buffer.rb +12 -0
  24. data/lib/karafka/connection/proxy.rb +17 -0
  25. data/lib/karafka/connection/status.rb +75 -0
  26. data/lib/karafka/contracts/config.rb +14 -10
  27. data/lib/karafka/contracts/consumer_group.rb +9 -1
  28. data/lib/karafka/contracts/topic.rb +3 -1
  29. data/lib/karafka/errors.rb +13 -0
  30. data/lib/karafka/instrumentation/assignments_tracker.rb +96 -0
  31. data/lib/karafka/instrumentation/callbacks/rebalance.rb +10 -7
  32. data/lib/karafka/instrumentation/logger_listener.rb +3 -9
  33. data/lib/karafka/instrumentation/notifications.rb +19 -9
  34. data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +31 -28
  35. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +22 -3
  36. data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +15 -12
  37. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +39 -36
  38. data/lib/karafka/pro/base_consumer.rb +47 -0
  39. data/lib/karafka/pro/connection/manager.rb +300 -0
  40. data/lib/karafka/pro/connection/multiplexing/listener.rb +40 -0
  41. data/lib/karafka/pro/instrumentation/performance_tracker.rb +85 -0
  42. data/lib/karafka/pro/iterator/tpl_builder.rb +1 -1
  43. data/lib/karafka/pro/iterator.rb +1 -6
  44. data/lib/karafka/pro/loader.rb +16 -2
  45. data/lib/karafka/pro/processing/coordinator.rb +2 -1
  46. data/lib/karafka/pro/processing/executor.rb +37 -0
  47. data/lib/karafka/pro/processing/expansions_selector.rb +32 -0
  48. data/lib/karafka/pro/processing/jobs/periodic.rb +41 -0
  49. data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb +32 -0
  50. data/lib/karafka/pro/processing/jobs_builder.rb +14 -3
  51. data/lib/karafka/pro/processing/offset_metadata/consumer.rb +44 -0
  52. data/lib/karafka/pro/processing/offset_metadata/fetcher.rb +131 -0
  53. data/lib/karafka/pro/processing/offset_metadata/listener.rb +46 -0
  54. data/lib/karafka/pro/processing/schedulers/base.rb +143 -0
  55. data/lib/karafka/pro/processing/schedulers/default.rb +107 -0
  56. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
  57. data/lib/karafka/pro/processing/strategies/default.rb +136 -3
  58. data/lib/karafka/pro/processing/strategies/dlq/default.rb +35 -0
  59. data/lib/karafka/pro/processing/strategies/lrj/default.rb +1 -1
  60. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
  61. data/lib/karafka/pro/processing/strategies/vp/default.rb +60 -26
  62. data/lib/karafka/pro/processing/virtual_offset_manager.rb +41 -11
  63. data/lib/karafka/pro/routing/features/long_running_job/topic.rb +2 -0
  64. data/lib/karafka/pro/routing/features/multiplexing/config.rb +38 -0
  65. data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +114 -0
  66. data/lib/karafka/pro/routing/features/multiplexing/patches/contracts/consumer_group.rb +42 -0
  67. data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +38 -0
  68. data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +42 -0
  69. data/lib/karafka/pro/routing/features/multiplexing/subscription_groups_builder.rb +40 -0
  70. data/lib/karafka/pro/routing/features/multiplexing.rb +59 -0
  71. data/lib/karafka/pro/routing/features/non_blocking_job/topic.rb +32 -0
  72. data/lib/karafka/pro/routing/features/non_blocking_job.rb +37 -0
  73. data/lib/karafka/pro/routing/features/offset_metadata/config.rb +33 -0
  74. data/lib/karafka/pro/routing/features/offset_metadata/contracts/topic.rb +42 -0
  75. data/lib/karafka/pro/routing/features/offset_metadata/topic.rb +65 -0
  76. data/lib/karafka/pro/routing/features/offset_metadata.rb +40 -0
  77. data/lib/karafka/pro/routing/features/patterns/contracts/consumer_group.rb +4 -0
  78. data/lib/karafka/pro/routing/features/patterns/detector.rb +18 -10
  79. data/lib/karafka/pro/routing/features/periodic_job/config.rb +37 -0
  80. data/lib/karafka/pro/routing/features/periodic_job/contracts/topic.rb +44 -0
  81. data/lib/karafka/pro/routing/features/periodic_job/topic.rb +94 -0
  82. data/lib/karafka/pro/routing/features/periodic_job.rb +27 -0
  83. data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +1 -0
  84. data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
  85. data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +7 -2
  86. data/lib/karafka/process.rb +5 -3
  87. data/lib/karafka/processing/coordinator.rb +5 -1
  88. data/lib/karafka/processing/executor.rb +43 -13
  89. data/lib/karafka/processing/executors_buffer.rb +22 -7
  90. data/lib/karafka/processing/jobs/base.rb +19 -2
  91. data/lib/karafka/processing/jobs/consume.rb +3 -3
  92. data/lib/karafka/processing/jobs/idle.rb +5 -0
  93. data/lib/karafka/processing/jobs/revoked.rb +5 -0
  94. data/lib/karafka/processing/jobs/shutdown.rb +5 -0
  95. data/lib/karafka/processing/jobs_queue.rb +19 -8
  96. data/lib/karafka/processing/schedulers/default.rb +42 -0
  97. data/lib/karafka/processing/strategies/base.rb +13 -4
  98. data/lib/karafka/processing/strategies/default.rb +23 -7
  99. data/lib/karafka/processing/strategies/dlq.rb +36 -0
  100. data/lib/karafka/processing/worker.rb +4 -1
  101. data/lib/karafka/routing/builder.rb +12 -2
  102. data/lib/karafka/routing/consumer_group.rb +5 -5
  103. data/lib/karafka/routing/features/base.rb +44 -8
  104. data/lib/karafka/routing/features/dead_letter_queue/config.rb +6 -1
  105. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
  106. data/lib/karafka/routing/features/dead_letter_queue/topic.rb +9 -2
  107. data/lib/karafka/routing/proxy.rb +4 -3
  108. data/lib/karafka/routing/subscription_group.rb +2 -2
  109. data/lib/karafka/routing/subscription_groups_builder.rb +11 -2
  110. data/lib/karafka/routing/topic.rb +8 -10
  111. data/lib/karafka/routing/topics.rb +1 -1
  112. data/lib/karafka/runner.rb +13 -3
  113. data/lib/karafka/server.rb +5 -9
  114. data/lib/karafka/setup/config.rb +21 -1
  115. data/lib/karafka/status.rb +23 -14
  116. data/lib/karafka/templates/karafka.rb.erb +7 -0
  117. data/lib/karafka/time_trackers/partition_usage.rb +56 -0
  118. data/lib/karafka/version.rb +1 -1
  119. data.tar.gz.sig +0 -0
  120. metadata +47 -13
  121. metadata.gz.sig +0 -0
  122. data/lib/karafka/connection/consumer_group_coordinator.rb +0 -48
  123. data/lib/karafka/pro/performance_tracker.rb +0 -84
  124. data/lib/karafka/pro/processing/scheduler.rb +0 -74
  125. data/lib/karafka/processing/scheduler.rb +0 -38
@@ -11,6 +11,15 @@ module Karafka
11
11
  #
12
12
  # @note Executors are not removed after partition is revoked. They are not that big and will
13
13
  # be re-used in case of a re-claim
14
+ #
15
+ # @note Since given consumer can run various operations, executor manages that and its
16
+ # lifecycle. There are following types of operations with appropriate before/after, etc:
17
+ #
18
+ # - consume - primary operation related to running user consumption code
19
+ # - idle - cleanup job that runs on idle runs where no messages would be passed to the end
20
+ # user. This is used for complex flows with filters, etc
21
+ # - revoked - runs after the partition was revoked
22
+ # - shutdown - runs when process is going to shutdown
14
23
  class Executor
15
24
  extend Forwardable
16
25
 
@@ -39,11 +48,11 @@ module Karafka
39
48
  end
40
49
 
41
50
  # Allows us to prepare the consumer in the listener thread prior to the job being sent to
42
- # the queue. It also allows to run some code that is time sensitive and cannot wait in the
51
+ # the scheduler. It also allows to run some code that is time sensitive and cannot wait in the
43
52
  # queue as it could cause starvation.
44
53
  #
45
54
  # @param messages [Array<Karafka::Messages::Message>]
46
- def before_enqueue(messages)
55
+ def before_schedule_consume(messages)
47
56
  # Recreate consumer with each batch if persistence is not enabled
48
57
  # We reload the consumers with each batch instead of relying on some external signals
49
58
  # when needed for consistency. That way devs may have it on or off and not in this
@@ -60,7 +69,7 @@ module Karafka
60
69
  Time.now
61
70
  )
62
71
 
63
- consumer.on_before_enqueue
72
+ consumer.on_before_schedule_consume
64
73
  end
65
74
 
66
75
  # Runs setup and warm-up code in the worker prior to running the consumption
@@ -79,23 +88,23 @@ module Karafka
79
88
  consumer.on_after_consume
80
89
  end
81
90
 
91
+ # Runs the code needed before idle work is scheduled
92
+ def before_schedule_idle
93
+ consumer.on_before_schedule_idle
94
+ end
95
+
82
96
  # Runs consumer idle operations
83
97
  # This may include house-keeping or other state management changes that can occur but that
84
98
  # do not mean there are any new messages available for the end user to process
85
99
  def idle
86
- # Initializes the messages set in case idle operation would happen before any processing
87
- # This prevents us from having no messages object at all as the messages object and
88
- # its metadata may be used for statistics
89
- consumer.messages ||= Messages::Builders::Messages.call(
90
- [],
91
- topic,
92
- partition,
93
- Time.now
94
- )
95
-
96
100
  consumer.on_idle
97
101
  end
98
102
 
103
+ # Runs code needed before revoked job is scheduled
104
+ def before_schedule_revoked
105
+ consumer.on_before_schedule_revoked if @consumer
106
+ end
107
+
99
108
  # Runs the controller `#revoked` method that should be triggered when a given consumer is
100
109
  # no longer needed due to partitions reassignment.
101
110
  #
@@ -112,6 +121,11 @@ module Karafka
112
121
  consumer.on_revoked if @consumer
113
122
  end
114
123
 
124
+ # Runs code needed before shutdown job is scheduled
125
+ def before_schedule_shutdown
126
+ consumer.on_before_schedule_shutdown if @consumer
127
+ end
128
+
115
129
  # Runs the controller `#shutdown` method that should be triggered when a given consumer is
116
130
  # no longer needed as we're closing the process.
117
131
  #
@@ -146,10 +160,26 @@ module Karafka
146
160
  consumer.client = @client
147
161
  consumer.producer = ::Karafka::App.producer
148
162
  consumer.coordinator = @coordinator
163
+ # Since we have some message-less flows (idle, etc), we initialize consumer with empty
164
+ # messages set. In production we have persistent consumers, so this is not a performance
165
+ # overhead as this will happen only once per consumer lifetime
166
+ consumer.messages = empty_messages
149
167
 
150
168
  consumer
151
169
  end
152
170
  end
171
+
172
+ # Initializes the messages set in case given operation would happen before any processing
173
+ # This prevents us from having no messages object at all as the messages object and
174
+ # its metadata may be used for statistics
175
+ def empty_messages
176
+ Messages::Builders::Messages.call(
177
+ [],
178
+ topic,
179
+ partition,
180
+ Time.now
181
+ )
182
+ end
153
183
  end
154
184
  end
155
185
  end
@@ -13,6 +13,7 @@ module Karafka
13
13
  @client = client
14
14
  # We need two layers here to keep track of topics, partitions and processing groups
15
15
  @buffer = Hash.new { |h, k| h[k] = Hash.new { |h2, k2| h2[k2] = {} } }
16
+ @executor_class = Karafka::App.config.internal.processing.executor_class
16
17
  end
17
18
 
18
19
  # Finds or creates an executor based on the provided details
@@ -21,15 +22,28 @@ module Karafka
21
22
  # @param partition [Integer] partition number
22
23
  # @param parallel_key [String] parallel group key
23
24
  # @param coordinator [Karafka::Processing::Coordinator]
24
- # @return [Executor] consumer executor
25
+ # @return [Executor, Pro::Processing::Executor] consumer executor
25
26
  def find_or_create(topic, partition, parallel_key, coordinator)
26
- @buffer[topic][partition][parallel_key] ||= Executor.new(
27
+ @buffer[topic][partition][parallel_key] ||= @executor_class.new(
27
28
  @subscription_group.id,
28
29
  @client,
29
30
  coordinator
30
31
  )
31
32
  end
32
33
 
34
+ # Finds all existing executors for given topic partition or creates one for it
35
+ # @param topic [String] topic name
36
+ # @param partition [Integer] partition number
37
+ # @param coordinator [Karafka::Processing::Coordinator]
38
+ # @return [Array<Executor, Pro::Processing::Executor>]
39
+ def find_all_or_create(topic, partition, coordinator)
40
+ existing = find_all(topic, partition)
41
+
42
+ return existing unless existing.empty?
43
+
44
+ [find_or_create(topic, partition, 0, coordinator)]
45
+ end
46
+
33
47
  # Revokes executors of a given topic partition, so they won't be used anymore for incoming
34
48
  # messages
35
49
  #
@@ -43,7 +57,8 @@ module Karafka
43
57
  #
44
58
  # @param topic [String] topic name
45
59
  # @param partition [Integer] partition number
46
- # @return [Array<Executor>] executors in use for this topic + partition
60
+ # @return [Array<Executor, Pro::Processing::Executor>] executors in use for this
61
+ # topic + partition
47
62
  def find_all(topic, partition)
48
63
  @buffer[topic][partition].values
49
64
  end
@@ -52,11 +67,11 @@ module Karafka
52
67
  # info
53
68
  # @yieldparam [Routing::Topic] karafka routing topic object
54
69
  # @yieldparam [Integer] partition number
55
- # @yieldparam [Executor] given executor
70
+ # @yieldparam [Executor, Pro::Processing::Executor] given executor
56
71
  def each
57
- @buffer.each do |_, partitions|
58
- partitions.each do |_, executors|
59
- executors.each do |_, executor|
72
+ @buffer.each_value do |partitions|
73
+ partitions.each_value do |executors|
74
+ executors.each_value do |executor|
60
75
  yield(executor)
61
76
  end
62
77
  end
@@ -20,11 +20,14 @@ module Karafka
20
20
  # All jobs are blocking by default and they can release the lock when blocking operations
21
21
  # are done (if needed)
22
22
  @non_blocking = false
23
+ @status = :pending
23
24
  end
24
25
 
25
- # When redefined can run any code prior to the job being enqueued
26
+ # When redefined can run any code prior to the job being scheduled
26
27
  # @note This will run in the listener thread and not in the worker
27
- def before_enqueue; end
28
+ def before_schedule
29
+ raise NotImplementedError, 'Please implement in a subclass'
30
+ end
28
31
 
29
32
  # When redefined can run any code that should run before executing the proper code
30
33
  def before_call; end
@@ -49,6 +52,20 @@ module Karafka
49
52
  def non_blocking?
50
53
  @non_blocking
51
54
  end
55
+
56
+ # @return [Boolean] was this job finished.
57
+ def finished?
58
+ @status == :finished
59
+ end
60
+
61
+ # Marks the job as finished. Used by the worker to indicate, that this job is done.
62
+ #
63
+ # @note Since the scheduler knows exactly when it schedules jobs and when it keeps them
64
+ # pending, we do not need advanced state tracking and the only information from the
65
+ # "outside" is whether it was finished or not after it was scheduled for execution.
66
+ def finish!
67
+ @status = :finished
68
+ end
52
69
  end
53
70
  end
54
71
  end
@@ -20,9 +20,9 @@ module Karafka
20
20
  end
21
21
 
22
22
  # Runs all the preparation code on the executor that needs to happen before the job is
23
- # enqueued.
24
- def before_enqueue
25
- executor.before_enqueue(@messages)
23
+ # scheduled.
24
+ def before_schedule
25
+ executor.before_schedule_consume(@messages)
26
26
  end
27
27
 
28
28
  # Runs the before consumption preparations on the executor
@@ -14,6 +14,11 @@ module Karafka
14
14
  super()
15
15
  end
16
16
 
17
+ # Runs code prior to scheduling this idle job
18
+ def before_schedule
19
+ executor.before_schedule_idle
20
+ end
21
+
17
22
  # Run the idle work via the executor
18
23
  def call
19
24
  executor.idle
@@ -12,6 +12,11 @@ module Karafka
12
12
  super()
13
13
  end
14
14
 
15
+ # Runs code prior to scheduling this revoked job
16
+ def before_schedule
17
+ executor.before_schedule_revoked
18
+ end
19
+
15
20
  # Runs the revoking job via an executor.
16
21
  def call
17
22
  executor.revoked
@@ -13,6 +13,11 @@ module Karafka
13
13
  super()
14
14
  end
15
15
 
16
+ # Runs code prior to scheduling this shutdown job
17
+ def before_schedule
18
+ executor.before_schedule_shutdown
19
+ end
20
+
16
21
  # Runs the shutdown job via an executor.
17
22
  def call
18
23
  executor.shutdown
@@ -23,12 +23,7 @@ module Karafka
23
23
  # scheduled by Ruby hundreds of thousands of times per group.
24
24
  # We cannot use a single semaphore as it could potentially block in listeners that should
25
25
  # process with their data and also could unlock when a given group needs to remain locked
26
- @semaphores = Concurrent::Map.new do |h, k|
27
- # Ruby prior to 3.2 did not have queue with a timeout on `#pop`, that is why for those
28
- # versions we use our custom queue wrapper
29
- h.compute_if_absent(k) { RUBY_VERSION < '3.2' ? TimedQueue.new : Queue.new }
30
- end
31
-
26
+ @semaphores = {}
32
27
  @concurrency = Karafka::App.config.concurrency
33
28
  @tick_interval = ::Karafka::App.config.internal.tick_interval
34
29
  @in_processing = Hash.new { |h, k| h[k] = [] }
@@ -37,6 +32,22 @@ module Karafka
37
32
  @mutex = Mutex.new
38
33
  end
39
34
 
35
+ # Registers given subscription group id in the queue. It is needed so we do not dynamically
36
+ # create semaphore, hence avoiding potential race conditions
37
+ #
38
+ # @param group_id [String]
39
+ def register(group_id)
40
+ # Ruby prior to 3.2 did not have a queue with a timeout on `#pop`
41
+ @mutex.synchronize do
42
+ # That is why for those Ruby versions we use our custom queue wrapper
43
+ #
44
+ # Initializes this semaphore from the mutex, so it is never auto-created
45
+ # Since we always schedule a job before waiting using semaphores, there won't be any
46
+ # concurrency problems
47
+ @semaphores[group_id] = RUBY_VERSION < '3.2' ? TimedQueue.new : Queue.new
48
+ end
49
+ end
50
+
40
51
  # Adds the job to the internal main queue, scheduling it for execution in a worker and marks
41
52
  # this job as in processing pipeline.
42
53
  #
@@ -79,7 +90,7 @@ module Karafka
79
90
  # @param group_id [String] id of the group we want to unlock for one tick
80
91
  # @note This does not release the wait lock. It just causes a conditions recheck
81
92
  def tick(group_id)
82
- @semaphores[group_id] << true
93
+ @semaphores.fetch(group_id) << true
83
94
  end
84
95
 
85
96
  # Marks a given job from a given group as completed. When there are no more jobs from a given
@@ -149,7 +160,7 @@ module Karafka
149
160
  while wait?(group_id)
150
161
  yield if block_given?
151
162
 
152
- @semaphores[group_id].pop(timeout: @tick_interval / 1_000.0)
163
+ @semaphores.fetch(group_id).pop(timeout: @tick_interval / 1_000.0)
153
164
  end
154
165
  end
155
166
 
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Processing
5
+ # Namespace for Karafka OSS schedulers
6
+ module Schedulers
7
+ # FIFO scheduler for messages coming from various topics and partitions
8
+ class Default
9
+ # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
10
+ def initialize(queue)
11
+ @queue = queue
12
+ end
13
+
14
+ # Schedules jobs in the fifo order
15
+ #
16
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Consume>] jobs we want to schedule
17
+ def on_schedule_consumption(jobs_array)
18
+ jobs_array.each do |job|
19
+ @queue << job
20
+ end
21
+ end
22
+
23
+ # Revocation, shutdown and idle jobs can also run in fifo by default
24
+ alias on_schedule_revocation on_schedule_consumption
25
+ alias on_schedule_shutdown on_schedule_consumption
26
+ alias on_schedule_idle on_schedule_consumption
27
+
28
+ # This scheduler does not have anything to manage as it is a pass through and has no state
29
+ def on_manage
30
+ nil
31
+ end
32
+
33
+ # This scheduler does not need to be cleared because it is stateless
34
+ #
35
+ # @param _group_id [String] Subscription group id
36
+ def on_clear(_group_id)
37
+ nil
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
@@ -11,10 +11,19 @@ module Karafka
11
11
  module Strategies
12
12
  # Base strategy that should be included in each strategy, just to ensure the API
13
13
  module Base
14
- # What should happen before jobs are enqueued
15
- # @note This runs from the listener thread, not recommended to put anything slow here
16
- def handle_before_enqueue
17
- raise NotImplementedError, 'Implement in a subclass'
14
+ # Defines all the before schedule handlers for appropriate actions
15
+ %i[
16
+ consume
17
+ idle
18
+ revoked
19
+ shutdown
20
+ ].each do |action|
21
+ class_eval <<~RUBY, __FILE__, __LINE__ + 1
22
+ def handle_before_schedule_#{action}
23
+ # What should happen before scheduling this work
24
+ raise NotImplementedError, 'Implement in a subclass'
25
+ end
26
+ RUBY
18
27
  end
19
28
 
20
29
  # What should happen before we kick in the processing
@@ -13,6 +13,23 @@ module Karafka
13
13
  # Apply strategy for a non-feature based flow
14
14
  FEATURES = %i[].freeze
15
15
 
16
+ # By default on all "before schedule" we just run instrumentation, nothing more
17
+ %i[
18
+ consume
19
+ idle
20
+ revoked
21
+ shutdown
22
+ ].each do |action|
23
+ class_eval <<~RUBY, __FILE__, __LINE__ + 1
24
+ # No actions needed for the standard flow here
25
+ def handle_before_schedule_#{action}
26
+ Karafka.monitor.instrument('consumer.before_schedule_#{action}', caller: self)
27
+
28
+ nil
29
+ end
30
+ RUBY
31
+ end
32
+
16
33
  # Marks message as consumed in an async way.
17
34
  #
18
35
  # @param message [Messages::Message] last successfully processed message.
@@ -24,6 +41,9 @@ module Karafka
24
41
  # already processed but rather at the next one. This applies to both sync and async
25
42
  # versions of this method.
26
43
  def mark_as_consumed(message)
44
+ # seek offset can be nil only in case `#seek` was invoked with offset reset request
45
+ # In case like this we ignore marking
46
+ return true if coordinator.seek_offset.nil?
27
47
  # Ignore earlier offsets than the one we already committed
28
48
  return true if coordinator.seek_offset > message.offset
29
49
  return false if revoked?
@@ -40,6 +60,9 @@ module Karafka
40
60
  # @return [Boolean] true if we were able to mark the offset, false otherwise.
41
61
  # False indicates that we were not able and that we have lost the partition.
42
62
  def mark_as_consumed!(message)
63
+ # seek offset can be nil only in case `#seek` was invoked with offset reset request
64
+ # In case like this we ignore marking
65
+ return true if coordinator.seek_offset.nil?
43
66
  # Ignore earlier offsets than the one we already committed
44
67
  return true if coordinator.seek_offset > message.offset
45
68
  return false if revoked?
@@ -76,13 +99,6 @@ module Karafka
76
99
  commit_offsets(async: false)
77
100
  end
78
101
 
79
- # No actions needed for the standard flow here
80
- def handle_before_enqueue
81
- Karafka.monitor.instrument('consumer.before_enqueue', caller: self)
82
-
83
- nil
84
- end
85
-
86
102
  # Increment number of attempts
87
103
  def handle_before_consume
88
104
  coordinator.pause_tracker.increment
@@ -14,6 +14,42 @@ module Karafka
14
14
  dead_letter_queue
15
15
  ].freeze
16
16
 
17
+ # Override of the standard `#mark_as_consumed` in order to handle the pause tracker
18
+ # reset in case DLQ is marked as fully independent. When DLQ is marked independent,
19
+ # any offset marking causes the pause count tracker to reset. This is useful when
20
+ # the error is not due to the collective batch operations state but due to intermediate
21
+ # "crawling" errors that move with it
22
+ #
23
+ # @see `Strategies::Default#mark_as_consumed` for more details
24
+ # @param message [Messages::Message]
25
+ def mark_as_consumed(message)
26
+ # If we are not retrying pause count is already 0, no need to try to reset the state
27
+ return super unless retrying?
28
+ # If we do not use independent marking on DLQ, we just mark as consumed
29
+ return super unless topic.dead_letter_queue.independent?
30
+ # If we were not able to mark no need to reset
31
+ return false unless super
32
+
33
+ coordinator.pause_tracker.reset
34
+
35
+ true
36
+ end
37
+
38
+ # Override of the standard `#mark_as_consumed!`. Resets the pause tracker count in case
39
+ # DLQ was configured with the `independent` flag.
40
+ #
41
+ # @see `Strategies::Default#mark_as_consumed!` for more details
42
+ # @param message [Messages::Message]
43
+ def mark_as_consumed!(message)
44
+ return super unless retrying?
45
+ return super unless topic.dead_letter_queue.independent?
46
+ return false unless super
47
+
48
+ coordinator.pause_tracker.reset
49
+
50
+ true
51
+ end
52
+
17
53
  # When manual offset management is on, we do not mark anything as consumed automatically
18
54
  # and we rely on the user to figure things out
19
55
  def handle_after_consume
@@ -83,7 +83,10 @@ module Karafka
83
83
  )
84
84
  ensure
85
85
  # job can be nil when the queue is being closed
86
- @jobs_queue.complete(job) if job
86
+ if job
87
+ @jobs_queue.complete(job)
88
+ job.finish!
89
+ end
87
90
 
88
91
  # Always publish info, that we completed all the work despite its result
89
92
  Karafka.monitor.instrument('worker.completed', instrument_details)
@@ -109,10 +109,20 @@ module Karafka
109
109
  # subscription group customization
110
110
  # @param subscription_group_name [String, Symbol] subscription group id. When not provided,
111
111
  # a random uuid will be used
112
+ # @param args [Hash] any extra keyword arguments accepted by the subscription group builder
112
113
  # @param block [Proc] further topics definitions
113
- def subscription_group(subscription_group_name = SubscriptionGroup.id, &block)
114
+ def subscription_group(
115
+ subscription_group_name = SubscriptionGroup.id,
116
+ **args,
117
+ &block
118
+ )
114
119
  consumer_group('app') do
115
- target.public_send(:subscription_group=, subscription_group_name.to_s, &block)
120
+ target.public_send(
121
+ :subscription_group=,
122
+ subscription_group_name.to_s,
123
+ **args,
124
+ &block
125
+ )
116
126
  end
117
127
  end
118
128
 
@@ -14,7 +14,7 @@ module Karafka
14
14
  # It allows us to store the "current" subscription group defined in the routing
15
15
  # This subscription group id is then injected into topics, so we can compute the subscription
16
16
  # groups
17
- attr_accessor :current_subscription_group_name
17
+ attr_accessor :current_subscription_group_details
18
18
 
19
19
  # @param name [String, Symbol] raw name of this consumer group. Raw means, that it does not
20
20
  # yet have an application client_id namespace, this will be added here by default.
@@ -26,7 +26,7 @@ module Karafka
26
26
  @topics = Topics.new([])
27
27
  # Initialize the subscription group so there's always a value for it, since even if not
28
28
  # defined directly, a subscription group will be created
29
- @current_subscription_group_name = SubscriptionGroup.id
29
+ @current_subscription_group_details = { name: SubscriptionGroup.id }
30
30
  end
31
31
 
32
32
  # @return [Boolean] true if this consumer group should be active in our current process
@@ -48,7 +48,7 @@ module Karafka
48
48
  built_topic = @topics.last
49
49
  # We overwrite it conditionally in case it was not set by the user inline in the topic
50
50
  # block definition
51
- built_topic.subscription_group_name ||= current_subscription_group_name
51
+ built_topic.subscription_group_details ||= current_subscription_group_details
52
52
  built_topic
53
53
  end
54
54
 
@@ -59,13 +59,13 @@ module Karafka
59
59
  def subscription_group=(name = SubscriptionGroup.id, &block)
60
60
  # We cast it here, so the routing supports symbol based but that's anyhow later on
61
61
  # validated as a string
62
- @current_subscription_group_name = name.to_s
62
+ @current_subscription_group_details = { name: name.to_s }
63
63
 
64
64
  Proxy.new(self, &block)
65
65
 
66
66
  # We need to reset the current subscription group after it is used, so it won't leak
67
67
  # outside to other topics that would be defined without a defined subscription group
68
- @current_subscription_group_name = SubscriptionGroup.id
68
+ @current_subscription_group_details = { name: SubscriptionGroup.id }
69
69
  end
70
70
 
71
71
  # @return [Array<Routing::SubscriptionGroup>] all the subscription groups build based on
@@ -13,17 +13,46 @@ module Karafka
13
13
  class << self
14
14
  # Extends topic and builder with given feature API
15
15
  def activate
16
- Topic.prepend(self::Topic) if const_defined?('Topic', false)
17
- Topics.prepend(self::Topics) if const_defined?('Topics', false)
18
- ConsumerGroup.prepend(self::ConsumerGroup) if const_defined?('ConsumerGroup', false)
19
- Proxy.prepend(self::Proxy) if const_defined?('Proxy', false)
20
- Builder.prepend(self::Builder) if const_defined?('Builder', false)
21
- Builder.prepend(Base::Expander.new(self)) if const_defined?('Contracts', false)
16
+ if const_defined?('Topic', false)
17
+ Topic.prepend(self::Topic)
18
+ end
19
+
20
+ if const_defined?('Topics', false)
21
+ Topics.prepend(self::Topics)
22
+ end
23
+
24
+ if const_defined?('ConsumerGroup', false)
25
+ ConsumerGroup.prepend(self::ConsumerGroup)
26
+ end
27
+
28
+ if const_defined?('Proxy', false)
29
+ Proxy.prepend(self::Proxy)
30
+ end
31
+
32
+ if const_defined?('Builder', false)
33
+ Builder.prepend(self::Builder)
34
+ end
35
+
36
+ if const_defined?('Contracts', false)
37
+ Builder.prepend(Base::Expander.new(self))
38
+ end
39
+
40
+ if const_defined?('SubscriptionGroup', false)
41
+ SubscriptionGroup.prepend(self::SubscriptionGroup)
42
+ end
43
+
44
+ if const_defined?('SubscriptionGroupsBuilder', false)
45
+ SubscriptionGroupsBuilder.prepend(self::SubscriptionGroupsBuilder)
46
+ end
22
47
  end
23
48
 
24
- # Loads all the features and activates them
49
+ # Loads all the features and activates them once
25
50
  def load_all
51
+ return if @loaded
52
+
26
53
  features.each(&:activate)
54
+
55
+ @loaded = true
27
56
  end
28
57
 
29
58
  # @param config [Karafka::Core::Configurable::Node] app config that we can alter with
@@ -41,11 +70,18 @@ module Karafka
41
70
 
42
71
  private
43
72
 
44
- # @return [Array<Class>] all available routing features
73
+ # @return [Array<Class>] all available routing features that are direct descendants of
74
+ # the features base. The approach of using `#superclass` prevents us from accidentally
75
+ # loading Pro components
45
76
  def features
46
77
  ObjectSpace
47
78
  .each_object(Class)
48
79
  .select { |klass| klass < self }
80
+ # Ensures, that Pro components are only loaded when we operate in Pro mode. Since
81
+ # outside of specs Zeitwerk does not require them at all, they will not be loaded
82
+ # anyhow, but for specs this needs to be done as RSpec requires all files to be
83
+ # present
84
+ .reject { |klass| Karafka.pro? ? false : klass.superclass != self }
49
85
  .sort_by(&:to_s)
50
86
  end
51
87
 
@@ -11,8 +11,13 @@ module Karafka
11
11
  :max_retries,
12
12
  # To what topic the skipped messages should be moved
13
13
  :topic,
14
+ # Should retries be handled collectively on a batch or independently per message
15
+ :independent,
14
16
  keyword_init: true
15
- ) { alias_method :active?, :active }
17
+ ) do
18
+ alias_method :active?, :active
19
+ alias_method :independent?, :independent
20
+ end
16
21
  end
17
22
  end
18
23
  end
@@ -18,6 +18,7 @@ module Karafka
18
18
 
19
19
  nested :dead_letter_queue do
20
20
  required(:active) { |val| [true, false].include?(val) }
21
+ required(:independent) { |val| [true, false].include?(val) }
21
22
  required(:max_retries) { |val| val.is_a?(Integer) && val >= 0 }
22
23
  end
23
24