karafka 2.2.13 → 2.2.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +138 -125
  4. data/Gemfile.lock +3 -3
  5. data/docker-compose.yml +2 -0
  6. data/lib/karafka/admin.rb +109 -3
  7. data/lib/karafka/app.rb +7 -0
  8. data/lib/karafka/base_consumer.rb +23 -30
  9. data/lib/karafka/connection/client.rb +13 -10
  10. data/lib/karafka/connection/listener.rb +11 -9
  11. data/lib/karafka/instrumentation/assignments_tracker.rb +96 -0
  12. data/lib/karafka/instrumentation/callbacks/rebalance.rb +10 -7
  13. data/lib/karafka/instrumentation/logger_listener.rb +0 -9
  14. data/lib/karafka/instrumentation/notifications.rb +6 -4
  15. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +2 -2
  16. data/lib/karafka/pro/instrumentation/performance_tracker.rb +85 -0
  17. data/lib/karafka/pro/loader.rb +2 -2
  18. data/lib/karafka/pro/processing/schedulers/base.rb +127 -0
  19. data/lib/karafka/pro/processing/schedulers/default.rb +109 -0
  20. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
  21. data/lib/karafka/pro/processing/strategies/default.rb +2 -2
  22. data/lib/karafka/pro/processing/strategies/lrj/default.rb +1 -1
  23. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
  24. data/lib/karafka/pro/processing/strategies/vp/default.rb +1 -1
  25. data/lib/karafka/processing/executor.rb +27 -3
  26. data/lib/karafka/processing/executors_buffer.rb +3 -3
  27. data/lib/karafka/processing/jobs/base.rb +19 -2
  28. data/lib/karafka/processing/jobs/consume.rb +3 -3
  29. data/lib/karafka/processing/jobs/idle.rb +5 -0
  30. data/lib/karafka/processing/jobs/revoked.rb +5 -0
  31. data/lib/karafka/processing/jobs/shutdown.rb +5 -0
  32. data/lib/karafka/processing/jobs_queue.rb +19 -8
  33. data/lib/karafka/processing/schedulers/default.rb +41 -0
  34. data/lib/karafka/processing/strategies/base.rb +13 -4
  35. data/lib/karafka/processing/strategies/default.rb +17 -7
  36. data/lib/karafka/processing/worker.rb +4 -1
  37. data/lib/karafka/routing/proxy.rb +4 -3
  38. data/lib/karafka/routing/topics.rb +1 -1
  39. data/lib/karafka/setup/config.rb +4 -1
  40. data/lib/karafka/version.rb +1 -1
  41. data.tar.gz.sig +0 -0
  42. metadata +7 -5
  43. metadata.gz.sig +0 -0
  44. data/lib/karafka/pro/performance_tracker.rb +0 -84
  45. data/lib/karafka/pro/processing/scheduler.rb +0 -74
  46. data/lib/karafka/processing/scheduler.rb +0 -38
@@ -0,0 +1,127 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ # Namespace for Pro schedulers related components
18
+ module Schedulers
19
+ # Base for all the Pro custom schedulers
20
+ #
21
+ # It wraps the Scheduler API with mutex to ensure, that during scheduling we do not start
22
+ # scheduling other work that could impact the decision making in between multiple
23
+ # subscription groups running in separate threads.
24
+ #
25
+ # @note All the `on_` methods can be redefined with a non-thread-safe versions without
26
+ # locks if needed, however when doing so, ensure that your scheduler is stateless.
27
+ class Base
28
+ # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
29
+ def initialize(queue)
30
+ @queue = queue
31
+ @mutex = Mutex.new
32
+ end
33
+
34
+ # Runs the consumption jobs scheduling flow under a mutex
35
+ #
36
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
37
+ def on_schedule_consumption(jobs_array)
38
+ @mutex.synchronize do
39
+ schedule_consumption(jobs_array)
40
+ end
41
+ end
42
+
43
+ # Should schedule the consumption jobs
44
+ #
45
+ # @param _jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
46
+ def schedule_consumption(_jobs_array)
47
+ raise NotImplementedError, 'Implement in a subclass'
48
+ end
49
+
50
+ # Runs the revocation jobs scheduling flow under a mutex
51
+ #
52
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
53
+ def on_schedule_revocation(jobs_array)
54
+ @mutex.synchronize do
55
+ schedule_revocation(jobs_array)
56
+ end
57
+ end
58
+
59
+ # Schedules the revocation jobs.
60
+ #
61
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
62
+ #
63
+ # @note We provide a default scheduler logic here because by default revocation jobs
64
+ # should be scheduled as fast as possible.
65
+ def schedule_revocation(jobs_array)
66
+ jobs_array.each do |job|
67
+ @queue << job
68
+ end
69
+ end
70
+
71
+ # Runs the shutdown jobs scheduling flow under a mutex
72
+ #
73
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
74
+ def on_schedule_shutdown(jobs_array)
75
+ @mutex.synchronize do
76
+ schedule_shutdown(jobs_array)
77
+ end
78
+ end
79
+
80
+ # Schedules the shutdown jobs.
81
+ #
82
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
83
+ #
84
+ # @note We provide a default scheduler logic here because by default shutdown jobs
85
+ # should be scheduled as fast as possible.
86
+ def schedule_shutdown(jobs_array)
87
+ jobs_array.each do |job|
88
+ @queue << job
89
+ end
90
+ end
91
+
92
+ # Runs the manage tick under mutex
93
+ def on_manage
94
+ @mutex.synchronize { manage }
95
+ end
96
+
97
+ # Should manage scheduling on jobs state changes
98
+ #
99
+ # By default does nothing as default schedulers are stateless
100
+ def manage
101
+ nil
102
+ end
103
+
104
+ # Runs clearing under mutex
105
+ #
106
+ # @param group_id [String] Subscription group id
107
+ def on_clear(group_id)
108
+ @mutex.synchronize { clear(group_id) }
109
+ end
110
+
111
+ # By default schedulers are stateless, so nothing to clear.
112
+ #
113
+ # @param _group_id [String] Subscription group id
114
+ def clear(_group_id)
115
+ nil
116
+ end
117
+
118
+ private
119
+
120
+ # @return [Karafka::Processing::JobsQueue] jobs queue reference for internal usage
121
+ # inside of the scheduler
122
+ attr_reader :queue
123
+ end
124
+ end
125
+ end
126
+ end
127
+ end
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ # Namespace for Pro schedulers
18
+ module Schedulers
19
+ # Optimizing scheduler that takes into consideration the execution time needed to process
20
+ # messages from given topics partitions. It uses the non-preemptive LJF algorithm
21
+ #
22
+ # This scheduler is designed to optimize execution times on jobs that perform IO operations
23
+ # as when taking IO into consideration, they can achieve optimized parallel processing.
24
+ #
25
+ # This scheduler can also work with virtual partitions.
26
+ #
27
+ # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
28
+ # default FIFO scheduler from the default Karafka scheduler
29
+ #
30
+ # @note This is a stateless scheduler, thus we can override the `#on_` API.
31
+ class Default < Base
32
+ # Schedules jobs in the LJF order for consumption
33
+ #
34
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
35
+ def on_schedule_consumption(jobs_array)
36
+ perf_tracker = Instrumentation::PerformanceTracker.instance
37
+
38
+ ordered = []
39
+
40
+ jobs_array.each do |job|
41
+ ordered << [
42
+ job,
43
+ processing_cost(perf_tracker, job)
44
+ ]
45
+ end
46
+
47
+ ordered.sort_by!(&:last)
48
+ ordered.reverse!
49
+ ordered.map!(&:first)
50
+
51
+ ordered.each do |job|
52
+ @queue << job
53
+ end
54
+ end
55
+
56
+ # Schedules jobs in the fifo order
57
+ #
58
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
59
+ def on_schedule_revocation(jobs_array)
60
+ jobs_array.each do |job|
61
+ @queue << job
62
+ end
63
+ end
64
+
65
+ # Schedules jobs in the fifo order
66
+ #
67
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
68
+ def on_schedule_shutdown(jobs_array)
69
+ jobs_array.each do |job|
70
+ @queue << job
71
+ end
72
+ end
73
+
74
+ # This scheduler does not have anything to manage as it is a pass through and has no
75
+ # state
76
+ def on_manage
77
+ nil
78
+ end
79
+
80
+ # This scheduler does not need to be cleared because it is stateless
81
+ #
82
+ # @param _group_id [String] Subscription group id
83
+ def on_clear(_group_id)
84
+ nil
85
+ end
86
+
87
+ private
88
+
89
+ # @param perf_tracker [PerformanceTracker]
90
+ # @param job [Karafka::Processing::Jobs::Base] job we will be processing
91
+ # @return [Numeric] estimated cost of processing this job
92
+ def processing_cost(perf_tracker, job)
93
+ if job.is_a?(::Karafka::Processing::Jobs::Consume)
94
+ messages = job.messages
95
+ message = messages.first
96
+
97
+ perf_tracker.processing_time_p95(message.topic, message.partition) * messages.size
98
+ else
99
+ # LJF will set first the most expensive, but we want to run the zero cost jobs
100
+ # related to the lifecycle always first. That is why we "emulate" that they are
101
+ # the longest possible jobs that anyone can run
102
+ Float::INFINITY
103
+ end
104
+ end
105
+ end
106
+ end
107
+ end
108
+ end
109
+ end
@@ -33,7 +33,7 @@ module Karafka
33
33
  ].freeze
34
34
 
35
35
  # No actions needed for the standard flow here
36
- def handle_before_enqueue
36
+ def handle_before_schedule_consume
37
37
  super
38
38
 
39
39
  coordinator.on_enqueued do
@@ -28,8 +28,8 @@ module Karafka
28
28
  FEATURES = %i[].freeze
29
29
 
30
30
  # No actions needed for the standard flow here
31
- def handle_before_enqueue
32
- Karafka.monitor.instrument('consumer.before_enqueue', caller: self)
31
+ def handle_before_schedule_consume
32
+ Karafka.monitor.instrument('consumer.before_schedule_consume', caller: self)
33
33
 
34
34
  nil
35
35
  end
@@ -29,7 +29,7 @@ module Karafka
29
29
  ].freeze
30
30
 
31
31
  # We always need to pause prior to doing any jobs for LRJ
32
- def handle_before_enqueue
32
+ def handle_before_schedule_consume
33
33
  super
34
34
 
35
35
  # This ensures that when running LRJ with VP, things operate as expected run only
@@ -29,7 +29,7 @@ module Karafka
29
29
  ].freeze
30
30
 
31
31
  # We always need to pause prior to doing any jobs for LRJ
32
- def handle_before_enqueue
32
+ def handle_before_schedule_consume
33
33
  super
34
34
 
35
35
  # This ensures that when running LRJ with VP, things operate as expected run only
@@ -112,7 +112,7 @@ module Karafka
112
112
  #
113
113
  # @note This can be done without the mutex, because it happens from the same thread
114
114
  # for all the work (listener thread)
115
- def handle_before_enqueue
115
+ def handle_before_schedule_consume
116
116
  super
117
117
 
118
118
  coordinator.virtual_offset_manager.register(
@@ -11,6 +11,15 @@ module Karafka
11
11
  #
12
12
  # @note Executors are not removed after partition is revoked. They are not that big and will
13
13
  # be re-used in case of a re-claim
14
+ #
15
+ # @note Since given consumer can run various operations, executor manages that and its
16
+ # lifecycle. There are following types of operations with appropriate before/after, etc:
17
+ #
18
+ # - consume - primary operation related to running user consumption code
19
+ # - idle - cleanup job that runs on idle runs where no messages would be passed to the end
20
+ # user. This is used for complex flows with filters, etc
21
+ # - revoked - runs after the partition was revoked
22
+ # - shutdown - runs when process is going to shutdown
14
23
  class Executor
15
24
  extend Forwardable
16
25
 
@@ -39,11 +48,11 @@ module Karafka
39
48
  end
40
49
 
41
50
  # Allows us to prepare the consumer in the listener thread prior to the job being send to
42
- # the queue. It also allows to run some code that is time sensitive and cannot wait in the
51
+ # be scheduled. It also allows to run some code that is time sensitive and cannot wait in the
43
52
  # queue as it could cause starvation.
44
53
  #
45
54
  # @param messages [Array<Karafka::Messages::Message>]
46
- def before_enqueue(messages)
55
+ def before_schedule_consume(messages)
47
56
  # Recreate consumer with each batch if persistence is not enabled
48
57
  # We reload the consumers with each batch instead of relying on some external signals
49
58
  # when needed for consistency. That way devs may have it on or off and not in this
@@ -60,7 +69,7 @@ module Karafka
60
69
  Time.now
61
70
  )
62
71
 
63
- consumer.on_before_enqueue
72
+ consumer.on_before_schedule_consume
64
73
  end
65
74
 
66
75
  # Runs setup and warm-up code in the worker prior to running the consumption
@@ -79,6 +88,11 @@ module Karafka
79
88
  consumer.on_after_consume
80
89
  end
81
90
 
91
+ # Runs the code needed before idle work is scheduled
92
+ def before_schedule_idle
93
+ consumer.on_before_schedule_idle
94
+ end
95
+
82
96
  # Runs consumer idle operations
83
97
  # This may include house-keeping or other state management changes that can occur but that
84
98
  # not mean there are any new messages available for the end user to process
@@ -96,6 +110,11 @@ module Karafka
96
110
  consumer.on_idle
97
111
  end
98
112
 
113
+ # Runs code needed before revoked job is scheduled
114
+ def before_schedule_revoked
115
+ consumer.on_before_schedule_revoked if @consumer
116
+ end
117
+
99
118
  # Runs the controller `#revoked` method that should be triggered when a given consumer is
100
119
  # no longer needed due to partitions reassignment.
101
120
  #
@@ -112,6 +131,11 @@ module Karafka
112
131
  consumer.on_revoked if @consumer
113
132
  end
114
133
 
134
+ # Runs code needed before shutdown job is scheduled
135
+ def before_schedule_shutdown
136
+ consumer.on_before_schedule_shutdown if @consumer
137
+ end
138
+
115
139
  # Runs the controller `#shutdown` method that should be triggered when a given consumer is
116
140
  # no longer needed as we're closing the process.
117
141
  #
@@ -54,9 +54,9 @@ module Karafka
54
54
  # @yieldparam [Integer] partition number
55
55
  # @yieldparam [Executor] given executor
56
56
  def each
57
- @buffer.each do |_, partitions|
58
- partitions.each do |_, executors|
59
- executors.each do |_, executor|
57
+ @buffer.each_value do |partitions|
58
+ partitions.each_value do |executors|
59
+ executors.each_value do |executor|
60
60
  yield(executor)
61
61
  end
62
62
  end
@@ -20,11 +20,14 @@ module Karafka
20
20
  # All jobs are blocking by default and they can release the lock when blocking operations
21
21
  # are done (if needed)
22
22
  @non_blocking = false
23
+ @status = :pending
23
24
  end
24
25
 
25
- # When redefined can run any code prior to the job being enqueued
26
+ # When redefined can run any code prior to the job being scheduled
26
27
  # @note This will run in the listener thread and not in the worker
27
- def before_enqueue; end
28
+ def before_schedule
29
+ raise NotImplementedError, 'Please implement in a subclass'
30
+ end
28
31
 
29
32
  # When redefined can run any code that should run before executing the proper code
30
33
  def before_call; end
@@ -49,6 +52,20 @@ module Karafka
49
52
  def non_blocking?
50
53
  @non_blocking
51
54
  end
55
+
56
+ # @return [Boolean] was this job finished.
57
+ def finished?
58
+ @status == :finished
59
+ end
60
+
61
+ # Marks the job as finished. Used by the worker to indicate, that this job is done.
62
+ #
63
+ # @note Since the scheduler knows exactly when it schedules jobs and when it keeps them
64
+ # pending, we do not need advanced state tracking and the only information from the
65
+ # "outside" is whether it was finished or not after it was scheduled for execution.
66
+ def finish!
67
+ @status = :finished
68
+ end
52
69
  end
53
70
  end
54
71
  end
@@ -20,9 +20,9 @@ module Karafka
20
20
  end
21
21
 
22
22
  # Runs all the preparation code on the executor that needs to happen before the job is
23
- # enqueued.
24
- def before_enqueue
25
- executor.before_enqueue(@messages)
23
+ # scheduled.
24
+ def before_schedule
25
+ executor.before_schedule_consume(@messages)
26
26
  end
27
27
 
28
28
  # Runs the before consumption preparations on the executor
@@ -14,6 +14,11 @@ module Karafka
14
14
  super()
15
15
  end
16
16
 
17
+ # Runs code prior to scheduling this idle job
18
+ def before_schedule
19
+ executor.before_schedule_idle
20
+ end
21
+
17
22
  # Run the idle work via the executor
18
23
  def call
19
24
  executor.idle
@@ -12,6 +12,11 @@ module Karafka
12
12
  super()
13
13
  end
14
14
 
15
+ # Runs code prior to scheduling this revoked job
16
+ def before_schedule
17
+ executor.before_schedule_revoked
18
+ end
19
+
15
20
  # Runs the revoking job via an executor.
16
21
  def call
17
22
  executor.revoked
@@ -13,6 +13,11 @@ module Karafka
13
13
  super()
14
14
  end
15
15
 
16
+ # Runs code prior to scheduling this shutdown job
17
+ def before_schedule
18
+ executor.before_schedule_shutdown
19
+ end
20
+
16
21
  # Runs the shutdown job via an executor.
17
22
  def call
18
23
  executor.shutdown
@@ -23,12 +23,7 @@ module Karafka
23
23
  # scheduled by Ruby hundreds of thousands of times per group.
24
24
  # We cannot use a single semaphore as it could potentially block in listeners that should
25
25
  # process with their data and also could unlock when a given group needs to remain locked
26
- @semaphores = Concurrent::Map.new do |h, k|
27
- # Ruby prior to 3.2 did not have queue with a timeout on `#pop`, that is why for those
28
- # versions we use our custom queue wrapper
29
- h.compute_if_absent(k) { RUBY_VERSION < '3.2' ? TimedQueue.new : Queue.new }
30
- end
31
-
26
+ @semaphores = {}
32
27
  @concurrency = Karafka::App.config.concurrency
33
28
  @tick_interval = ::Karafka::App.config.internal.tick_interval
34
29
  @in_processing = Hash.new { |h, k| h[k] = [] }
@@ -37,6 +32,22 @@ module Karafka
37
32
  @mutex = Mutex.new
38
33
  end
39
34
 
35
+ # Registers given subscription group id in the queue. It is needed so we do not dynamically
36
+ # create semaphore, hence avoiding potential race conditions
37
+ #
38
+ # @param group_id [String]
39
+ def register(group_id)
40
+ # Ruby prior to 3.2 did not have queue with a timeout on `#pop`, that is why for those
41
+ @mutex.synchronize do
42
+ # versions we use our custom queue wrapper
43
+ #
44
+ # Initializes this semaphore from the mutex, so it is never auto-created
45
+ # Since we always schedule a job before waiting using semaphores, there won't be any
46
+ # concurrency problems
47
+ @semaphores[group_id] = RUBY_VERSION < '3.2' ? TimedQueue.new : Queue.new
48
+ end
49
+ end
50
+
40
51
  # Adds the job to the internal main queue, scheduling it for execution in a worker and marks
41
52
  # this job as in processing pipeline.
42
53
  #
@@ -79,7 +90,7 @@ module Karafka
79
90
  # @param group_id [String] id of the group we want to unlock for one tick
80
91
  # @note This does not release the wait lock. It just causes a conditions recheck
81
92
  def tick(group_id)
82
- @semaphores[group_id] << true
93
+ @semaphores.fetch(group_id) << true
83
94
  end
84
95
 
85
96
  # Marks a given job from a given group as completed. When there are no more jobs from a given
@@ -149,7 +160,7 @@ module Karafka
149
160
  while wait?(group_id)
150
161
  yield if block_given?
151
162
 
152
- @semaphores[group_id].pop(timeout: @tick_interval / 1_000.0)
163
+ @semaphores.fetch(group_id).pop(timeout: @tick_interval / 1_000.0)
153
164
  end
154
165
  end
155
166
 
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Processing
5
+ # Namespace for Karafka OSS schedulers
6
+ module Schedulers
7
+ # FIFO scheduler for messages coming from various topics and partitions
8
+ class Default
9
+ # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
10
+ def initialize(queue)
11
+ @queue = queue
12
+ end
13
+
14
+ # Schedules jobs in the fifo order
15
+ #
16
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
17
+ def on_schedule_consumption(jobs_array)
18
+ jobs_array.each do |job|
19
+ @queue << job
20
+ end
21
+ end
22
+
23
+ # Both revocation and shutdown jobs can also run in fifo by default
24
+ alias on_schedule_revocation on_schedule_consumption
25
+ alias on_schedule_shutdown on_schedule_consumption
26
+
27
+ # This scheduler does not have anything to manage as it is a pass through and has no state
28
+ def on_manage
29
+ nil
30
+ end
31
+
32
+ # This scheduler does not need to be cleared because it is stateless
33
+ #
34
+ # @param _group_id [String] Subscription group id
35
+ def on_clear(_group_id)
36
+ nil
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
@@ -11,10 +11,19 @@ module Karafka
11
11
  module Strategies
12
12
  # Base strategy that should be included in each strategy, just to ensure the API
13
13
  module Base
14
- # What should happen before jobs are enqueued
15
- # @note This runs from the listener thread, not recommended to put anything slow here
16
- def handle_before_enqueue
17
- raise NotImplementedError, 'Implement in a subclass'
14
+ # Defines all the before schedule handlers for appropriate actions
15
+ %i[
16
+ consume
17
+ idle
18
+ revoked
19
+ shutdown
20
+ ].each do |action|
21
+ class_eval <<~RUBY, __FILE__, __LINE__ + 1
22
+ def handle_before_schedule_#{action}
23
+ # What should happen before scheduling this work
24
+ raise NotImplementedError, 'Implement in a subclass'
25
+ end
26
+ RUBY
18
27
  end
19
28
 
20
29
  # What should happen before we kick in the processing
@@ -13,6 +13,23 @@ module Karafka
13
13
  # Apply strategy for a non-feature based flow
14
14
  FEATURES = %i[].freeze
15
15
 
16
+ # By default on all "before schedule" we just run instrumentation, nothing more
17
+ %i[
18
+ consume
19
+ idle
20
+ revoked
21
+ shutdown
22
+ ].each do |action|
23
+ class_eval <<~RUBY, __FILE__, __LINE__ + 1
24
+ # No actions needed for the standard flow here
25
+ def handle_before_schedule_#{action}
26
+ Karafka.monitor.instrument('consumer.before_schedule_#{action}', caller: self)
27
+
28
+ nil
29
+ end
30
+ RUBY
31
+ end
32
+
16
33
  # Marks message as consumed in an async way.
17
34
  #
18
35
  # @param message [Messages::Message] last successfully processed message.
@@ -76,13 +93,6 @@ module Karafka
76
93
  commit_offsets(async: false)
77
94
  end
78
95
 
79
- # No actions needed for the standard flow here
80
- def handle_before_enqueue
81
- Karafka.monitor.instrument('consumer.before_enqueue', caller: self)
82
-
83
- nil
84
- end
85
-
86
96
  # Increment number of attempts
87
97
  def handle_before_consume
88
98
  coordinator.pause_tracker.increment
@@ -83,7 +83,10 @@ module Karafka
83
83
  )
84
84
  ensure
85
85
  # job can be nil when the queue is being closed
86
- @jobs_queue.complete(job) if job
86
+ if job
87
+ @jobs_queue.complete(job)
88
+ job.finish!
89
+ end
87
90
 
88
91
  # Always publish info, that we completed all the work despite its result
89
92
  Karafka.monitor.instrument('worker.completed', instrument_details)
@@ -10,11 +10,12 @@ module Karafka
10
10
  # @param target [Object] target object to which we proxy any DSL call
11
11
  # @param defaults [Proc] defaults for target that should be applicable after the proper
12
12
  # proxy context (if needed)
13
- # @param block [Proc] block that we want to evaluate in the proxy context
13
+ # @param block [Proc, nil] block that we want to evaluate in the proxy context or nil if no
14
+ # proxy block context for example because whole context is taken from defaults
14
15
  def initialize(target, defaults = ->(_) {}, &block)
15
16
  @target = target
16
- instance_eval(&block)
17
- instance_eval(&defaults)
17
+ instance_eval(&block) if block
18
+ instance_eval(&defaults) if defaults
18
19
  end
19
20
 
20
21
  # Ruby 2.7.0 to 2.7.2 do not have arg forwarding, so we fallback to the old way
@@ -9,7 +9,7 @@ module Karafka
9
9
  include Enumerable
10
10
  extend Forwardable
11
11
 
12
- def_delegators :@accumulator, :[], :size, :empty?, :last, :<<
12
+ def_delegators :@accumulator, :[], :size, :empty?, :last, :<<, :map!, :sort_by!, :reverse!
13
13
 
14
14
  # @param topics_array [Array<Karafka::Routing::Topic>] array with topics
15
15
  def initialize(topics_array)