karafka 2.2.13 → 2.2.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +138 -125
  4. data/Gemfile.lock +3 -3
  5. data/docker-compose.yml +2 -0
  6. data/lib/karafka/admin.rb +109 -3
  7. data/lib/karafka/app.rb +7 -0
  8. data/lib/karafka/base_consumer.rb +23 -30
  9. data/lib/karafka/connection/client.rb +13 -10
  10. data/lib/karafka/connection/listener.rb +11 -9
  11. data/lib/karafka/instrumentation/assignments_tracker.rb +96 -0
  12. data/lib/karafka/instrumentation/callbacks/rebalance.rb +10 -7
  13. data/lib/karafka/instrumentation/logger_listener.rb +0 -9
  14. data/lib/karafka/instrumentation/notifications.rb +6 -4
  15. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +2 -2
  16. data/lib/karafka/pro/instrumentation/performance_tracker.rb +85 -0
  17. data/lib/karafka/pro/loader.rb +2 -2
  18. data/lib/karafka/pro/processing/schedulers/base.rb +127 -0
  19. data/lib/karafka/pro/processing/schedulers/default.rb +109 -0
  20. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
  21. data/lib/karafka/pro/processing/strategies/default.rb +2 -2
  22. data/lib/karafka/pro/processing/strategies/lrj/default.rb +1 -1
  23. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
  24. data/lib/karafka/pro/processing/strategies/vp/default.rb +1 -1
  25. data/lib/karafka/processing/executor.rb +27 -3
  26. data/lib/karafka/processing/executors_buffer.rb +3 -3
  27. data/lib/karafka/processing/jobs/base.rb +19 -2
  28. data/lib/karafka/processing/jobs/consume.rb +3 -3
  29. data/lib/karafka/processing/jobs/idle.rb +5 -0
  30. data/lib/karafka/processing/jobs/revoked.rb +5 -0
  31. data/lib/karafka/processing/jobs/shutdown.rb +5 -0
  32. data/lib/karafka/processing/jobs_queue.rb +19 -8
  33. data/lib/karafka/processing/schedulers/default.rb +41 -0
  34. data/lib/karafka/processing/strategies/base.rb +13 -4
  35. data/lib/karafka/processing/strategies/default.rb +17 -7
  36. data/lib/karafka/processing/worker.rb +4 -1
  37. data/lib/karafka/routing/proxy.rb +4 -3
  38. data/lib/karafka/routing/topics.rb +1 -1
  39. data/lib/karafka/setup/config.rb +4 -1
  40. data/lib/karafka/version.rb +1 -1
  41. data.tar.gz.sig +0 -0
  42. metadata +7 -5
  43. metadata.gz.sig +0 -0
  44. data/lib/karafka/pro/performance_tracker.rb +0 -84
  45. data/lib/karafka/pro/processing/scheduler.rb +0 -74
  46. data/lib/karafka/processing/scheduler.rb +0 -38
@@ -211,7 +211,7 @@ module Karafka
211
211
  setting :processing do
212
212
  setting :jobs_queue_class, default: Processing::JobsQueue
213
213
  # option scheduler [Object] scheduler we will be using
214
- setting :scheduler_class, default: Processing::Scheduler
214
+ setting :scheduler_class, default: Processing::Schedulers::Default
215
215
  # option jobs_builder [Object] jobs builder we want to use
216
216
  setting :jobs_builder, default: Processing::JobsBuilder.new
217
217
  # option coordinator [Class] work coordinator we want to user for processing coordination
@@ -278,6 +278,9 @@ module Karafka
278
278
  # are also configured
279
279
  Pro::Loader.post_setup_all(config) if Karafka.pro?
280
280
 
281
+ # Subscribe the assignments tracker so we can always query all current assignments
282
+ config.monitor.subscribe(Instrumentation::AssignmentsTracker.instance)
283
+
281
284
  Karafka::App.initialized!
282
285
  end
283
286
 
@@ -3,5 +3,5 @@
3
3
  # Main module namespace
4
4
  module Karafka
5
5
  # Current Karafka version
6
- VERSION = '2.2.13'
6
+ VERSION = '2.2.14'
7
7
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: karafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.13
4
+ version: 2.2.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
35
35
  AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
36
36
  msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
37
37
  -----END CERTIFICATE-----
38
- date: 2023-11-17 00:00:00.000000000 Z
38
+ date: 2023-12-07 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: karafka-core
@@ -183,6 +183,7 @@ files:
183
183
  - lib/karafka/helpers/colorize.rb
184
184
  - lib/karafka/helpers/interval_runner.rb
185
185
  - lib/karafka/helpers/multi_delegator.rb
186
+ - lib/karafka/instrumentation/assignments_tracker.rb
186
187
  - lib/karafka/instrumentation/callbacks/error.rb
187
188
  - lib/karafka/instrumentation/callbacks/rebalance.rb
188
189
  - lib/karafka/instrumentation/callbacks/statistics.rb
@@ -229,11 +230,11 @@ files:
229
230
  - lib/karafka/pro/encryption/messages/middleware.rb
230
231
  - lib/karafka/pro/encryption/messages/parser.rb
231
232
  - lib/karafka/pro/encryption/setup/config.rb
233
+ - lib/karafka/pro/instrumentation/performance_tracker.rb
232
234
  - lib/karafka/pro/iterator.rb
233
235
  - lib/karafka/pro/iterator/expander.rb
234
236
  - lib/karafka/pro/iterator/tpl_builder.rb
235
237
  - lib/karafka/pro/loader.rb
236
- - lib/karafka/pro/performance_tracker.rb
237
238
  - lib/karafka/pro/processing/collapser.rb
238
239
  - lib/karafka/pro/processing/coordinator.rb
239
240
  - lib/karafka/pro/processing/filters/base.rb
@@ -248,7 +249,8 @@ files:
248
249
  - lib/karafka/pro/processing/jobs_builder.rb
249
250
  - lib/karafka/pro/processing/jobs_queue.rb
250
251
  - lib/karafka/pro/processing/partitioner.rb
251
- - lib/karafka/pro/processing/scheduler.rb
252
+ - lib/karafka/pro/processing/schedulers/base.rb
253
+ - lib/karafka/pro/processing/schedulers/default.rb
252
254
  - lib/karafka/pro/processing/strategies.rb
253
255
  - lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb
254
256
  - lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb
@@ -367,7 +369,7 @@ files:
367
369
  - lib/karafka/processing/jobs_queue.rb
368
370
  - lib/karafka/processing/partitioner.rb
369
371
  - lib/karafka/processing/result.rb
370
- - lib/karafka/processing/scheduler.rb
372
+ - lib/karafka/processing/schedulers/default.rb
371
373
  - lib/karafka/processing/strategies/aj_dlq_mom.rb
372
374
  - lib/karafka/processing/strategies/aj_mom.rb
373
375
  - lib/karafka/processing/strategies/base.rb
metadata.gz.sig CHANGED
Binary file
@@ -1,84 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # This Karafka component is a Pro component under a commercial license.
4
- # This Karafka component is NOT licensed under LGPL.
5
- #
6
- # All of the commercial components are present in the lib/karafka/pro directory of this
7
- # repository and their usage requires commercial license agreement.
8
- #
9
- # Karafka has also commercial-friendly license, commercial support and commercial components.
10
- #
11
- # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
- # your code to Maciej Mensfeld.
13
-
14
- module Karafka
15
- module Pro
16
- # Tracker used to keep track of performance metrics
17
- # It provides insights that can be used to optimize processing flow
18
- class PerformanceTracker
19
- include Singleton
20
-
21
- # How many samples do we collect per topic partition
22
- SAMPLES_COUNT = 200
23
-
24
- private_constant :SAMPLES_COUNT
25
-
26
- # Builds up nested concurrent hash for data tracking
27
- def initialize
28
- @processing_times = Concurrent::Map.new do |topics_hash, topic|
29
- topics_hash.compute_if_absent(topic) do
30
- Concurrent::Map.new do |partitions_hash, partition|
31
- # This array does not have to be concurrent because we always access single
32
- # partition data via instrumentation that operates in a single thread via consumer
33
- partitions_hash.compute_if_absent(partition) { [] }
34
- end
35
- end
36
- end
37
- end
38
-
39
- # @param topic [String]
40
- # @param partition [Integer]
41
- # @return [Float] p95 processing time of a single message from a single topic partition
42
- def processing_time_p95(topic, partition)
43
- values = @processing_times[topic][partition]
44
-
45
- return 0 if values.empty?
46
- return values.first if values.size == 1
47
-
48
- percentile(0.95, values)
49
- end
50
-
51
- # @private
52
- # @param event [Karafka::Core::Monitoring::Event] event details
53
- # Tracks time taken to process a single message of a given topic partition
54
- def on_consumer_consumed(event)
55
- consumer = event[:caller]
56
- messages = consumer.messages
57
- topic = messages.metadata.topic
58
- partition = messages.metadata.partition
59
-
60
- samples = @processing_times[topic][partition]
61
- samples << event[:time] / messages.count
62
-
63
- return unless samples.size > SAMPLES_COUNT
64
-
65
- samples.shift
66
- end
67
-
68
- private
69
-
70
- # Computers the requested percentile out of provided values
71
- # @param percentile [Float]
72
- # @param values [Array<String>] all the values based on which we should
73
- # @return [Float] computed percentile
74
- def percentile(percentile, values)
75
- values_sorted = values.sort
76
-
77
- floor = (percentile * (values_sorted.length - 1) + 1).floor - 1
78
- mod = (percentile * (values_sorted.length - 1) + 1).modulo(1)
79
-
80
- values_sorted[floor] + (mod * (values_sorted[floor + 1] - values_sorted[floor]))
81
- end
82
- end
83
- end
84
- end
@@ -1,74 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # This Karafka component is a Pro component under a commercial license.
4
- # This Karafka component is NOT licensed under LGPL.
5
- #
6
- # All of the commercial components are present in the lib/karafka/pro directory of this
7
- # repository and their usage requires commercial license agreement.
8
- #
9
- # Karafka has also commercial-friendly license, commercial support and commercial components.
10
- #
11
- # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
- # your code to Maciej Mensfeld.
13
-
14
- module Karafka
15
- module Pro
16
- module Processing
17
- # Optimizes scheduler that takes into consideration of execution time needed to process
18
- # messages from given topics partitions. It uses the non-preemptive LJF algorithm
19
- #
20
- # This scheduler is designed to optimize execution times on jobs that perform IO operations
21
- # as when taking IO into consideration, the can achieve optimized parallel processing.
22
- #
23
- # This scheduler can also work with virtual partitions.
24
- #
25
- # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
26
- # default FIFO scheduler from the default Karafka scheduler
27
- class Scheduler < ::Karafka::Processing::Scheduler
28
- # Schedules jobs in the LJF order for consumption
29
- #
30
- # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
31
- #
32
- def schedule_consumption(jobs_array)
33
- perf_tracker = PerformanceTracker.instance
34
-
35
- ordered = []
36
-
37
- jobs_array.each do |job|
38
- ordered << [
39
- job,
40
- processing_cost(perf_tracker, job)
41
- ]
42
- end
43
-
44
- ordered.sort_by!(&:last)
45
- ordered.reverse!
46
- ordered.map!(&:first)
47
-
48
- ordered.each do |job|
49
- @queue << job
50
- end
51
- end
52
-
53
- private
54
-
55
- # @param perf_tracker [PerformanceTracker]
56
- # @param job [Karafka::Processing::Jobs::Base] job we will be processing
57
- # @return [Numeric] estimated cost of processing this job
58
- def processing_cost(perf_tracker, job)
59
- if job.is_a?(::Karafka::Processing::Jobs::Consume)
60
- messages = job.messages
61
- message = messages.first
62
-
63
- perf_tracker.processing_time_p95(message.topic, message.partition) * messages.size
64
- else
65
- # LJF will set first the most expensive, but we want to run the zero cost jobs
66
- # related to the lifecycle always first. That is why we "emulate" that they
67
- # the longest possible jobs that anyone can run
68
- Float::INFINITY
69
- end
70
- end
71
- end
72
- end
73
- end
74
- end
@@ -1,38 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Karafka
4
- module Processing
5
- # FIFO scheduler for messages coming from various topics and partitions
6
- class Scheduler
7
- # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
8
- def initialize(queue)
9
- @queue = queue
10
- end
11
-
12
- # Schedules jobs in the fifo order
13
- #
14
- # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
15
- def schedule_consumption(jobs_array)
16
- jobs_array.each do |job|
17
- @queue << job
18
- end
19
- end
20
-
21
- # Both revocation and shutdown jobs can also run in fifo by default
22
- alias schedule_revocation schedule_consumption
23
- alias schedule_shutdown schedule_consumption
24
-
25
- # This scheduler does not have anything to manage as it is a pass through and has no state
26
- def manage
27
- nil
28
- end
29
-
30
- # This scheduler does not need to be cleared because it is stateless
31
- #
32
- # @param _group_id [String] Subscription group id
33
- def clear(_group_id)
34
- nil
35
- end
36
- end
37
- end
38
- end