karafka 2.2.12 → 2.2.14

This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (57)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +141 -121
  4. data/Gemfile.lock +10 -10
  5. data/config/locales/errors.yml +2 -1
  6. data/docker-compose.yml +2 -0
  7. data/lib/karafka/admin.rb +109 -3
  8. data/lib/karafka/app.rb +7 -0
  9. data/lib/karafka/base_consumer.rb +23 -30
  10. data/lib/karafka/connection/client.rb +13 -10
  11. data/lib/karafka/connection/consumer_group_coordinator.rb +3 -3
  12. data/lib/karafka/connection/listener.rb +18 -10
  13. data/lib/karafka/connection/listeners_batch.rb +6 -1
  14. data/lib/karafka/contracts/config.rb +2 -1
  15. data/lib/karafka/instrumentation/assignments_tracker.rb +96 -0
  16. data/lib/karafka/instrumentation/callbacks/rebalance.rb +10 -7
  17. data/lib/karafka/instrumentation/logger_listener.rb +0 -9
  18. data/lib/karafka/instrumentation/notifications.rb +6 -3
  19. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +2 -2
  20. data/lib/karafka/pro/instrumentation/performance_tracker.rb +85 -0
  21. data/lib/karafka/pro/loader.rb +3 -2
  22. data/lib/karafka/pro/processing/coordinator.rb +12 -6
  23. data/lib/karafka/pro/processing/jobs_queue.rb +109 -0
  24. data/lib/karafka/pro/processing/schedulers/base.rb +127 -0
  25. data/lib/karafka/pro/processing/schedulers/default.rb +109 -0
  26. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
  27. data/lib/karafka/pro/processing/strategies/default.rb +3 -1
  28. data/lib/karafka/pro/processing/strategies/lrj/default.rb +10 -1
  29. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
  30. data/lib/karafka/pro/processing/strategies/vp/default.rb +9 -5
  31. data/lib/karafka/processing/coordinator.rb +13 -7
  32. data/lib/karafka/processing/executor.rb +27 -3
  33. data/lib/karafka/processing/executors_buffer.rb +3 -3
  34. data/lib/karafka/processing/jobs/base.rb +19 -2
  35. data/lib/karafka/processing/jobs/consume.rb +3 -3
  36. data/lib/karafka/processing/jobs/idle.rb +5 -0
  37. data/lib/karafka/processing/jobs/revoked.rb +5 -0
  38. data/lib/karafka/processing/jobs/shutdown.rb +5 -0
  39. data/lib/karafka/processing/jobs_queue.rb +45 -17
  40. data/lib/karafka/processing/schedulers/default.rb +41 -0
  41. data/lib/karafka/processing/strategies/base.rb +13 -4
  42. data/lib/karafka/processing/strategies/default.rb +17 -5
  43. data/lib/karafka/processing/worker.rb +4 -1
  44. data/lib/karafka/routing/builder.rb +32 -17
  45. data/lib/karafka/routing/proxy.rb +4 -3
  46. data/lib/karafka/routing/subscription_group.rb +11 -6
  47. data/lib/karafka/routing/topics.rb +1 -1
  48. data/lib/karafka/runner.rb +1 -1
  49. data/lib/karafka/setup/config.rb +5 -1
  50. data/lib/karafka/version.rb +1 -1
  51. data/lib/karafka.rb +0 -1
  52. data.tar.gz.sig +0 -0
  53. metadata +8 -5
  54. metadata.gz.sig +0 -0
  55. data/lib/karafka/pro/performance_tracker.rb +0 -84
  56. data/lib/karafka/pro/processing/scheduler.rb +0 -75
  57. data/lib/karafka/processing/scheduler.rb +0 -22
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: karafka
  version: !ruby/object:Gem::Version
- version: 2.2.12
+ version: 2.2.14
  platform: ruby
  authors:
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
  AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
  msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
  -----END CERTIFICATE-----
- date: 2023-11-09 00:00:00.000000000 Z
+ date: 2023-12-07 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: karafka-core
@@ -183,6 +183,7 @@ files:
  - lib/karafka/helpers/colorize.rb
  - lib/karafka/helpers/interval_runner.rb
  - lib/karafka/helpers/multi_delegator.rb
+ - lib/karafka/instrumentation/assignments_tracker.rb
  - lib/karafka/instrumentation/callbacks/error.rb
  - lib/karafka/instrumentation/callbacks/rebalance.rb
  - lib/karafka/instrumentation/callbacks/statistics.rb
@@ -229,11 +230,11 @@ files:
  - lib/karafka/pro/encryption/messages/middleware.rb
  - lib/karafka/pro/encryption/messages/parser.rb
  - lib/karafka/pro/encryption/setup/config.rb
+ - lib/karafka/pro/instrumentation/performance_tracker.rb
  - lib/karafka/pro/iterator.rb
  - lib/karafka/pro/iterator/expander.rb
  - lib/karafka/pro/iterator/tpl_builder.rb
  - lib/karafka/pro/loader.rb
- - lib/karafka/pro/performance_tracker.rb
  - lib/karafka/pro/processing/collapser.rb
  - lib/karafka/pro/processing/coordinator.rb
  - lib/karafka/pro/processing/filters/base.rb
@@ -246,8 +247,10 @@ files:
  - lib/karafka/pro/processing/jobs/consume_non_blocking.rb
  - lib/karafka/pro/processing/jobs/revoked_non_blocking.rb
  - lib/karafka/pro/processing/jobs_builder.rb
+ - lib/karafka/pro/processing/jobs_queue.rb
  - lib/karafka/pro/processing/partitioner.rb
- - lib/karafka/pro/processing/scheduler.rb
+ - lib/karafka/pro/processing/schedulers/base.rb
+ - lib/karafka/pro/processing/schedulers/default.rb
  - lib/karafka/pro/processing/strategies.rb
  - lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb
  - lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb
@@ -366,7 +369,7 @@ files:
  - lib/karafka/processing/jobs_queue.rb
  - lib/karafka/processing/partitioner.rb
  - lib/karafka/processing/result.rb
- - lib/karafka/processing/scheduler.rb
+ - lib/karafka/processing/schedulers/default.rb
  - lib/karafka/processing/strategies/aj_dlq_mom.rb
  - lib/karafka/processing/strategies/aj_mom.rb
  - lib/karafka/processing/strategies/base.rb
metadata.gz.sig CHANGED
Binary file
data/lib/karafka/pro/performance_tracker.rb DELETED
@@ -1,84 +0,0 @@
- # frozen_string_literal: true
-
- # This Karafka component is a Pro component under a commercial license.
- # This Karafka component is NOT licensed under LGPL.
- #
- # All of the commercial components are present in the lib/karafka/pro directory of this
- # repository and their usage requires commercial license agreement.
- #
- # Karafka has also commercial-friendly license, commercial support and commercial components.
- #
- # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
- # your code to Maciej Mensfeld.
-
- module Karafka
-   module Pro
-     # Tracker used to keep track of performance metrics
-     # It provides insights that can be used to optimize processing flow
-     class PerformanceTracker
-       include Singleton
-
-       # How many samples do we collect per topic partition
-       SAMPLES_COUNT = 200
-
-       private_constant :SAMPLES_COUNT
-
-       # Builds up nested concurrent hash for data tracking
-       def initialize
-         @processing_times = Concurrent::Map.new do |topics_hash, topic|
-           topics_hash.compute_if_absent(topic) do
-             Concurrent::Map.new do |partitions_hash, partition|
-               # This array does not have to be concurrent because we always access single
-               # partition data via instrumentation that operates in a single thread via consumer
-               partitions_hash.compute_if_absent(partition) { [] }
-             end
-           end
-         end
-       end
-
-       # @param topic [String]
-       # @param partition [Integer]
-       # @return [Float] p95 processing time of a single message from a single topic partition
-       def processing_time_p95(topic, partition)
-         values = @processing_times[topic][partition]
-
-         return 0 if values.empty?
-         return values.first if values.size == 1
-
-         percentile(0.95, values)
-       end
-
-       # @private
-       # @param event [Karafka::Core::Monitoring::Event] event details
-       # Tracks time taken to process a single message of a given topic partition
-       def on_consumer_consumed(event)
-         consumer = event[:caller]
-         messages = consumer.messages
-         topic = messages.metadata.topic
-         partition = messages.metadata.partition
-
-         samples = @processing_times[topic][partition]
-         samples << event[:time] / messages.count
-
-         return unless samples.size > SAMPLES_COUNT
-
-         samples.shift
-       end
-
-       private
-
-       # Computers the requested percentile out of provided values
-       # @param percentile [Float]
-       # @param values [Array<String>] all the values based on which we should
-       # @return [Float] computed percentile
-       def percentile(percentile, values)
-         values_sorted = values.sort
-
-         floor = (percentile * (values_sorted.length - 1) + 1).floor - 1
-         mod = (percentile * (values_sorted.length - 1) + 1).modulo(1)
-
-         values_sorted[floor] + (mod * (values_sorted[floor + 1] - values_sorted[floor]))
-       end
-     end
-   end
- end
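The removed tracker's #percentile helper computes percentiles by linear interpolation between the two nearest sorted samples. Below is a minimal standalone sketch of the same arithmetic with made-up sample values (plain Ruby, no Karafka or concurrent-ruby required):

# Linear-interpolation percentile, mirroring the removed #percentile helper
def percentile(fraction, values)
  sorted = values.sort
  rank = fraction * (sorted.length - 1) + 1

  floor = rank.floor - 1
  mod = rank.modulo(1)

  sorted[floor] + (mod * (sorted[floor + 1] - sorted[floor]))
end

# Hypothetical per-message processing times in seconds
samples = [0.010, 0.012, 0.015, 0.020, 0.120]

# rank = 0.95 * 4 + 1 = 4.8, so we interpolate between sorted[3] and sorted[4]:
# 0.020 + 0.8 * (0.120 - 0.020) = 0.100
puts percentile(0.95, samples) # ~0.1, modulo float rounding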
data/lib/karafka/pro/processing/scheduler.rb DELETED
@@ -1,75 +0,0 @@
- # frozen_string_literal: true
-
- # This Karafka component is a Pro component under a commercial license.
- # This Karafka component is NOT licensed under LGPL.
- #
- # All of the commercial components are present in the lib/karafka/pro directory of this
- # repository and their usage requires commercial license agreement.
- #
- # Karafka has also commercial-friendly license, commercial support and commercial components.
- #
- # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
- # your code to Maciej Mensfeld.
-
- module Karafka
-   module Pro
-     module Processing
-       # Optimizes scheduler that takes into consideration of execution time needed to process
-       # messages from given topics partitions. It uses the non-preemptive LJF algorithm
-       #
-       # This scheduler is designed to optimize execution times on jobs that perform IO operations
-       # as when taking IO into consideration, the can achieve optimized parallel processing.
-       #
-       # This scheduler can also work with virtual partitions.
-       #
-       # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
-       # default FIFO scheduler from the default Karafka scheduler
-       class Scheduler < ::Karafka::Processing::Scheduler
-         # Schedules jobs in the LJF order for consumption
-         #
-         # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
-         # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
-         #
-         def schedule_consumption(queue, jobs_array)
-           perf_tracker = PerformanceTracker.instance
-
-           ordered = []
-
-           jobs_array.each do |job|
-             ordered << [
-               job,
-               processing_cost(perf_tracker, job)
-             ]
-           end
-
-           ordered.sort_by!(&:last)
-           ordered.reverse!
-           ordered.map!(&:first)
-
-           ordered.each do |job|
-             queue << job
-           end
-         end
-
-         private
-
-         # @param perf_tracker [PerformanceTracker]
-         # @param job [Karafka::Processing::Jobs::Base] job we will be processing
-         # @return [Numeric] estimated cost of processing this job
-         def processing_cost(perf_tracker, job)
-           if job.is_a?(::Karafka::Processing::Jobs::Consume)
-             messages = job.messages
-             message = messages.first
-
-             perf_tracker.processing_time_p95(message.topic, message.partition) * messages.size
-           else
-             # LJF will set first the most expensive, but we want to run the zero cost jobs
-             # related to the lifecycle always first. That is why we "emulate" that they
-             # the longest possible jobs that anyone can run
-             Float::INFINITY
-           end
-         end
-       end
-     end
-   end
- end
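The ordering above boils down to: estimate each job's cost (p95 processing time times message count for consume jobs, Float::INFINITY for lifecycle jobs) and enqueue the most expensive first. A minimal sketch of that LJF ordering, using Struct-based stand-ins rather than Karafka's real job classes:

# Stand-in job: a name plus an estimated cost. In the removed scheduler the cost of a
# consume job is p95 processing time * message count; lifecycle jobs get infinity so
# they always end up at the front of the queue.
Job = Struct.new(:name, :cost)

jobs = [
  Job.new('consume topic-a/0', 0.8),
  Job.new('revoked topic-b/2', Float::INFINITY),
  Job.new('consume topic-a/1', 2.5),
  Job.new('consume topic-c/0', 0.1)
]

# Non-preemptive LJF: highest estimated cost first
ordered = jobs.sort_by { |job| -job.cost }

ordered.each { |job| puts job.name }
# revoked topic-b/2
# consume topic-a/1
# consume topic-a/0
# consume topic-c/0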
data/lib/karafka/processing/scheduler.rb DELETED
@@ -1,22 +0,0 @@
- # frozen_string_literal: true
-
- module Karafka
-   module Processing
-     # FIFO scheduler for messages coming from various topics and partitions
-     class Scheduler
-       # Schedules jobs in the fifo order
-       #
-       # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
-       # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
-       def schedule_consumption(queue, jobs_array)
-         jobs_array.each do |job|
-           queue << job
-         end
-       end
-
-       # Both revocation and shutdown jobs can also run in fifo by default
-       alias schedule_revocation schedule_consumption
-       alias schedule_shutdown schedule_consumption
-     end
-   end
- end
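For context, the only contract this scheduler has with the rest of the framework is schedule_consumption(queue, jobs_array), and the queue only needs to respond to <<. A toy invocation against the class as it shipped in 2.2.12 (the Array queue and string jobs below are stand-ins, not Karafka's real JobsQueue or job objects):

require 'karafka' # assumes karafka <= 2.2.12, where Karafka::Processing::Scheduler still exists

queue = []                    # anything responding to #<< is enough for this sketch
jobs  = %w[job-1 job-2 job-3] # stand-ins for Karafka::Processing::Jobs::Base instances

Karafka::Processing::Scheduler.new.schedule_consumption(queue, jobs)

queue # => ["job-1", "job-2", "job-3"] - FIFO: same order they were given in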