karafka 2.2.13 → 2.2.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +138 -125
- data/Gemfile.lock +3 -3
- data/docker-compose.yml +2 -0
- data/lib/karafka/admin.rb +109 -3
- data/lib/karafka/app.rb +7 -0
- data/lib/karafka/base_consumer.rb +23 -30
- data/lib/karafka/connection/client.rb +13 -10
- data/lib/karafka/connection/listener.rb +11 -9
- data/lib/karafka/instrumentation/assignments_tracker.rb +96 -0
- data/lib/karafka/instrumentation/callbacks/rebalance.rb +10 -7
- data/lib/karafka/instrumentation/logger_listener.rb +0 -9
- data/lib/karafka/instrumentation/notifications.rb +6 -4
- data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +2 -2
- data/lib/karafka/pro/instrumentation/performance_tracker.rb +85 -0
- data/lib/karafka/pro/loader.rb +2 -2
- data/lib/karafka/pro/processing/schedulers/base.rb +127 -0
- data/lib/karafka/pro/processing/schedulers/default.rb +109 -0
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/default.rb +2 -2
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/vp/default.rb +1 -1
- data/lib/karafka/processing/executor.rb +27 -3
- data/lib/karafka/processing/executors_buffer.rb +3 -3
- data/lib/karafka/processing/jobs/base.rb +19 -2
- data/lib/karafka/processing/jobs/consume.rb +3 -3
- data/lib/karafka/processing/jobs/idle.rb +5 -0
- data/lib/karafka/processing/jobs/revoked.rb +5 -0
- data/lib/karafka/processing/jobs/shutdown.rb +5 -0
- data/lib/karafka/processing/jobs_queue.rb +19 -8
- data/lib/karafka/processing/schedulers/default.rb +41 -0
- data/lib/karafka/processing/strategies/base.rb +13 -4
- data/lib/karafka/processing/strategies/default.rb +17 -7
- data/lib/karafka/processing/worker.rb +4 -1
- data/lib/karafka/routing/proxy.rb +4 -3
- data/lib/karafka/routing/topics.rb +1 -1
- data/lib/karafka/setup/config.rb +4 -1
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +7 -5
- metadata.gz.sig +0 -0
- data/lib/karafka/pro/performance_tracker.rb +0 -84
- data/lib/karafka/pro/processing/scheduler.rb +0 -74
- data/lib/karafka/processing/scheduler.rb +0 -38
data/lib/karafka/setup/config.rb
CHANGED
|
@@ -211,7 +211,7 @@ module Karafka
|
|
|
211
211
|
setting :processing do
|
|
212
212
|
setting :jobs_queue_class, default: Processing::JobsQueue
|
|
213
213
|
# option scheduler [Object] scheduler we will be using
|
|
214
|
-
setting :scheduler_class, default: Processing::Scheduler
|
|
214
|
+
setting :scheduler_class, default: Processing::Schedulers::Default
|
|
215
215
|
# option jobs_builder [Object] jobs builder we want to use
|
|
216
216
|
setting :jobs_builder, default: Processing::JobsBuilder.new
|
|
217
217
|
# option coordinator [Class] work coordinator we want to user for processing coordination
|
|
@@ -278,6 +278,9 @@ module Karafka
|
|
|
278
278
|
# are also configured
|
|
279
279
|
Pro::Loader.post_setup_all(config) if Karafka.pro?
|
|
280
280
|
|
|
281
|
+
# Subscribe the assignments tracker so we can always query all current assignments
|
|
282
|
+
config.monitor.subscribe(Instrumentation::AssignmentsTracker.instance)
|
|
283
|
+
|
|
281
284
|
Karafka::App.initialized!
|
|
282
285
|
end
|
|
283
286
|
|
data/lib/karafka/version.rb
CHANGED
data.tar.gz.sig
CHANGED
|
Binary file
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: karafka
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.2.13
|
|
4
|
+
version: 2.2.14
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Maciej Mensfeld
|
|
@@ -35,7 +35,7 @@ cert_chain:
|
|
|
35
35
|
AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
|
|
36
36
|
msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
|
|
37
37
|
-----END CERTIFICATE-----
|
|
38
|
-
date: 2023-
|
|
38
|
+
date: 2023-12-07 00:00:00.000000000 Z
|
|
39
39
|
dependencies:
|
|
40
40
|
- !ruby/object:Gem::Dependency
|
|
41
41
|
name: karafka-core
|
|
@@ -183,6 +183,7 @@ files:
|
|
|
183
183
|
- lib/karafka/helpers/colorize.rb
|
|
184
184
|
- lib/karafka/helpers/interval_runner.rb
|
|
185
185
|
- lib/karafka/helpers/multi_delegator.rb
|
|
186
|
+
- lib/karafka/instrumentation/assignments_tracker.rb
|
|
186
187
|
- lib/karafka/instrumentation/callbacks/error.rb
|
|
187
188
|
- lib/karafka/instrumentation/callbacks/rebalance.rb
|
|
188
189
|
- lib/karafka/instrumentation/callbacks/statistics.rb
|
|
@@ -229,11 +230,11 @@ files:
|
|
|
229
230
|
- lib/karafka/pro/encryption/messages/middleware.rb
|
|
230
231
|
- lib/karafka/pro/encryption/messages/parser.rb
|
|
231
232
|
- lib/karafka/pro/encryption/setup/config.rb
|
|
233
|
+
- lib/karafka/pro/instrumentation/performance_tracker.rb
|
|
232
234
|
- lib/karafka/pro/iterator.rb
|
|
233
235
|
- lib/karafka/pro/iterator/expander.rb
|
|
234
236
|
- lib/karafka/pro/iterator/tpl_builder.rb
|
|
235
237
|
- lib/karafka/pro/loader.rb
|
|
236
|
-
- lib/karafka/pro/performance_tracker.rb
|
|
237
238
|
- lib/karafka/pro/processing/collapser.rb
|
|
238
239
|
- lib/karafka/pro/processing/coordinator.rb
|
|
239
240
|
- lib/karafka/pro/processing/filters/base.rb
|
|
@@ -248,7 +249,8 @@ files:
|
|
|
248
249
|
- lib/karafka/pro/processing/jobs_builder.rb
|
|
249
250
|
- lib/karafka/pro/processing/jobs_queue.rb
|
|
250
251
|
- lib/karafka/pro/processing/partitioner.rb
|
|
251
|
-
- lib/karafka/pro/processing/scheduler.rb
|
|
252
|
+
- lib/karafka/pro/processing/schedulers/base.rb
|
|
253
|
+
- lib/karafka/pro/processing/schedulers/default.rb
|
|
252
254
|
- lib/karafka/pro/processing/strategies.rb
|
|
253
255
|
- lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb
|
|
254
256
|
- lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb
|
|
@@ -367,7 +369,7 @@ files:
|
|
|
367
369
|
- lib/karafka/processing/jobs_queue.rb
|
|
368
370
|
- lib/karafka/processing/partitioner.rb
|
|
369
371
|
- lib/karafka/processing/result.rb
|
|
370
|
-
- lib/karafka/processing/scheduler.rb
|
|
372
|
+
- lib/karafka/processing/schedulers/default.rb
|
|
371
373
|
- lib/karafka/processing/strategies/aj_dlq_mom.rb
|
|
372
374
|
- lib/karafka/processing/strategies/aj_mom.rb
|
|
373
375
|
- lib/karafka/processing/strategies/base.rb
|
metadata.gz.sig
CHANGED
|
Binary file
|
|
@@ -1,84 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
-
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
-
#
|
|
6
|
-
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
7
|
-
# repository and their usage requires commercial license agreement.
|
|
8
|
-
#
|
|
9
|
-
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
10
|
-
#
|
|
11
|
-
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
12
|
-
# your code to Maciej Mensfeld.
|
|
13
|
-
|
|
14
|
-
module Karafka
|
|
15
|
-
module Pro
|
|
16
|
-
# Tracker used to keep track of performance metrics
|
|
17
|
-
# It provides insights that can be used to optimize processing flow
|
|
18
|
-
class PerformanceTracker
|
|
19
|
-
include Singleton
|
|
20
|
-
|
|
21
|
-
# How many samples do we collect per topic partition
|
|
22
|
-
SAMPLES_COUNT = 200
|
|
23
|
-
|
|
24
|
-
private_constant :SAMPLES_COUNT
|
|
25
|
-
|
|
26
|
-
# Builds up nested concurrent hash for data tracking
|
|
27
|
-
def initialize
|
|
28
|
-
@processing_times = Concurrent::Map.new do |topics_hash, topic|
|
|
29
|
-
topics_hash.compute_if_absent(topic) do
|
|
30
|
-
Concurrent::Map.new do |partitions_hash, partition|
|
|
31
|
-
# This array does not have to be concurrent because we always access single
|
|
32
|
-
# partition data via instrumentation that operates in a single thread via consumer
|
|
33
|
-
partitions_hash.compute_if_absent(partition) { [] }
|
|
34
|
-
end
|
|
35
|
-
end
|
|
36
|
-
end
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
# @param topic [String]
|
|
40
|
-
# @param partition [Integer]
|
|
41
|
-
# @return [Float] p95 processing time of a single message from a single topic partition
|
|
42
|
-
def processing_time_p95(topic, partition)
|
|
43
|
-
values = @processing_times[topic][partition]
|
|
44
|
-
|
|
45
|
-
return 0 if values.empty?
|
|
46
|
-
return values.first if values.size == 1
|
|
47
|
-
|
|
48
|
-
percentile(0.95, values)
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
# @private
|
|
52
|
-
# @param event [Karafka::Core::Monitoring::Event] event details
|
|
53
|
-
# Tracks time taken to process a single message of a given topic partition
|
|
54
|
-
def on_consumer_consumed(event)
|
|
55
|
-
consumer = event[:caller]
|
|
56
|
-
messages = consumer.messages
|
|
57
|
-
topic = messages.metadata.topic
|
|
58
|
-
partition = messages.metadata.partition
|
|
59
|
-
|
|
60
|
-
samples = @processing_times[topic][partition]
|
|
61
|
-
samples << event[:time] / messages.count
|
|
62
|
-
|
|
63
|
-
return unless samples.size > SAMPLES_COUNT
|
|
64
|
-
|
|
65
|
-
samples.shift
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
private
|
|
69
|
-
|
|
70
|
-
# Computers the requested percentile out of provided values
|
|
71
|
-
# @param percentile [Float]
|
|
72
|
-
# @param values [Array<String>] all the values based on which we should
|
|
73
|
-
# @return [Float] computed percentile
|
|
74
|
-
def percentile(percentile, values)
|
|
75
|
-
values_sorted = values.sort
|
|
76
|
-
|
|
77
|
-
floor = (percentile * (values_sorted.length - 1) + 1).floor - 1
|
|
78
|
-
mod = (percentile * (values_sorted.length - 1) + 1).modulo(1)
|
|
79
|
-
|
|
80
|
-
values_sorted[floor] + (mod * (values_sorted[floor + 1] - values_sorted[floor]))
|
|
81
|
-
end
|
|
82
|
-
end
|
|
83
|
-
end
|
|
84
|
-
end
|
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
-
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
-
#
|
|
6
|
-
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
7
|
-
# repository and their usage requires commercial license agreement.
|
|
8
|
-
#
|
|
9
|
-
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
10
|
-
#
|
|
11
|
-
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
12
|
-
# your code to Maciej Mensfeld.
|
|
13
|
-
|
|
14
|
-
module Karafka
|
|
15
|
-
module Pro
|
|
16
|
-
module Processing
|
|
17
|
-
# Optimizes scheduler that takes into consideration of execution time needed to process
|
|
18
|
-
# messages from given topics partitions. It uses the non-preemptive LJF algorithm
|
|
19
|
-
#
|
|
20
|
-
# This scheduler is designed to optimize execution times on jobs that perform IO operations
|
|
21
|
-
# as when taking IO into consideration, the can achieve optimized parallel processing.
|
|
22
|
-
#
|
|
23
|
-
# This scheduler can also work with virtual partitions.
|
|
24
|
-
#
|
|
25
|
-
# Aside from consumption jobs, other jobs do not run often, thus we can leave them with
|
|
26
|
-
# default FIFO scheduler from the default Karafka scheduler
|
|
27
|
-
class Scheduler < ::Karafka::Processing::Scheduler
|
|
28
|
-
# Schedules jobs in the LJF order for consumption
|
|
29
|
-
#
|
|
30
|
-
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
|
|
31
|
-
#
|
|
32
|
-
def schedule_consumption(jobs_array)
|
|
33
|
-
perf_tracker = PerformanceTracker.instance
|
|
34
|
-
|
|
35
|
-
ordered = []
|
|
36
|
-
|
|
37
|
-
jobs_array.each do |job|
|
|
38
|
-
ordered << [
|
|
39
|
-
job,
|
|
40
|
-
processing_cost(perf_tracker, job)
|
|
41
|
-
]
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
ordered.sort_by!(&:last)
|
|
45
|
-
ordered.reverse!
|
|
46
|
-
ordered.map!(&:first)
|
|
47
|
-
|
|
48
|
-
ordered.each do |job|
|
|
49
|
-
@queue << job
|
|
50
|
-
end
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
private
|
|
54
|
-
|
|
55
|
-
# @param perf_tracker [PerformanceTracker]
|
|
56
|
-
# @param job [Karafka::Processing::Jobs::Base] job we will be processing
|
|
57
|
-
# @return [Numeric] estimated cost of processing this job
|
|
58
|
-
def processing_cost(perf_tracker, job)
|
|
59
|
-
if job.is_a?(::Karafka::Processing::Jobs::Consume)
|
|
60
|
-
messages = job.messages
|
|
61
|
-
message = messages.first
|
|
62
|
-
|
|
63
|
-
perf_tracker.processing_time_p95(message.topic, message.partition) * messages.size
|
|
64
|
-
else
|
|
65
|
-
# LJF will set first the most expensive, but we want to run the zero cost jobs
|
|
66
|
-
# related to the lifecycle always first. That is why we "emulate" that they
|
|
67
|
-
# the longest possible jobs that anyone can run
|
|
68
|
-
Float::INFINITY
|
|
69
|
-
end
|
|
70
|
-
end
|
|
71
|
-
end
|
|
72
|
-
end
|
|
73
|
-
end
|
|
74
|
-
end
|
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Karafka
|
|
4
|
-
module Processing
|
|
5
|
-
# FIFO scheduler for messages coming from various topics and partitions
|
|
6
|
-
class Scheduler
|
|
7
|
-
# @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
|
|
8
|
-
def initialize(queue)
|
|
9
|
-
@queue = queue
|
|
10
|
-
end
|
|
11
|
-
|
|
12
|
-
# Schedules jobs in the fifo order
|
|
13
|
-
#
|
|
14
|
-
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
|
|
15
|
-
def schedule_consumption(jobs_array)
|
|
16
|
-
jobs_array.each do |job|
|
|
17
|
-
@queue << job
|
|
18
|
-
end
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
# Both revocation and shutdown jobs can also run in fifo by default
|
|
22
|
-
alias schedule_revocation schedule_consumption
|
|
23
|
-
alias schedule_shutdown schedule_consumption
|
|
24
|
-
|
|
25
|
-
# This scheduler does not have anything to manage as it is a pass through and has no state
|
|
26
|
-
def manage
|
|
27
|
-
nil
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
# This scheduler does not need to be cleared because it is stateless
|
|
31
|
-
#
|
|
32
|
-
# @param _group_id [String] Subscription group id
|
|
33
|
-
def clear(_group_id)
|
|
34
|
-
nil
|
|
35
|
-
end
|
|
36
|
-
end
|
|
37
|
-
end
|
|
38
|
-
end
|