karafka 2.0.0.alpha6 → 2.0.0.beta3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +42 -2
- data/Gemfile.lock +9 -9
- data/bin/integrations +36 -14
- data/bin/scenario +29 -0
- data/config/errors.yml +1 -0
- data/docker-compose.yml +3 -0
- data/karafka.gemspec +1 -1
- data/lib/active_job/karafka.rb +2 -2
- data/lib/karafka/active_job/routing/extensions.rb +31 -0
- data/lib/karafka/base_consumer.rb +74 -6
- data/lib/karafka/connection/client.rb +39 -16
- data/lib/karafka/connection/listener.rb +103 -34
- data/lib/karafka/connection/listeners_batch.rb +24 -0
- data/lib/karafka/connection/messages_buffer.rb +48 -61
- data/lib/karafka/connection/pauses_manager.rb +2 -2
- data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
- data/lib/karafka/contracts/config.rb +10 -1
- data/lib/karafka/helpers/async.rb +33 -0
- data/lib/karafka/instrumentation/logger_listener.rb +37 -10
- data/lib/karafka/instrumentation/monitor.rb +4 -0
- data/lib/karafka/licenser.rb +26 -7
- data/lib/karafka/messages/batch_metadata.rb +26 -3
- data/lib/karafka/messages/builders/batch_metadata.rb +17 -29
- data/lib/karafka/messages/builders/message.rb +1 -0
- data/lib/karafka/messages/builders/messages.rb +4 -12
- data/lib/karafka/pro/active_job/consumer.rb +48 -0
- data/lib/karafka/pro/active_job/dispatcher.rb +3 -3
- data/lib/karafka/pro/active_job/job_options_contract.rb +2 -2
- data/lib/karafka/pro/base_consumer_extensions.rb +66 -0
- data/lib/karafka/pro/loader.rb +27 -4
- data/lib/karafka/pro/performance_tracker.rb +80 -0
- data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
- data/lib/karafka/pro/processing/jobs_builder.rb +31 -0
- data/lib/karafka/pro/routing/extensions.rb +32 -0
- data/lib/karafka/pro/scheduler.rb +54 -0
- data/lib/karafka/processing/executor.rb +26 -11
- data/lib/karafka/processing/executors_buffer.rb +15 -7
- data/lib/karafka/processing/jobs/base.rb +28 -0
- data/lib/karafka/processing/jobs/consume.rb +11 -4
- data/lib/karafka/processing/jobs_builder.rb +28 -0
- data/lib/karafka/processing/jobs_queue.rb +28 -16
- data/lib/karafka/processing/worker.rb +39 -10
- data/lib/karafka/processing/workers_batch.rb +5 -0
- data/lib/karafka/routing/consumer_group.rb +1 -1
- data/lib/karafka/routing/subscription_group.rb +2 -2
- data/lib/karafka/routing/subscription_groups_builder.rb +3 -2
- data/lib/karafka/routing/topics.rb +38 -0
- data/lib/karafka/runner.rb +19 -27
- data/lib/karafka/scheduler.rb +20 -0
- data/lib/karafka/server.rb +24 -23
- data/lib/karafka/setup/config.rb +6 -1
- data/lib/karafka/status.rb +1 -3
- data/lib/karafka/time_trackers/pause.rb +10 -2
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +19 -4
- metadata.gz.sig +0 -0
- data/lib/karafka/active_job/routing_extensions.rb +0 -18
data/lib/karafka/licenser.rb
CHANGED
@@ -33,6 +33,8 @@ module Karafka
|
|
33
33
|
|
34
34
|
return if license_config.expires_on > Date.today
|
35
35
|
|
36
|
+
raise_expired_license_token_in_dev(license_config.expires_on)
|
37
|
+
|
36
38
|
notify_if_license_expired(license_config.expires_on)
|
37
39
|
end
|
38
40
|
|
@@ -53,24 +55,41 @@ module Karafka
|
|
53
55
|
)
|
54
56
|
end
|
55
57
|
|
58
|
+
# Raises an error for test and dev environments if running pro with expired license
|
59
|
+
# We never want to cause any non-dev problems and we should never crash anything else than
|
60
|
+
# tests and development envs.
|
61
|
+
#
|
62
|
+
# @param expires_on [Date] when the license expires
|
63
|
+
def raise_expired_license_token_in_dev(expires_on)
|
64
|
+
env = Karafka::App.env
|
65
|
+
|
66
|
+
return unless env.development? || env.test?
|
67
|
+
|
68
|
+
raise Errors::ExpiredLicenseTokenError.new, expired_message(expires_on)
|
69
|
+
end
|
70
|
+
|
56
71
|
# We do not raise an error here as we don't want to cause any problems to someone that runs
|
57
72
|
# Karafka on production. Error message is enough.
|
58
73
|
#
|
59
74
|
# @param expires_on [Date] when the license expires
|
60
75
|
def notify_if_license_expired(expires_on)
|
61
|
-
|
62
|
-
Your license expired on #{expires_on}.
|
63
|
-
Please reach us at contact@karafka.io or visit https://karafka.io to obtain a valid one.
|
64
|
-
MSG
|
65
|
-
|
66
|
-
Karafka.logger.error(message)
|
76
|
+
Karafka.logger.error(expired_message(expires_on))
|
67
77
|
|
68
78
|
Karafka.monitor.instrument(
|
69
79
|
'error.occurred',
|
70
80
|
caller: self,
|
71
|
-
error: Errors::ExpiredLicenseTokenError.new(
|
81
|
+
error: Errors::ExpiredLicenseTokenError.new(expired_message(expires_on)),
|
72
82
|
type: 'licenser.expired'
|
73
83
|
)
|
74
84
|
end
|
85
|
+
|
86
|
+
# @param expires_on [Date] when the license expires
|
87
|
+
# @return [String] expired message
|
88
|
+
def expired_message(expires_on)
|
89
|
+
<<~MSG.tr("\n", ' ')
|
90
|
+
Your license expired on #{expires_on}.
|
91
|
+
Please reach us at contact@karafka.io or visit https://karafka.io to obtain a valid one.
|
92
|
+
MSG
|
93
|
+
end
|
75
94
|
end
|
76
95
|
end
|
@@ -13,10 +13,33 @@ module Karafka
|
|
13
13
|
:deserializer,
|
14
14
|
:partition,
|
15
15
|
:topic,
|
16
|
+
:created_at,
|
16
17
|
:scheduled_at,
|
17
|
-
:
|
18
|
-
:processing_lag,
|
18
|
+
:processed_at,
|
19
19
|
keyword_init: true
|
20
|
-
)
|
20
|
+
) do
|
21
|
+
# This lag describes how long it took for a message to be consumed from the moment it was
|
22
|
+
# created
|
23
|
+
def consumption_lag
|
24
|
+
time_distance_in_ms(processed_at, created_at)
|
25
|
+
end
|
26
|
+
|
27
|
+
# This lag describes how long a batch had to wait before it was picked up by one of the
|
28
|
+
# workers
|
29
|
+
def processing_lag
|
30
|
+
time_distance_in_ms(processed_at, scheduled_at)
|
31
|
+
end
|
32
|
+
|
33
|
+
private
|
34
|
+
|
35
|
+
# Computes time distance in between two times in ms
|
36
|
+
#
|
37
|
+
# @param time1 [Time]
|
38
|
+
# @param time2 [Time]
|
39
|
+
# @return [Integer] distance in between two times in ms
|
40
|
+
def time_distance_in_ms(time1, time2)
|
41
|
+
((time1 - time2) * 1_000).round
|
42
|
+
end
|
43
|
+
end
|
21
44
|
end
|
22
45
|
end
|
@@ -8,42 +8,30 @@ module Karafka
|
|
8
8
|
class << self
|
9
9
|
# Creates metadata based on the kafka batch data.
|
10
10
|
#
|
11
|
-
# @param
|
11
|
+
# @param messages [Array<Karafka::Messages::Message>] messages array
|
12
12
|
# @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
|
13
13
|
# @param scheduled_at [Time] moment when the batch was scheduled for processing
|
14
14
|
# @return [Karafka::Messages::BatchMetadata] batch metadata object
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
def call(
|
19
|
-
now = Time.now
|
20
|
-
|
15
|
+
#
|
16
|
+
# @note We do not set `processed_at` as this needs to be assigned when the batch is
|
17
|
+
# picked up for processing.
|
18
|
+
def call(messages, topic, scheduled_at)
|
21
19
|
Karafka::Messages::BatchMetadata.new(
|
22
|
-
size:
|
23
|
-
first_offset:
|
24
|
-
last_offset:
|
20
|
+
size: messages.count,
|
21
|
+
first_offset: messages.first.offset,
|
22
|
+
last_offset: messages.last.offset,
|
25
23
|
deserializer: topic.deserializer,
|
26
|
-
partition:
|
24
|
+
partition: messages.first.partition,
|
27
25
|
topic: topic.name,
|
26
|
+
# We go with the assumption that the creation of the whole batch is the last message
|
27
|
+
# creation time
|
28
|
+
created_at: messages.last.timestamp,
|
29
|
+
# When this batch was built and scheduled for execution
|
28
30
|
scheduled_at: scheduled_at,
|
29
|
-
#
|
30
|
-
#
|
31
|
-
|
32
|
-
|
33
|
-
# one of the workers
|
34
|
-
processing_lag: time_distance_in_ms(now, scheduled_at)
|
35
|
-
).freeze
|
36
|
-
end
|
37
|
-
|
38
|
-
private
|
39
|
-
|
40
|
-
# Computes time distance in between two times in ms
|
41
|
-
#
|
42
|
-
# @param time1 [Time]
|
43
|
-
# @param time2 [Time]
|
44
|
-
# @return [Integer] distance in between two times in ms
|
45
|
-
def time_distance_in_ms(time1, time2)
|
46
|
-
((time1 - time2) * 1_000).round
|
31
|
+
# We build the batch metadata when we pick up the job in the worker, thus we can use
|
32
|
+
# current time here
|
33
|
+
processed_at: Time.now
|
34
|
+
)
|
47
35
|
end
|
48
36
|
end
|
49
37
|
end
|
@@ -9,27 +9,19 @@ module Karafka
|
|
9
9
|
# Creates messages batch with messages inside based on the incoming messages and the
|
10
10
|
# topic from which it comes.
|
11
11
|
#
|
12
|
-
# @param
|
12
|
+
# @param messages [Array<Karafka::Messages::Message>] karafka messages array
|
13
13
|
# @param topic [Karafka::Routing::Topic] topic for which we've received messages
|
14
14
|
# @param received_at [Time] moment in time when the messages were received
|
15
15
|
# @return [Karafka::Messages::Messages] messages batch object
|
16
|
-
def call(
|
17
|
-
messages_array = kafka_messages.map do |message|
|
18
|
-
Karafka::Messages::Builders::Message.call(
|
19
|
-
message,
|
20
|
-
topic,
|
21
|
-
received_at
|
22
|
-
)
|
23
|
-
end
|
24
|
-
|
16
|
+
def call(messages, topic, received_at)
|
25
17
|
metadata = BatchMetadata.call(
|
26
|
-
|
18
|
+
messages,
|
27
19
|
topic,
|
28
20
|
received_at
|
29
21
|
).freeze
|
30
22
|
|
31
23
|
Karafka::Messages::Messages.new(
|
32
|
-
|
24
|
+
messages,
|
33
25
|
metadata
|
34
26
|
).freeze
|
35
27
|
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component.
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
|
+
# repository and their usage requires commercial license agreement.
|
6
|
+
#
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
8
|
+
#
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
10
|
+
# your code to Maciej Mensfeld.
|
11
|
+
|
12
|
+
module Karafka
|
13
|
+
module Pro
|
14
|
+
module ActiveJob
|
15
|
+
# Pro ActiveJob consumer that is supposed to handle long-running jobs as well as short
|
16
|
+
# running jobs
|
17
|
+
#
|
18
|
+
# When in LRJ, it will pause a given partition forever and will resume its processing only
|
19
|
+
# when all the jobs are done processing.
|
20
|
+
#
|
21
|
+
# It contains slightly better revocation warranties than the regular blocking consumer as
|
22
|
+
# it can stop processing batch of jobs in the middle after the revocation.
|
23
|
+
class Consumer < Karafka::ActiveJob::Consumer
|
24
|
+
# Runs ActiveJob jobs processing and handles lrj if needed
|
25
|
+
def consume
|
26
|
+
messages.each do |message|
|
27
|
+
# If for any reason we've lost this partition, not worth iterating over new messages
|
28
|
+
# as they are no longer ours
|
29
|
+
return if revoked?
|
30
|
+
break if Karafka::App.stopping?
|
31
|
+
|
32
|
+
::ActiveJob::Base.execute(
|
33
|
+
::ActiveSupport::JSON.decode(message.raw_payload)
|
34
|
+
)
|
35
|
+
|
36
|
+
# We check it twice as the job may be long running
|
37
|
+
return if revoked?
|
38
|
+
|
39
|
+
mark_as_consumed(message)
|
40
|
+
|
41
|
+
# Do not process more if we are shutting down
|
42
|
+
break if Karafka::App.stopping?
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
@@ -1,8 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
# This Karafka component is a Pro component.
|
4
|
-
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
|
-
# and their usage requires commercial license agreement.
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
|
+
# repository and their usage requires commercial license agreement.
|
6
6
|
#
|
7
7
|
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
8
8
|
#
|
@@ -18,7 +18,7 @@ module Karafka
|
|
18
18
|
# much better and more granular control over the dispatch and consumption process.
|
19
19
|
class Dispatcher < ::Karafka::ActiveJob::Dispatcher
|
20
20
|
# Defaults for dispatching
|
21
|
-
#
|
21
|
+
# They can be updated by using `#karafka_options` on the job
|
22
22
|
DEFAULTS = {
|
23
23
|
dispatch_method: :produce_async,
|
24
24
|
# We don't create a dummy proc based partitioner as we would have to evaluate it with
|
@@ -1,8 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
# This Karafka component is a Pro component.
|
4
|
-
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
|
-
# and their usage requires commercial license agreement.
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
|
+
# repository and their usage requires commercial license agreement.
|
6
6
|
#
|
7
7
|
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
8
8
|
#
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component.
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
|
+
# repository and their usage requires commercial license agreement.
|
6
|
+
#
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
8
|
+
#
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
10
|
+
# your code to Maciej Mensfeld.
|
11
|
+
|
12
|
+
module Karafka
|
13
|
+
module Pro
|
14
|
+
# Extensions to the base consumer that make it more pro and fancy
|
15
|
+
#
|
16
|
+
# @note In case of using lrj, manual pausing may not be the best idea as resume needs to happen
|
17
|
+
# after each batch is processed.
|
18
|
+
#
|
19
|
+
# They need to be added to the consumer via `#prepend`
|
20
|
+
module BaseConsumerExtensions
|
21
|
+
# Pause for tops 31 years
|
22
|
+
MAX_PAUSE_TIME = 1_000_000_000_000
|
23
|
+
|
24
|
+
private_constant :MAX_PAUSE_TIME
|
25
|
+
|
26
|
+
# Pauses processing of a given partition until we're done with the processing
|
27
|
+
# This ensures that we can keep polling without reaching the `max.poll.interval`
|
28
|
+
def on_prepare
|
29
|
+
# Pause at the first message in a batch. That way in case of a crash, we will not lose
|
30
|
+
# any messages
|
31
|
+
pause(messages.first.offset, MAX_PAUSE_TIME) if topic.long_running_job?
|
32
|
+
|
33
|
+
super
|
34
|
+
end
|
35
|
+
|
36
|
+
# After user code, we seek and un-pause our partition
|
37
|
+
def on_consume
|
38
|
+
# If anything went wrong here, we should not run any partition management as it's Karafka
|
39
|
+
# core that will handle the backoff
|
40
|
+
return unless super
|
41
|
+
|
42
|
+
return unless topic.long_running_job?
|
43
|
+
|
44
|
+
# Nothing to resume if it was revoked
|
45
|
+
return if revoked?
|
46
|
+
|
47
|
+
# Once processing is done, we move to the new offset based on commits
|
48
|
+
seek(@seek_offset || messages.first.offset)
|
49
|
+
resume
|
50
|
+
end
|
51
|
+
|
52
|
+
# Marks this consumer revoked state as true
|
53
|
+
# This allows us for things like lrj to finish early as this state may change during lrj
|
54
|
+
# execution
|
55
|
+
def on_revoked
|
56
|
+
@revoked = true
|
57
|
+
super
|
58
|
+
end
|
59
|
+
|
60
|
+
# @return [Boolean] true if partition was revoked from the current consumer
|
61
|
+
def revoked?
|
62
|
+
@revoked || false
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
data/lib/karafka/pro/loader.rb
CHANGED
@@ -1,27 +1,50 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
# This Karafka component is a Pro component.
|
4
|
-
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
|
-
# and their usage requires commercial license agreement.
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
|
+
# repository and their usage requires commercial license agreement.
|
6
6
|
#
|
7
7
|
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
8
8
|
#
|
9
9
|
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
10
10
|
# your code to Maciej Mensfeld.
|
11
|
+
|
11
12
|
module Karafka
|
12
13
|
module Pro
|
13
14
|
# Loader requires and loads all the pro components only when they are needed
|
14
15
|
class Loader
|
16
|
+
# All the pro components that need to be loaded
|
17
|
+
COMPONENTS = %w[
|
18
|
+
performance_tracker
|
19
|
+
scheduler
|
20
|
+
base_consumer_extensions
|
21
|
+
processing/jobs/consume_non_blocking
|
22
|
+
processing/jobs_builder
|
23
|
+
routing/extensions
|
24
|
+
active_job/consumer
|
25
|
+
active_job/dispatcher
|
26
|
+
active_job/job_options_contract
|
27
|
+
].freeze
|
28
|
+
|
29
|
+
private_constant :COMPONENTS
|
30
|
+
|
15
31
|
class << self
|
16
32
|
# Loads all the pro components and configures them wherever it is expected
|
17
33
|
# @param config [Dry::Configurable::Config] whole app config that we can alter with pro
|
18
34
|
# components
|
19
35
|
def setup(config)
|
20
|
-
require_relative
|
21
|
-
require_relative 'active_job/job_options_contract'
|
36
|
+
COMPONENTS.each { |component| require_relative(component) }
|
22
37
|
|
38
|
+
config.internal.scheduler = Scheduler.new
|
39
|
+
config.internal.jobs_builder = Processing::JobsBuilder.new
|
40
|
+
config.internal.active_job.consumer = ActiveJob::Consumer
|
23
41
|
config.internal.active_job.dispatcher = ActiveJob::Dispatcher.new
|
24
42
|
config.internal.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
|
43
|
+
|
44
|
+
::Karafka::Routing::Topic.include(Routing::Extensions)
|
45
|
+
::Karafka::BaseConsumer.prepend(BaseConsumerExtensions)
|
46
|
+
|
47
|
+
config.monitor.subscribe(PerformanceTracker.instance)
|
25
48
|
end
|
26
49
|
end
|
27
50
|
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component.
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
|
+
# repository and their usage requires commercial license agreement.
|
6
|
+
#
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
8
|
+
#
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
10
|
+
# your code to Maciej Mensfeld.
|
11
|
+
|
12
|
+
module Karafka
|
13
|
+
module Pro
|
14
|
+
# Tracker used to keep track of performance metrics
|
15
|
+
# It provides insights that can be used to optimize processing flow
|
16
|
+
class PerformanceTracker
|
17
|
+
include Singleton
|
18
|
+
|
19
|
+
# How many samples do we collect per topic partition
|
20
|
+
SAMPLES_COUNT = 200
|
21
|
+
|
22
|
+
private_constant :SAMPLES_COUNT
|
23
|
+
|
24
|
+
# Builds up nested concurrent hash for data tracking
|
25
|
+
def initialize
|
26
|
+
@processing_times = Concurrent::Hash.new do |topics_hash, topic|
|
27
|
+
topics_hash[topic] = Concurrent::Hash.new do |partitions_hash, partition|
|
28
|
+
# This array does not have to be concurrent because we always access single partition
|
29
|
+
# data via instrumentation that operates in a single thread via consumer
|
30
|
+
partitions_hash[partition] = []
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
# @param topic [String]
|
36
|
+
# @param partition [Integer]
|
37
|
+
# @return [Float] p95 processing time of a single message from a single topic partition
|
38
|
+
def processing_time_p95(topic, partition)
|
39
|
+
values = @processing_times[topic][partition]
|
40
|
+
|
41
|
+
return 0 if values.empty?
|
42
|
+
return values.first if values.size == 1
|
43
|
+
|
44
|
+
percentile(0.95, values)
|
45
|
+
end
|
46
|
+
|
47
|
+
# @private
|
48
|
+
# @param event [Dry::Events::Event] event details
|
49
|
+
# Tracks time taken to process a single message of a given topic partition
|
50
|
+
def on_consumer_consumed(event)
|
51
|
+
consumer = event[:caller]
|
52
|
+
messages = consumer.messages
|
53
|
+
topic = messages.metadata.topic
|
54
|
+
partition = messages.metadata.partition
|
55
|
+
|
56
|
+
samples = @processing_times[topic][partition]
|
57
|
+
samples << event[:time] / messages.count
|
58
|
+
|
59
|
+
return unless samples.size > SAMPLES_COUNT
|
60
|
+
|
61
|
+
samples.shift
|
62
|
+
end
|
63
|
+
|
64
|
+
private
|
65
|
+
|
66
|
+
# Computes the requested percentile out of provided values
|
67
|
+
# @param percentile [Float]
|
68
|
+
# @param values [Array<Numeric>] all the values based on which we should compute the percentile
|
69
|
+
# @return [Float] computed percentile
|
70
|
+
def percentile(percentile, values)
|
71
|
+
values_sorted = values.sort
|
72
|
+
|
73
|
+
floor = (percentile * (values_sorted.length - 1) + 1).floor - 1
|
74
|
+
mod = (percentile * (values_sorted.length - 1) + 1).modulo(1)
|
75
|
+
|
76
|
+
values_sorted[floor] + (mod * (values_sorted[floor + 1] - values_sorted[floor]))
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component.
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
|
+
# repository and their usage requires commercial license agreement.
|
6
|
+
#
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
8
|
+
#
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
10
|
+
# your code to Maciej Mensfeld.
|
11
|
+
|
12
|
+
module Karafka
|
13
|
+
module Pro
|
14
|
+
# Pro components related to processing part of Karafka
|
15
|
+
module Processing
|
16
|
+
# Pro jobs
|
17
|
+
module Jobs
|
18
|
+
# The main job type in a non-blocking variant.
|
19
|
+
# This variant works "like" the regular consumption but pauses the partition for as long
|
20
|
+
# as it is needed until a job is done.
|
21
|
+
#
|
22
|
+
# It can be useful when having long lasting jobs that would exceed `max.poll.interval`
|
23
|
+
# if they would block.
|
24
|
+
#
|
25
|
+
# @note It needs to be working with a proper consumer that will handle the partition
|
26
|
+
# management. This layer of the framework knows nothing about Kafka messages consumption.
|
27
|
+
class ConsumeNonBlocking < ::Karafka::Processing::Jobs::Consume
|
28
|
+
# Releases the blocking lock after it is done with the preparation phase for this job
|
29
|
+
def prepare
|
30
|
+
super
|
31
|
+
@non_blocking = true
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component.
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
|
+
# repository and their usage requires commercial license agreement.
|
6
|
+
#
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
8
|
+
#
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
10
|
+
# your code to Maciej Mensfeld.
|
11
|
+
|
12
|
+
module Karafka
|
13
|
+
module Pro
|
14
|
+
module Processing
|
15
|
+
# Pro jobs builder that supports lrj
|
16
|
+
class JobsBuilder < ::Karafka::Processing::JobsBuilder
|
17
|
+
# @param executor [Karafka::Processing::Executor]
|
18
|
+
# @param messages [Karafka::Messages::Messages] messages batch to be consumed
|
19
|
+
# @return [Karafka::Processing::Jobs::Consume] blocking job
|
20
|
+
# @return [Karafka::Pro::Processing::Jobs::ConsumeNonBlocking] non blocking for lrj
|
21
|
+
def consume(executor, messages)
|
22
|
+
if executor.topic.long_running_job?
|
23
|
+
Jobs::ConsumeNonBlocking.new(executor, messages)
|
24
|
+
else
|
25
|
+
super
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component.
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
|
+
# repository and their usage requires commercial license agreement.
|
6
|
+
#
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
8
|
+
#
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
10
|
+
# your code to Maciej Mensfeld.
|
11
|
+
|
12
|
+
module Karafka
|
13
|
+
module Pro
|
14
|
+
# Pro routing components
|
15
|
+
module Routing
|
16
|
+
# Routing extensions that allow to configure some extra PRO routing options
|
17
|
+
module Extensions
|
18
|
+
class << self
|
19
|
+
# @param base [Class] class we extend
|
20
|
+
def included(base)
|
21
|
+
base.attr_accessor :long_running_job
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# @return [Boolean] is a given job on a topic a long running one
|
26
|
+
def long_running_job?
|
27
|
+
@long_running_job || false
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component.
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
|
+
# repository and their usage requires commercial license agreement.
|
6
|
+
#
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
8
|
+
#
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
10
|
+
# your code to Maciej Mensfeld.
|
11
|
+
|
12
|
+
module Karafka
|
13
|
+
module Pro
|
14
|
+
# Optimized scheduler that takes into consideration the execution time needed to process
|
15
|
+
# messages from given topics partitions. It uses the non-preemptive LJF algorithm
|
16
|
+
#
|
17
|
+
# This scheduler is designed to optimize execution times on jobs that perform IO operations as
|
18
|
+
# when taking IO into consideration, it can achieve optimized parallel processing.
|
19
|
+
#
|
20
|
+
# This scheduler can also work with virtual partitions.
|
21
|
+
#
|
22
|
+
# Aside from consumption jobs, other jobs do not run often, thus we can leave them with
|
23
|
+
# default FIFO scheduler from the default Karafka scheduler
|
24
|
+
class Scheduler < ::Karafka::Scheduler
|
25
|
+
# Schedules jobs in the LJF order for consumption
|
26
|
+
#
|
27
|
+
# @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
|
28
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
|
29
|
+
#
|
30
|
+
def schedule_consumption(queue, jobs_array)
|
31
|
+
pt = PerformanceTracker.instance
|
32
|
+
|
33
|
+
ordered = []
|
34
|
+
|
35
|
+
jobs_array.each do |job|
|
36
|
+
messages = job.messages
|
37
|
+
message = messages.first
|
38
|
+
|
39
|
+
cost = pt.processing_time_p95(message.topic, message.partition) * messages.size
|
40
|
+
|
41
|
+
ordered << [job, cost]
|
42
|
+
end
|
43
|
+
|
44
|
+
ordered.sort_by!(&:last)
|
45
|
+
ordered.reverse!
|
46
|
+
ordered.map!(&:first)
|
47
|
+
|
48
|
+
ordered.each do |job|
|
49
|
+
queue << job
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|