karafka 2.0.0.alpha6 → 2.0.0.beta1
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +9 -2
- data/Gemfile.lock +6 -6
- data/docker-compose.yml +1 -0
- data/lib/karafka/base_consumer.rb +58 -5
- data/lib/karafka/connection/listener.rb +11 -7
- data/lib/karafka/connection/messages_buffer.rb +4 -13
- data/lib/karafka/connection/pauses_manager.rb +2 -2
- data/lib/karafka/contracts/config.rb +1 -0
- data/lib/karafka/instrumentation/monitor.rb +1 -0
- data/lib/karafka/pro/active_job/dispatcher.rb +9 -9
- data/lib/karafka/pro/active_job/job_options_contract.rb +9 -9
- data/lib/karafka/pro/loader.rb +13 -8
- data/lib/karafka/pro/performance_tracker.rb +80 -0
- data/lib/karafka/processing/executor.rb +15 -10
- data/lib/karafka/processing/jobs/base.rb +16 -0
- data/lib/karafka/processing/jobs/consume.rb +7 -2
- data/lib/karafka/processing/jobs_queue.rb +18 -9
- data/lib/karafka/processing/worker.rb +23 -0
- data/lib/karafka/scheduler.rb +21 -0
- data/lib/karafka/setup/config.rb +2 -0
- data/lib/karafka/time_trackers/pause.rb +10 -2
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +5 -3
- metadata.gz.sig +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f108cb4288d0ed0510381f51c77d49e052b947f6180c9b9c0b06e0ac2b599894
+  data.tar.gz: 3d79066d0107c08f450ca9f4c3b5c4a39aae497836c80bf8380c65f1406b82c0
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4aae257010c992c59ce4b01ead54ff2cfd4e8ccd8cbe6b52214b3cedf8f879690e0d577f2b41f44b1ab6888d7e27bbc92f3ba4a69e8b127687fb4c43bff51fbc
+  data.tar.gz: f65e425cb84152d20a055bdb9a94fd98280597cdf5e431337cb8604040534cacbfdd03efd6dc23b86c9ecf25721c860bd55ca75ad3f98e4c66136a88c1efc4e7

checksums.yaml.gz.sig CHANGED
Binary file

data/.ruby-version CHANGED
@@ -1 +1 @@
-3.1.
+3.1.2

data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
 # Karafka framework changelog

+## 2.0.0-beta1 (2022-05-22)
+- Update the jobs queue blocking engine and allow for non-blocking jobs execution
+- Provide `#prepared` hook that always runs before the fetching loop is unblocked
+- [Pro] Introduce performance tracker for scheduling optimizer
+- Provide ability to pause (`#pause`) and resume (`#resume`) given partitions from the consumers
+- Small integration specs refactoring + specs for pausing scenarios
+
 ## 2.0.0-alpha6 (2022-04-17)
 - Fix a bug, where upon missing boot file and Rails, railtie would fail with a generic exception (#818)
 - Fix an issue with parallel pristine specs colliding with each other during `bundle install` (#820)
@@ -26,12 +33,12 @@

 ## 2.0.0-alpha2 (2022-02-19)
 - Require `kafka` keys to be symbols
-- Added ActiveJob Pro adapter
+- [Pro] Added ActiveJob Pro adapter
 - Small updates to the license and docs

 ## 2.0.0-alpha1 (2022-01-30)
 - Change license to `LGPL-3.0`
-- Introduce a Pro subscription
+- [Pro] Introduce a Pro subscription
 - Switch from `ruby-kafka` to `librdkafka` as an underlying driver
 - Introduce fully automatic integration tests that go through the whole server lifecycle
 - Integrate WaterDrop tightly with autoconfiguration inheritance and an option to redefine it
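
A minimal sketch of how the beta1 additions listed above could look from an application consumer. `EventsConsumer`, `process` and `throttled?` are hypothetical placeholders; only `#pause`, `#resume`, the `prepared` hook and the optional pause timeout come from this release:

class EventsConsumer < Karafka::BaseConsumer
  def consume
    messages.each do |message|
      process(message)

      # Explicit 10s pause of this topic partition, resuming from the next offset.
      # Without the second argument the default exponential backoff would be used.
      return pause(message.offset + 1, 10_000) if throttled?
    end
  end

  private

  # Runs in the blocking phase, before the fetching loop is unblocked
  def prepared
    @batch_started_at = Time.now.utc
  end

  def process(message); end

  def throttled?
    false
  end
end

`#resume` is the counterpart: it re-enables the paused topic partition and expires the pause tracker.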

data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    karafka (2.0.0.alpha6)
+    karafka (2.0.0.beta1)
       dry-configurable (~> 0.13)
       dry-monitor (~> 0.5)
       dry-validation (~> 1.7)
@@ -13,10 +13,10 @@ PATH
 GEM
   remote: https://rubygems.org/
   specs:
-    activejob (7.0.
-      activesupport (= 7.0.
+    activejob (7.0.3)
+      activesupport (= 7.0.3)
       globalid (>= 0.3.6)
-    activesupport (7.0.
+    activesupport (7.0.3)
       concurrent-ruby (~> 1.0, >= 1.0.2)
       i18n (>= 1.6, < 2)
       minitest (>= 5.1)
@@ -25,7 +25,7 @@ GEM
     concurrent-ruby (1.1.10)
     diff-lcs (1.5.0)
     docile (1.4.0)
-    dry-configurable (0.
+    dry-configurable (0.15.0)
       concurrent-ruby (~> 1.0)
       dry-core (~> 0.6)
     dry-container (0.9.0)
@@ -121,4 +121,4 @@ DEPENDENCIES
   simplecov

 BUNDLED WITH
-   2.3.
+   2.3.11

data/lib/karafka/base_consumer.rb CHANGED
@@ -10,8 +10,8 @@ module Karafka
     attr_accessor :messages
     # @return [Karafka::Connection::Client] kafka connection client
     attr_accessor :client
-    # @return [Karafka::TimeTrackers::Pause] current topic partition pause
-    attr_accessor :
+    # @return [Karafka::TimeTrackers::Pause] current topic partition pause tracker
+    attr_accessor :pause_tracker
     # @return [Waterdrop::Producer] producer instance
     attr_accessor :producer

@@ -24,7 +24,7 @@ module Karafka
       Karafka.monitor.instrument('consumer.consumed', caller: self) do
         consume

-
+        pause_tracker.reset

         # Mark as consumed only if manual offset management is not on
         return if topic.manual_offset_management
@@ -40,8 +40,8 @@ module Karafka
         caller: self,
         type: 'consumer.consume.error'
       )
-
-      pause.
+
+      pause(@seek_offset || messages.first.offset)
     end

     # Trigger method for running on shutdown.
@@ -76,8 +76,31 @@ module Karafka
       )
     end

+    # Can be used to run preparation code
+    #
+    # @private
+    # @note This should not be used by the end users as it is part of the lifecycle of things but
+    #   not as part of the public api. This can act as a hook when creating non-blocking
+    #   consumers and doing other advanced stuff
+    def on_prepared
+      Karafka.monitor.instrument('consumer.prepared', caller: self) do
+        prepared
+      end
+    rescue StandardError => e
+      Karafka.monitor.instrument(
+        'error.occurred',
+        error: e,
+        caller: self,
+        type: 'consumer.prepared.error'
+      )
+    end
+
     private

+    # Method that gets called in the blocking flow allowing to setup any type of resources or to
+    #   send additional commands to Kafka before the proper execution starts.
+    def prepared; end
+
     # Method that will perform business logic and on data received from Kafka (it will consume
     #   the data)
     # @note This method needs bo be implemented in a subclass. We stub it here as a failover if
@@ -97,6 +120,10 @@ module Karafka
     # Marks message as consumed in an async way.
     #
     # @param message [Messages::Message] last successfully processed message.
+    # @note We keep track of this offset in case we would mark as consumed and got error when
+    #   processing another message. In case like this we do not pause on the message we've already
+    #   processed but rather at the next one. This applies to both sync and async versions of this
+    #   method.
     def mark_as_consumed(message)
       client.mark_as_consumed(message)
       @seek_offset = message.offset + 1
@@ -110,6 +137,32 @@ module Karafka
       @seek_offset = message.offset + 1
     end

+    # Pauses processing on a given offset for the current topic partition
+    #
+    # After given partition is resumed, it will continue processing from the given offset
+    # @param offset [Integer] offset from which we want to restart the processing
+    # @param timeout [Integer, nil] how long in milliseconds do we want to pause or nil to use the
+    #   default exponential pausing strategy defined for retries
+    def pause(offset, timeout = nil)
+      client.pause(
+        messages.metadata.topic,
+        messages.metadata.partition,
+        offset
+      )
+
+      timeout ? pause_tracker.pause(timeout) : pause_tracker.pause
+    end
+
+    # Resumes processing of the current topic partition
+    def resume
+      client.resume(
+        messages.metadata.topic,
+        messages.metadata.partition
+      )
+
+      pause_tracker.expire
+    end
+
     # Seeks in the context of current topic and partition
     #
     # @param offset [Integer] offset where we want to seek

data/lib/karafka/connection/listener.rb CHANGED
@@ -15,6 +15,8 @@ module Karafka
        @pauses_manager = PausesManager.new
        @client = Client.new(@subscription_group)
        @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
+       # We reference scheduler here as it is much faster than fetching this each time
+       @scheduler = ::Karafka::App.config.internal.scheduler
      end

      # Runs the main listener fetch loop.
@@ -66,9 +68,9 @@ module Karafka
          # distributing consuming jobs as upon revoking, we might get assigned to the same
          # partitions, thus getting their jobs. The revoking jobs need to finish before
          # appropriate consumers are taken down and re-created
-         wait(@subscription_group) if
+         wait(@subscription_group) if schedule_revoke_lost_partitions_jobs

-
+         schedule_partitions_jobs(messages_buffer)

          # We wait only on jobs from our subscription group. Other groups are independent.
          wait(@subscription_group)
@@ -103,15 +105,17 @@ module Karafka

      # Enqueues revoking jobs for partitions that were taken away from the running process.
      # @return [Boolean] was there anything to revoke
-
+     # @note We do not use scheduler here as those jobs are not meant to be order optimized in
+     #   any way. Since they operate occasionally it is irrelevant.
+     def schedule_revoke_lost_partitions_jobs
        revoked_partitions = @client.rebalance_manager.revoked_partitions

        return false if revoked_partitions.empty?

        revoked_partitions.each do |topic, partitions|
          partitions.each do |partition|
-
-           executor = @executors.fetch(topic, partition,
+           pause_tracker = @pauses_manager.fetch(topic, partition)
+           executor = @executors.fetch(topic, partition, pause_tracker)
            @jobs_queue << Processing::Jobs::Revoked.new(executor)
          end
        end
@@ -122,8 +126,8 @@ module Karafka
      # Takes the messages per topic partition and enqueues processing jobs in threads.
      #
      # @param messages_buffer [Karafka::Connection::MessagesBuffer] buffer with messages
-     def
-       messages_buffer
+     def schedule_partitions_jobs(messages_buffer)
+       @scheduler.call(messages_buffer) do |topic, partition, messages|
          pause = @pauses_manager.fetch(topic, partition)

          next if pause.paused?

data/lib/karafka/connection/messages_buffer.rb CHANGED
@@ -10,6 +10,10 @@ module Karafka
     class MessagesBuffer
       attr_reader :size

+      extend Forwardable
+
+      def_delegators :@groups, :each
+
       # @return [Karafka::Connection::MessagesBuffer] buffer instance
       def initialize
         @size = 0
@@ -20,19 +24,6 @@ module Karafka
         end
       end

-      # Iterates over aggregated data providing messages per topic partition.
-      #
-      # @yieldparam [String] topic name
-      # @yieldparam [Integer] partition number
-      # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
-      def each
-        @groups.each do |topic, partitions|
-          partitions.each do |partition, messages|
-            yield(topic, partition, messages)
-          end
-        end
-      end
-
       # Adds a message to the buffer.
       #
       # @param message [Rdkafka::Consumer::Message] raw rdkafka message

data/lib/karafka/connection/pauses_manager.rb CHANGED
@@ -12,11 +12,11 @@ module Karafka
        end
      end

-     # Creates or fetches pause of a given topic partition.
+     # Creates or fetches pause tracker of a given topic partition.
      #
      # @param topic [String] topic name
      # @param partition [Integer] partition number
-     # @return [Karafka::TimeTrackers::Pause] pause instance
+     # @return [Karafka::TimeTrackers::Pause] pause tracker instance
      def fetch(topic, partition)
        @pauses[topic][partition] ||= TimeTrackers::Pause.new(
          timeout: Karafka::App.config.pause_timeout,

data/lib/karafka/pro/active_job/dispatcher.rb CHANGED
@@ -1,18 +1,18 @@
 # frozen_string_literal: true

-# This Karafka component is a Pro component.
-# All of the commercial components are present in the lib/karafka/pro directory of this repository
-# and their usage requires commercial license agreement.
-#
-# Karafka has also commercial-friendly license, commercial support and commercial components.
-#
-# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
-# your code to Maciej Mensfeld.
-
 module Karafka
   module Pro
     # Karafka Pro ActiveJob components
     module ActiveJob
+      # This Karafka component is a Pro component.
+      # All of the commercial components are present in the lib/karafka/pro directory of this
+      # repository and their usage requires commercial license agreement.
+      #
+      # Karafka has also commercial-friendly license, commercial support and commercial components.
+      #
+      # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+      # of your code to Maciej Mensfeld.
+
       # Pro dispatcher that sends the ActiveJob job to a proper topic based on the queue name
       # and that allows to inject additional options into the producer, effectively allowing for a
       # much better and more granular control over the dispatch and consumption process.

data/lib/karafka/pro/active_job/job_options_contract.rb CHANGED
@@ -1,17 +1,17 @@
 # frozen_string_literal: true

-# This Karafka component is a Pro component.
-# All of the commercial components are present in the lib/karafka/pro directory of this repository
-# and their usage requires commercial license agreement.
-#
-# Karafka has also commercial-friendly license, commercial support and commercial components.
-#
-# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
-# your code to Maciej Mensfeld.
-
 module Karafka
   module Pro
     module ActiveJob
+      # This Karafka component is a Pro component.
+      # All of the commercial components are present in the lib/karafka/pro directory of this
+      # repository and their usage requires commercial license agreement.
+      #
+      # Karafka has also commercial-friendly license, commercial support and commercial components.
+      #
+      # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+      # of your code to Maciej Mensfeld.
+
       # Contract for validating the options that can be altered with `#karafka_options` per job
       # class that works with Pro features.
       class JobOptionsContract < ::Karafka::ActiveJob::JobOptionsContract

data/lib/karafka/pro/loader.rb CHANGED
@@ -1,15 +1,16 @@
 # frozen_string_literal: true

-# This Karafka component is a Pro component.
-# All of the commercial components are present in the lib/karafka/pro directory of this repository
-# and their usage requires commercial license agreement.
-#
-# Karafka has also commercial-friendly license, commercial support and commercial components.
-#
-# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
-# your code to Maciej Mensfeld.
 module Karafka
   module Pro
+    # This Karafka component is a Pro component.
+    # All of the commercial components are present in the lib/karafka/pro directory of this
+    # repository and their usage requires commercial license agreement.
+    #
+    # Karafka has also commercial-friendly license, commercial support and commercial components.
+    #
+    # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+    # of your code to Maciej Mensfeld.
+
     # Loader requires and loads all the pro components only when they are needed
     class Loader
       class << self
@@ -17,11 +18,15 @@ module Karafka
        # @param config [Dry::Configurable::Config] whole app config that we can alter with pro
        # components
        def setup(config)
+         require_relative 'performance_tracker'
          require_relative 'active_job/dispatcher'
          require_relative 'active_job/job_options_contract'

          config.internal.active_job.dispatcher = ActiveJob::Dispatcher.new
          config.internal.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
+
+         # Monitor time needed to process each message from a single partition
+         config.monitor.subscribe(PerformanceTracker.instance)
        end
      end
    end

data/lib/karafka/pro/performance_tracker.rb ADDED
@@ -0,0 +1,80 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Pro
+    # This Karafka component is a Pro component.
+    # All of the commercial components are present in the lib/karafka/pro directory of this
+    # repository and their usage requires commercial license agreement.
+    #
+    # Karafka has also commercial-friendly license, commercial support and commercial components.
+    #
+    # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+    # of your code to Maciej Mensfeld.
+
+    # Tracker used to keep track of performance metrics
+    # It provides insights that can be used to optimize processing flow
+    class PerformanceTracker
+      include Singleton
+
+      # How many samples do we collect per topic partition
+      SAMPLES_COUNT = 200
+
+      private_constant :SAMPLES_COUNT
+
+      # Builds up nested concurrent hash for data tracking
+      def initialize
+        @processing_times = Concurrent::Hash.new do |topics_hash, topic|
+          topics_hash[topic] = Concurrent::Hash.new do |partitions_hash, partition|
+            # This array does not have to be concurrent because we always access single partition
+            # data via instrumentation that operates in a single thread via consumer
+            partitions_hash[partition] = []
+          end
+        end
+      end
+
+      # @param topic [String]
+      # @param partition [Integer]
+      # @return [Float] p95 processing time of a single message from a single topic partition
+      def processing_time_p95(topic, partition)
+        values = @processing_times[topic][partition]
+
+        return 0 if values.empty?
+        return values.first if values.size == 1
+
+        percentile(0.95, values)
+      end
+
+      # @private
+      # @param event [Dry::Events::Event] event details
+      # Tracks time taken to process a single message of a given topic partition
+      def on_consumer_consumed(event)
+        consumer = event[:caller]
+        messages = consumer.messages
+        topic = messages.metadata.topic
+        partition = messages.metadata.partition
+
+        samples = @processing_times[topic][partition]
+        samples << event[:time] / messages.count
+
+        return unless samples.size > SAMPLES_COUNT
+
+        samples.shift
+      end
+
+      private
+
+      # Computers the requested percentile out of provided values
+      # @param percentile [Float]
+      # @param values [Array<String>] all the values based on which we should
+      # @return [Float] computed percentile
+      def percentile(percentile, values)
+        values_sorted = values.sort
+
+        floor = (percentile * (values_sorted.length - 1) + 1).floor - 1
+        mod = (percentile * (values_sorted.length - 1) + 1).modulo(1)
+
+        values_sorted[floor] + (mod * (values_sorted[floor + 1] - values_sorted[floor]))
+      end
+    end
+  end
+end
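
The `#percentile` helper above interpolates linearly between the two nearest ranks. A quick standalone check of that arithmetic (sample values assumed, not part of the gem):

values_sorted = (1..10).map(&:to_f)           # ten evenly spaced samples

rank  = 0.95 * (values_sorted.length - 1) + 1 # => 9.55 (1-based fractional rank)
floor = rank.floor - 1                        # => 8, index of the lower neighbour
mod   = rank.modulo(1)                        # => 0.55, weight of the upper neighbour

p95 = values_sorted[floor] + mod * (values_sorted[floor + 1] - values_sorted[floor])
# => 9.55, the same value processing_time_p95 would return for these samples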

data/lib/karafka/processing/executor.rb CHANGED
@@ -4,10 +4,10 @@ module Karafka
   # Namespace that encapsulates all the logic related to processing data.
   module Processing
     # Executors:
-    # - run consumers code
-    #
-    # - they re-create consumer instances in case of partitions that were revoked
-    #
+    # - run consumers code (for `#call`) or run given preparation / teardown operations when needed
+    #   from separate threads.
+    # - they re-create consumer instances in case of partitions that were revoked and assigned
+    #   back.
     #
     # @note Executors are not removed after partition is revoked. They are not that big and will
     #   be re-used in case of a re-claim
@@ -21,21 +21,21 @@ module Karafka
      # @param group_id [String] id of the subscription group to which the executor belongs
      # @param client [Karafka::Connection::Client] kafka client
      # @param topic [Karafka::Routing::Topic] topic for which this executor will run
-     # @param
-     def initialize(group_id, client, topic,
+     # @param pause_tracker [Karafka::TimeTrackers::Pause] fetch pause tracker for pausing
+     def initialize(group_id, client, topic, pause_tracker)
        @id = SecureRandom.uuid
        @group_id = group_id
        @client = client
        @topic = topic
-       @
+       @pause_tracker = pause_tracker
      end

-     #
+     # Builds the consumer instance and sets all that is needed to run the user consumption logic
      #
      # @param messages [Array<Rdkafka::Consumer::Message>] raw rdkafka messages
      # @param received_at [Time] the moment we've received the batch (actually the moment we've)
      #   enqueued it, but good enough
-     def
+     def prepare(messages, received_at)
        # Recreate consumer with each batch if persistence is not enabled
        # We reload the consumers with each batch instead of relying on some external signals
        # when needed for consistency. That way devs may have it on or off and not in this
@@ -49,6 +49,11 @@ module Karafka
          received_at
        )

+       consumer.on_prepared
+     end
+
+     # Runs consumer data processing against given batch and handles failures and errors.
+     def consume
        # We run the consumer client logic...
        consumer.on_consume
      end
@@ -86,7 +91,7 @@ module Karafka
        consumer = @topic.consumer.new
        consumer.topic = @topic
        consumer.client = @client
-       consumer.
+       consumer.pause_tracker = @pause_tracker
        consumer.producer = ::Karafka::App.producer
        consumer
      end

data/lib/karafka/processing/jobs/base.rb CHANGED
@@ -5,6 +5,8 @@ module Karafka
     # Namespace for all the jobs that are suppose to run in workers.
     module Jobs
       # Base class for all the jobs types that are suppose to run in workers threads.
+      # Each job can have 3 main entry-points: `#prepare`, `#call` and `#teardown`
+      # Only `#call` is required.
       class Base
         extend Forwardable

@@ -12,6 +14,20 @@ module Karafka
        def_delegators :executor, :id, :group_id

        attr_reader :executor
+
+       # When redefined can run any code that should run before executing the proper code
+       def prepare; end
+
+       # When redefined can run any code that should run after executing the proper code
+       def teardown; end
+
+       # @return [Boolean] is this a non-blocking job
+       # @note Blocking job is a job, that will cause the job queue to wait until it is finished
+       #   before removing the lock on new jobs being added
+       # @note All the jobs are blocking by default
+       def non_blocking?
+         false
+       end
      end
    end
  end
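
A hedged sketch (hypothetical `LongRunning` job, not part of this changeset) of how the new entry-points compose: a job that returns `true` from `#non_blocking?` lets the worker tick the jobs queue after `#prepare`, so the listener can keep polling while `#call` runs.

module Karafka
  module Processing
    module Jobs
      class LongRunning < Base
        # Executor wiring omitted; Base exposes #executor, #id and #group_id

        def prepare
          # blocking part, e.g. building the consumer for the batch
        end

        def call
          # heavy work that may overlap with further polling of the same subscription group
        end

        def teardown
          # release anything acquired in #prepare
        end

        def non_blocking?
          true
        end
      end
    end
  end
end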

data/lib/karafka/processing/jobs/consume.rb CHANGED
@@ -18,9 +18,14 @@ module Karafka
          super()
        end

-       # Runs the
+       # Runs the preparations on the executor
+       def prepare
+         executor.prepare(@messages, @created_at)
+       end
+
+       # Runs the given executor
        def call
-         executor.consume
+         executor.consume
        end
      end
    end

data/lib/karafka/processing/jobs_queue.rb CHANGED
@@ -21,7 +21,7 @@ module Karafka
      # We cannot use a single semaphore as it could potentially block in listeners that should
      # process with their data and also could unlock when a given group needs to remain locked
      @semaphores = Hash.new { |h, k| h[k] = Queue.new }
-     @in_processing = Hash.new { |h, k| h[k] =
+     @in_processing = Hash.new { |h, k| h[k] = [] }
      @mutex = Mutex.new
    end

@@ -44,9 +44,9 @@ module Karafka
      @mutex.synchronize do
        group = @in_processing[job.group_id]

-       raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.
+       raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.include?(job)

-       group
+       group << job
      end

      @queue << job
@@ -60,14 +60,21 @@ module Karafka
      @queue.pop
    end

+   # Causes the wait lock to re-check the lock conditions and potential unlock.
+   # @param group_id [String] id of the group we want to unlock for one tick
+   # @note This does not release the wait lock. It just causes a conditions recheck
+   def tick(group_id)
+     @semaphores[group_id] << true
+   end
+
    # Marks a given job from a given group as completed. When there are no more jobs from a given
    # group to be executed, we won't wait.
    #
    # @param [Jobs::Base] job that was completed
    def complete(job)
      @mutex.synchronize do
-       @in_processing[job.group_id].delete(job
-
+       @in_processing[job.group_id].delete(job)
+       tick(job.group_id)
      end
    end

@@ -79,7 +86,7 @@ module Karafka
      @mutex.synchronize do
        @in_processing[group_id].clear
        # We unlock it just in case it was blocked when clearing started
-
+       tick(group_id)
      end
    end

@@ -108,13 +115,15 @@ module Karafka
    # @param group_id [String] id of the group in which jobs we're interested.
    # @return [Boolean] should we keep waiting or not
    def wait?(group_id)
+     group = @in_processing[group_id]
+
      # If it is stopping, all the previous messages that are processed at the moment need to
      # finish. Otherwise we may risk closing the client and committing offsets afterwards
-     return false if Karafka::App.stopping? &&
+     return false if Karafka::App.stopping? && group.empty?
      return false if @queue.closed?
-     return false if
+     return false if group.empty?

-
+     !group.all?(&:non_blocking?)
    end
  end
end

data/lib/karafka/processing/worker.rb CHANGED
@@ -4,6 +4,18 @@ module Karafka
   module Processing
     # Workers are used to run jobs in separate threads.
     # Workers are the main processing units of the Karafka framework.
+    #
+    # Each job runs in three stages:
+    #   - prepare - here we can run any code that we would need to run blocking before we allow
+    #               the job to run fully async (non blocking). This will always run in a blocking
+    #               way and can be used to make sure all the resources and external dependencies
+    #               are satisfied before going async.
+    #
+    #   - call - actual processing logic that can run sync or async
+    #
+    #   - teardown - it should include any code that we want to run after we executed the user
+    #                code. This can be used to unlock certain resources or do other things that are
+    #                not user code but need to run after user code base is executed.
     class Worker
       extend Forwardable

@@ -33,7 +45,18 @@ module Karafka
        job = @jobs_queue.pop

        if job
+         job.prepare
+
+         # If a job is marked as non blocking, we can run a tick in the job queue and if there
+         # are no other blocking factors, the job queue will be unlocked.
+         # If this does not run, all the things will be blocking and job queue won't allow to
+         # pass it until done.
+         @jobs_queue.tick(job.group_id) if job.non_blocking?
+
          job.call
+
+         job.teardown
+
          true
        else
          false

data/lib/karafka/scheduler.rb ADDED
@@ -0,0 +1,21 @@
+# frozen_string_literal: true
+
+module Karafka
+  # FIFO scheduler for messages coming from various topics and partitions
+  class Scheduler
+    # Yields messages from partitions in the fifo order
+    #
+    # @param messages_buffer [Karafka::Connection::MessagesBuffer] messages buffer with data from
+    #   multiple topics and partitions
+    # @yieldparam [String] topic name
+    # @yieldparam [Integer] partition number
+    # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
+    def call(messages_buffer)
+      messages_buffer.each do |topic, partitions|
+        partitions.each do |partition, messages|
+          yield(topic, partition, messages)
+        end
+      end
+    end
+  end
+end
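
Since the listener now routes job distribution through the internal scheduler, an alternative ordering only needs to honour the same `#call` contract. A hedged sketch (hypothetical class, not part of this changeset) that yields the largest batches first:

class BiggestBatchFirstScheduler < ::Karafka::Scheduler
  # @param messages_buffer [Karafka::Connection::MessagesBuffer]
  def call(messages_buffer)
    batches = []

    messages_buffer.each do |topic, partitions|
      partitions.each do |partition, messages|
        batches << [topic, partition, messages]
      end
    end

    # Largest topic partition batches get scheduled first
    batches.sort_by { |_, _, messages| -messages.count }.each do |topic, partition, messages|
      yield(topic, partition, messages)
    end
  end
end

In principle such a class could be plugged in during setup via the internal `scheduler` setting introduced below (`config.internal.scheduler = BiggestBatchFirstScheduler.new`).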

data/lib/karafka/setup/config.rb CHANGED
@@ -96,6 +96,8 @@ module Karafka
      # option subscription_groups_builder [Routing::SubscriptionGroupsBuilder] subscription
      #   group builder
      setting :subscription_groups_builder, default: Routing::SubscriptionGroupsBuilder.new
+     # option scheduler [Class] scheduler we will be using
+     setting :scheduler, default: Scheduler.new

      # Karafka components for ActiveJob
      setting :active_job do

data/lib/karafka/time_trackers/pause.rb CHANGED
@@ -41,9 +41,12 @@ module Karafka

      # Pauses the processing from now till the end of the interval (backoff or non-backoff)
      # and records the count.
-
+     # @param timeout [Integer] timeout value in milliseconds that overwrites the default timeout
+     # @note Providing this value can be useful when we explicitly want to pause for a certain
+     #   period of time, outside of any regular pausing logic
+     def pause(timeout = backoff_interval)
        @started_at = now
-       @ends_at = @started_at +
+       @ends_at = @started_at + timeout
        @count += 1
      end

@@ -53,6 +56,11 @@ module Karafka
        @ends_at = nil
      end

+     # Expires the pause, so it can be considered expired
+     def expire
+       @ends_at = nil
+     end
+
      # @return [Boolean] are we paused from processing
      def paused?
        !@started_at.nil?

data/lib/karafka/version.rb CHANGED

data.tar.gz.sig CHANGED
Binary file

metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: karafka
 version: !ruby/object:Gem::Version
-  version: 2.0.0.alpha6
+  version: 2.0.0.beta1
 platform: ruby
 authors:
 - Maciej Mensfeld
@@ -34,7 +34,7 @@ cert_chain:
   R2P11bWoCtr70BsccVrN8jEhzwXngMyI2gVt750Y+dbTu1KgRqZKp/ECe7ZzPzXj
   pIy9vHxTANKYVyI4qj8OrFdEM5BQNu8oQpL0iQ==
   -----END CERTIFICATE-----
-date: 2022-
+date: 2022-05-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: dry-configurable
@@ -228,6 +228,7 @@ files:
 - lib/karafka/pro/active_job/dispatcher.rb
 - lib/karafka/pro/active_job/job_options_contract.rb
 - lib/karafka/pro/loader.rb
+- lib/karafka/pro/performance_tracker.rb
 - lib/karafka/process.rb
 - lib/karafka/processing/executor.rb
 - lib/karafka/processing/executors_buffer.rb
@@ -248,6 +249,7 @@ files:
 - lib/karafka/routing/subscription_groups_builder.rb
 - lib/karafka/routing/topic.rb
 - lib/karafka/runner.rb
+- lib/karafka/scheduler.rb
 - lib/karafka/serialization/json/deserializer.rb
 - lib/karafka/server.rb
 - lib/karafka/setup/config.rb
@@ -282,7 +284,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: 1.3.1
 requirements: []
-rubygems_version: 3.3.
+rubygems_version: 3.3.7
 signing_key:
 specification_version: 4
 summary: Ruby based framework for working with Apache Kafka

metadata.gz.sig CHANGED
Binary file