karafka 2.0.0.alpha6 → 2.0.0.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +9 -2
- data/Gemfile.lock +6 -6
- data/docker-compose.yml +1 -0
- data/lib/karafka/base_consumer.rb +58 -5
- data/lib/karafka/connection/listener.rb +11 -7
- data/lib/karafka/connection/messages_buffer.rb +4 -13
- data/lib/karafka/connection/pauses_manager.rb +2 -2
- data/lib/karafka/contracts/config.rb +1 -0
- data/lib/karafka/instrumentation/monitor.rb +1 -0
- data/lib/karafka/pro/active_job/dispatcher.rb +9 -9
- data/lib/karafka/pro/active_job/job_options_contract.rb +9 -9
- data/lib/karafka/pro/loader.rb +13 -8
- data/lib/karafka/pro/performance_tracker.rb +80 -0
- data/lib/karafka/processing/executor.rb +15 -10
- data/lib/karafka/processing/jobs/base.rb +16 -0
- data/lib/karafka/processing/jobs/consume.rb +7 -2
- data/lib/karafka/processing/jobs_queue.rb +18 -9
- data/lib/karafka/processing/worker.rb +23 -0
- data/lib/karafka/scheduler.rb +21 -0
- data/lib/karafka/setup/config.rb +2 -0
- data/lib/karafka/time_trackers/pause.rb +10 -2
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +5 -3
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f108cb4288d0ed0510381f51c77d49e052b947f6180c9b9c0b06e0ac2b599894
+  data.tar.gz: 3d79066d0107c08f450ca9f4c3b5c4a39aae497836c80bf8380c65f1406b82c0
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4aae257010c992c59ce4b01ead54ff2cfd4e8ccd8cbe6b52214b3cedf8f879690e0d577f2b41f44b1ab6888d7e27bbc92f3ba4a69e8b127687fb4c43bff51fbc
+  data.tar.gz: f65e425cb84152d20a055bdb9a94fd98280597cdf5e431337cb8604040534cacbfdd03efd6dc23b86c9ecf25721c860bd55ca75ad3f98e4c66136a88c1efc4e7
checksums.yaml.gz.sig
CHANGED
Binary file
data/.ruby-version
CHANGED
@@ -1 +1 @@
-3.1.
+3.1.2
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,12 @@
 # Karafka framework changelog

+## 2.0.0-beta1 (2022-05-22)
+- Update the jobs queue blocking engine and allow for non-blocking jobs execution
+- Provide `#prepared` hook that always runs before the fetching loop is unblocked
+- [Pro] Introduce performance tracker for scheduling optimizer
+- Provide ability to pause (`#pause`) and resume (`#resume`) given partitions from the consumers
+- Small integration specs refactoring + specs for pausing scenarios
+
 ## 2.0.0-alpha6 (2022-04-17)
 - Fix a bug, where upon missing boot file and Rails, railtie would fail with a generic exception (#818)
 - Fix an issue with parallel pristine specs colliding with each other during `bundle install` (#820)
@@ -26,12 +33,12 @@

 ## 2.0.0-alpha2 (2022-02-19)
 - Require `kafka` keys to be symbols
-- Added ActiveJob Pro adapter
+- [Pro] Added ActiveJob Pro adapter
 - Small updates to the license and docs

 ## 2.0.0-alpha1 (2022-01-30)
 - Change license to `LGPL-3.0`
-- Introduce a Pro subscription
+- [Pro] Introduce a Pro subscription
 - Switch from `ruby-kafka` to `librdkafka` as an underlying driver
 - Introduce fully automatic integration tests that go through the whole server lifecycle
 - Integrate WaterDrop tightly with autoconfiguration inheritance and an option to redefine it
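The `#pause`/`#resume` pair mentioned above is a consumer-level API. Below is a minimal usage sketch: the `OrdersConsumer` class and both helper methods are hypothetical, and only `#pause`, `#mark_as_consumed` and `messages` come from this release.

class OrdersConsumer < Karafka::BaseConsumer
  def consume
    messages.each do |message|
      # Hypothetical backpressure check
      if downstream_overloaded?
        # Pause this topic partition at the current message; with no explicit
        # timeout, the default exponential backoff strategy applies
        return pause(message.offset)
      end

      process(message) # hypothetical business logic
      mark_as_consumed(message)
    end
  end
end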
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    karafka (2.0.0.alpha6)
+    karafka (2.0.0.beta1)
       dry-configurable (~> 0.13)
       dry-monitor (~> 0.5)
       dry-validation (~> 1.7)
@@ -13,10 +13,10 @@ PATH
 GEM
   remote: https://rubygems.org/
   specs:
-    activejob (7.0.
-      activesupport (= 7.0.
+    activejob (7.0.3)
+      activesupport (= 7.0.3)
       globalid (>= 0.3.6)
-    activesupport (7.0.
+    activesupport (7.0.3)
       concurrent-ruby (~> 1.0, >= 1.0.2)
       i18n (>= 1.6, < 2)
       minitest (>= 5.1)
@@ -25,7 +25,7 @@ GEM
     concurrent-ruby (1.1.10)
     diff-lcs (1.5.0)
     docile (1.4.0)
-    dry-configurable (0.
+    dry-configurable (0.15.0)
       concurrent-ruby (~> 1.0)
       dry-core (~> 0.6)
     dry-container (0.9.0)
@@ -121,4 +121,4 @@ DEPENDENCIES
   simplecov

 BUNDLED WITH
-   2.3.
+   2.3.11
data/lib/karafka/base_consumer.rb
CHANGED
@@ -10,8 +10,8 @@ module Karafka
     attr_accessor :messages
     # @return [Karafka::Connection::Client] kafka connection client
     attr_accessor :client
-    # @return [Karafka::TimeTrackers::Pause] current topic partition pause
-    attr_accessor :pause
+    # @return [Karafka::TimeTrackers::Pause] current topic partition pause tracker
+    attr_accessor :pause_tracker
     # @return [Waterdrop::Producer] producer instance
     attr_accessor :producer
@@ -24,7 +24,7 @@ module Karafka
      Karafka.monitor.instrument('consumer.consumed', caller: self) do
        consume

-       pause.reset
+       pause_tracker.reset

      # Mark as consumed only if manual offset management is not on
      return if topic.manual_offset_management
@@ -40,8 +40,8 @@ module Karafka
        caller: self,
        type: 'consumer.consume.error'
      )
-
-      pause.
+
+      pause(@seek_offset || messages.first.offset)
    end

    # Trigger method for running on shutdown.
@@ -76,8 +76,31 @@ module Karafka
      )
    end

+    # Can be used to run preparation code
+    #
+    # @private
+    # @note This should not be used by the end users as it is part of the lifecycle of things but
+    #   not as part of the public api. This can act as a hook when creating non-blocking
+    #   consumers and doing other advanced stuff
+    def on_prepared
+      Karafka.monitor.instrument('consumer.prepared', caller: self) do
+        prepared
+      end
+    rescue StandardError => e
+      Karafka.monitor.instrument(
+        'error.occurred',
+        error: e,
+        caller: self,
+        type: 'consumer.prepared.error'
+      )
+    end
+
    private

+    # Method that gets called in the blocking flow allowing to setup any type of resources or to
+    # send additional commands to Kafka before the proper execution starts.
+    def prepared; end
+
    # Method that will perform business logic on data received from Kafka (it will consume
    # the data)
    # @note This method needs to be implemented in a subclass. We stub it here as a failover if
@@ -97,6 +120,10 @@ module Karafka
    # Marks message as consumed in an async way.
    #
    # @param message [Messages::Message] last successfully processed message.
+    # @note We keep track of this offset in case we would mark as consumed and got error when
+    #   processing another message. In case like this we do not pause on the message we've already
+    #   processed but rather at the next one. This applies to both sync and async versions of this
+    #   method.
    def mark_as_consumed(message)
      client.mark_as_consumed(message)
      @seek_offset = message.offset + 1
@@ -110,6 +137,32 @@ module Karafka
      @seek_offset = message.offset + 1
    end

+    # Pauses processing on a given offset for the current topic partition
+    #
+    # After given partition is resumed, it will continue processing from the given offset
+    # @param offset [Integer] offset from which we want to restart the processing
+    # @param timeout [Integer, nil] how long in milliseconds do we want to pause or nil to use the
+    #   default exponential pausing strategy defined for retries
+    def pause(offset, timeout = nil)
+      client.pause(
+        messages.metadata.topic,
+        messages.metadata.partition,
+        offset
+      )
+
+      timeout ? pause_tracker.pause(timeout) : pause_tracker.pause
+    end
+
+    # Resumes processing of the current topic partition
+    def resume
+      client.resume(
+        messages.metadata.topic,
+        messages.metadata.partition
+      )
+
+      pause_tracker.expire
+    end
+
    # Seeks in the context of current topic and partition
    #
    # @param offset [Integer] offset where we want to seek
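Taken together, the hunks above split consumption into a blocking preparation phase and the regular consumption phase. A sketch of a consumer relying on the new private `#prepared` hook; the storage helper is hypothetical:

class EventsConsumer < Karafka::BaseConsumer
  def consume
    messages.each { |message| @store.write(message.payload) }
  end

  private

  # Runs via #on_prepared while the fetch loop is still blocked, so the
  # resource is guaranteed to exist before #consume runs (possibly async)
  def prepared
    @store ||= EventsStore.connect # hypothetical resource setup
  end
end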
data/lib/karafka/connection/listener.rb
CHANGED
@@ -15,6 +15,8 @@ module Karafka
        @pauses_manager = PausesManager.new
        @client = Client.new(@subscription_group)
        @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
+        # We reference scheduler here as it is much faster than fetching this each time
+        @scheduler = ::Karafka::App.config.internal.scheduler
      end

      # Runs the main listener fetch loop.
@@ -66,9 +68,9 @@ module Karafka
        # distributing consuming jobs as upon revoking, we might get assigned to the same
        # partitions, thus getting their jobs. The revoking jobs need to finish before
        # appropriate consumers are taken down and re-created
-        wait(@subscription_group) if
+        wait(@subscription_group) if schedule_revoke_lost_partitions_jobs

-
+        schedule_partitions_jobs(messages_buffer)

        # We wait only on jobs from our subscription group. Other groups are independent.
        wait(@subscription_group)
@@ -103,15 +105,17 @@ module Karafka

      # Enqueues revoking jobs for partitions that were taken away from the running process.
      # @return [Boolean] was there anything to revoke
-
+      # @note We do not use scheduler here as those jobs are not meant to be order optimized in
+      #   any way. Since they operate occasionally it is irrelevant.
+      def schedule_revoke_lost_partitions_jobs
        revoked_partitions = @client.rebalance_manager.revoked_partitions

        return false if revoked_partitions.empty?

        revoked_partitions.each do |topic, partitions|
          partitions.each do |partition|
-
-            executor = @executors.fetch(topic, partition,
+            pause_tracker = @pauses_manager.fetch(topic, partition)
+            executor = @executors.fetch(topic, partition, pause_tracker)
            @jobs_queue << Processing::Jobs::Revoked.new(executor)
          end
        end
@@ -122,8 +126,8 @@ module Karafka
      # Takes the messages per topic partition and enqueues processing jobs in threads.
      #
      # @param messages_buffer [Karafka::Connection::MessagesBuffer] buffer with messages
-      def
-        messages_buffer
+      def schedule_partitions_jobs(messages_buffer)
+        @scheduler.call(messages_buffer) do |topic, partition, messages|
          pause = @pauses_manager.fetch(topic, partition)

          next if pause.paused?
data/lib/karafka/connection/messages_buffer.rb
CHANGED
@@ -10,6 +10,10 @@ module Karafka
    class MessagesBuffer
      attr_reader :size

+      extend Forwardable
+
+      def_delegators :@groups, :each
+
      # @return [Karafka::Connection::MessagesBuffer] buffer instance
      def initialize
        @size = 0
@@ -20,19 +24,6 @@ module Karafka
        end
      end

-      # Iterates over aggregated data providing messages per topic partition.
-      #
-      # @yieldparam [String] topic name
-      # @yieldparam [Integer] partition number
-      # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
-      def each
-        @groups.each do |topic, partitions|
-          partitions.each do |partition, messages|
-            yield(topic, partition, messages)
-          end
-        end
-      end
-
      # Adds a message to the buffer.
      #
      # @param message [Rdkafka::Consumer::Message] raw rdkafka message
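Note what this delegation changes: the removed hand-written `#each` yielded `(topic, partition, messages)` triplets, while the delegated `@groups.each` yields `(topic, partitions)` pairs, so the nested iteration moves to the callers (the new `Karafka::Scheduler` further below). The new iteration shape:

# Buffer#each is now @groups.each: callers unpack the nested hash themselves
messages_buffer.each do |topic, partitions|
  partitions.each do |partition, messages|
    puts "#{topic}/#{partition}: #{messages.count} messages"
  end
end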
data/lib/karafka/connection/pauses_manager.rb
CHANGED
@@ -12,11 +12,11 @@ module Karafka
        end
      end

-      # Creates or fetches pause of a given topic partition.
+      # Creates or fetches pause tracker of a given topic partition.
      #
      # @param topic [String] topic name
      # @param partition [Integer] partition number
-      # @return [Karafka::TimeTrackers::Pause] pause instance
+      # @return [Karafka::TimeTrackers::Pause] pause tracker instance
      def fetch(topic, partition)
        @pauses[topic][partition] ||= TimeTrackers::Pause.new(
          timeout: Karafka::App.config.pause_timeout,
data/lib/karafka/pro/active_job/dispatcher.rb
CHANGED
@@ -1,18 +1,18 @@
 # frozen_string_literal: true

-# This Karafka component is a Pro component.
-# All of the commercial components are present in the lib/karafka/pro directory of this repository
-# and their usage requires commercial license agreement.
-#
-# Karafka has also commercial-friendly license, commercial support and commercial components.
-#
-# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
-# your code to Maciej Mensfeld.
-
 module Karafka
   module Pro
     # Karafka Pro ActiveJob components
     module ActiveJob
+      # This Karafka component is a Pro component.
+      # All of the commercial components are present in the lib/karafka/pro directory of this
+      # repository and their usage requires commercial license agreement.
+      #
+      # Karafka has also commercial-friendly license, commercial support and commercial components.
+      #
+      # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+      # of your code to Maciej Mensfeld.
+
      # Pro dispatcher that sends the ActiveJob job to a proper topic based on the queue name
      # and that allows to inject additional options into the producer, effectively allowing for a
      # much better and more granular control over the dispatch and consumption process.
data/lib/karafka/pro/active_job/job_options_contract.rb
CHANGED
@@ -1,17 +1,17 @@
 # frozen_string_literal: true

-# This Karafka component is a Pro component.
-# All of the commercial components are present in the lib/karafka/pro directory of this repository
-# and their usage requires commercial license agreement.
-#
-# Karafka has also commercial-friendly license, commercial support and commercial components.
-#
-# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
-# your code to Maciej Mensfeld.
-
 module Karafka
   module Pro
     module ActiveJob
+      # This Karafka component is a Pro component.
+      # All of the commercial components are present in the lib/karafka/pro directory of this
+      # repository and their usage requires commercial license agreement.
+      #
+      # Karafka has also commercial-friendly license, commercial support and commercial components.
+      #
+      # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+      # of your code to Maciej Mensfeld.
+
      # Contract for validating the options that can be altered with `#karafka_options` per job
      # class that works with Pro features.
      class JobOptionsContract < ::Karafka::ActiveJob::JobOptionsContract
data/lib/karafka/pro/loader.rb
CHANGED
@@ -1,15 +1,16 @@
 # frozen_string_literal: true

-# This Karafka component is a Pro component.
-# All of the commercial components are present in the lib/karafka/pro directory of this repository
-# and their usage requires commercial license agreement.
-#
-# Karafka has also commercial-friendly license, commercial support and commercial components.
-#
-# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
-# your code to Maciej Mensfeld.
 module Karafka
   module Pro
+    # This Karafka component is a Pro component.
+    # All of the commercial components are present in the lib/karafka/pro directory of this
+    # repository and their usage requires commercial license agreement.
+    #
+    # Karafka has also commercial-friendly license, commercial support and commercial components.
+    #
+    # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+    # of your code to Maciej Mensfeld.
+
    # Loader requires and loads all the pro components only when they are needed
    class Loader
      class << self
@@ -17,11 +18,15 @@ module Karafka
        # @param config [Dry::Configurable::Config] whole app config that we can alter with pro
        #   components
        def setup(config)
+          require_relative 'performance_tracker'
          require_relative 'active_job/dispatcher'
          require_relative 'active_job/job_options_contract'

          config.internal.active_job.dispatcher = ActiveJob::Dispatcher.new
          config.internal.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
+
+          # Monitor time needed to process each message from a single partition
+          config.monitor.subscribe(PerformanceTracker.instance)
        end
      end
    end
data/lib/karafka/pro/performance_tracker.rb
ADDED
@@ -0,0 +1,80 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Pro
+    # This Karafka component is a Pro component.
+    # All of the commercial components are present in the lib/karafka/pro directory of this
+    # repository and their usage requires commercial license agreement.
+    #
+    # Karafka has also commercial-friendly license, commercial support and commercial components.
+    #
+    # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+    # of your code to Maciej Mensfeld.
+
+    # Tracker used to keep track of performance metrics
+    # It provides insights that can be used to optimize processing flow
+    class PerformanceTracker
+      include Singleton
+
+      # How many samples do we collect per topic partition
+      SAMPLES_COUNT = 200
+
+      private_constant :SAMPLES_COUNT
+
+      # Builds up nested concurrent hash for data tracking
+      def initialize
+        @processing_times = Concurrent::Hash.new do |topics_hash, topic|
+          topics_hash[topic] = Concurrent::Hash.new do |partitions_hash, partition|
+            # This array does not have to be concurrent because we always access single partition
+            # data via instrumentation that operates in a single thread via consumer
+            partitions_hash[partition] = []
+          end
+        end
+      end
+
+      # @param topic [String]
+      # @param partition [Integer]
+      # @return [Float] p95 processing time of a single message from a single topic partition
+      def processing_time_p95(topic, partition)
+        values = @processing_times[topic][partition]
+
+        return 0 if values.empty?
+        return values.first if values.size == 1
+
+        percentile(0.95, values)
+      end
+
+      # @private
+      # @param event [Dry::Events::Event] event details
+      # Tracks time taken to process a single message of a given topic partition
+      def on_consumer_consumed(event)
+        consumer = event[:caller]
+        messages = consumer.messages
+        topic = messages.metadata.topic
+        partition = messages.metadata.partition
+
+        samples = @processing_times[topic][partition]
+        samples << event[:time] / messages.count
+
+        return unless samples.size > SAMPLES_COUNT
+
+        samples.shift
+      end
+
+      private
+
+      # Computes the requested percentile out of provided values
+      # @param percentile [Float]
+      # @param values [Array<Numeric>] all the values based on which we should compute
+      #   the percentile
+      # @return [Float] computed percentile
+      def percentile(percentile, values)
+        values_sorted = values.sort
+
+        floor = (percentile * (values_sorted.length - 1) + 1).floor - 1
+        mod = (percentile * (values_sorted.length - 1) + 1).modulo(1)
+
+        values_sorted[floor] + (mod * (values_sorted[floor + 1] - values_sorted[floor]))
+      end
+    end
+  end
+end
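To make the interpolation in `#percentile` concrete, here is a worked example with made-up per-message processing times:

values_sorted = [0.1, 0.2, 0.3, 0.4, 1.0]
percentile = 0.95

rank  = percentile * (values_sorted.length - 1) + 1 # => 4.8
floor = rank.floor - 1                              # => 3 (index of 0.4)
mod   = rank.modulo(1)                              # => 0.8

# Linear interpolation between the two sorted values around the computed rank
p95 = values_sorted[floor] + (mod * (values_sorted[floor + 1] - values_sorted[floor]))
# => 0.4 + 0.8 * (1.0 - 0.4) = 0.88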
data/lib/karafka/processing/executor.rb
CHANGED
@@ -4,10 +4,10 @@ module Karafka
  # Namespace that encapsulates all the logic related to processing data.
  module Processing
    # Executors:
-    # - run consumers code
-    #
-    # - they re-create consumer instances in case of partitions that were revoked
-    #
+    # - run consumers code (for `#call`) or run given preparation / teardown operations when needed
+    #   from separate threads.
+    # - they re-create consumer instances in case of partitions that were revoked and assigned
+    #   back.
    #
    # @note Executors are not removed after partition is revoked. They are not that big and will
    #   be re-used in case of a re-claim
@@ -21,21 +21,21 @@ module Karafka
      # @param group_id [String] id of the subscription group to which the executor belongs
      # @param client [Karafka::Connection::Client] kafka client
      # @param topic [Karafka::Routing::Topic] topic for which this executor will run
-      # @param
-      def initialize(group_id, client, topic,
+      # @param pause_tracker [Karafka::TimeTrackers::Pause] fetch pause tracker for pausing
+      def initialize(group_id, client, topic, pause_tracker)
        @id = SecureRandom.uuid
        @group_id = group_id
        @client = client
        @topic = topic
-        @
+        @pause_tracker = pause_tracker
      end

-      #
+      # Builds the consumer instance and sets all that is needed to run the user consumption logic
      #
      # @param messages [Array<Rdkafka::Consumer::Message>] raw rdkafka messages
      # @param received_at [Time] the moment we've received the batch (actually the moment we've
      #   enqueued it, but good enough)
-      def
+      def prepare(messages, received_at)
        # Recreate consumer with each batch if persistence is not enabled
        # We reload the consumers with each batch instead of relying on some external signals
        # when needed for consistency. That way devs may have it on or off and not in this
@@ -49,6 +49,11 @@ module Karafka
          received_at
        )

+        consumer.on_prepared
+      end
+
+      # Runs consumer data processing against given batch and handles failures and errors.
+      def consume
        # We run the consumer client logic...
        consumer.on_consume
      end
@@ -86,7 +91,7 @@ module Karafka
        consumer = @topic.consumer.new
        consumer.topic = @topic
        consumer.client = @client
-        consumer.
+        consumer.pause_tracker = @pause_tracker
        consumer.producer = ::Karafka::App.producer
        consumer
      end
data/lib/karafka/processing/jobs/base.rb
CHANGED
@@ -5,6 +5,8 @@ module Karafka
    # Namespace for all the jobs that are supposed to run in workers.
    module Jobs
      # Base class for all the job types that are supposed to run in workers threads.
+      # Each job can have 3 main entry-points: `#prepare`, `#call` and `#teardown`.
+      # Only `#call` is required.
      class Base
        extend Forwardable

@@ -12,6 +14,20 @@ module Karafka
        def_delegators :executor, :id, :group_id

        attr_reader :executor
+
+        # When redefined can run any code that should run before executing the proper code
+        def prepare; end
+
+        # When redefined can run any code that should run after executing the proper code
+        def teardown; end
+
+        # @return [Boolean] is this a non-blocking job
+        # @note A blocking job is a job that will cause the job queue to wait until it is finished
+        #   before removing the lock on new jobs being added
+        # @note All the jobs are blocking by default
+        def non_blocking?
+          false
+        end
      end
    end
  end
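A sketch of what a job opting out of the blocking behavior could look like; the class and its work are hypothetical, only the hook names and `#non_blocking?` come from this diff:

# Hypothetical maintenance job that does not hold the per-group queue lock
class Maintenance < Karafka::Processing::Jobs::Base
  def call
    compact_local_state # hypothetical work
  end

  # Lets the jobs queue unlock while this job is still running
  def non_blocking?
    true
  end
end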
data/lib/karafka/processing/jobs/consume.rb
CHANGED
@@ -18,9 +18,14 @@ module Karafka
          super()
        end

-        # Runs the
+        # Runs the preparations on the executor
+        def prepare
+          executor.prepare(@messages, @created_at)
+        end
+
+        # Runs the given executor
        def call
-          executor.consume
+          executor.consume
        end
      end
    end
data/lib/karafka/processing/jobs_queue.rb
CHANGED
@@ -21,7 +21,7 @@ module Karafka
      # We cannot use a single semaphore as it could potentially block in listeners that should
      # process with their data and also could unlock when a given group needs to remain locked
      @semaphores = Hash.new { |h, k| h[k] = Queue.new }
-      @in_processing = Hash.new { |h, k| h[k] =
+      @in_processing = Hash.new { |h, k| h[k] = [] }
      @mutex = Mutex.new
    end
@@ -44,9 +44,9 @@ module Karafka
      @mutex.synchronize do
        group = @in_processing[job.group_id]

-        raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.
+        raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.include?(job)

-        group
+        group << job
      end

      @queue << job
@@ -60,14 +60,21 @@ module Karafka
      @queue.pop
    end

+    # Causes the wait lock to re-check the lock conditions and potential unlock.
+    # @param group_id [String] id of the group we want to unlock for one tick
+    # @note This does not release the wait lock. It just causes a conditions recheck
+    def tick(group_id)
+      @semaphores[group_id] << true
+    end
+
    # Marks a given job from a given group as completed. When there are no more jobs from a given
    # group to be executed, we won't wait.
    #
    # @param [Jobs::Base] job that was completed
    def complete(job)
      @mutex.synchronize do
-        @in_processing[job.group_id].delete(job
-
+        @in_processing[job.group_id].delete(job)
+        tick(job.group_id)
      end
    end
@@ -79,7 +86,7 @@ module Karafka
      @mutex.synchronize do
        @in_processing[group_id].clear
        # We unlock it just in case it was blocked when clearing started
-
+        tick(group_id)
      end
    end
@@ -108,13 +115,15 @@ module Karafka
    # @param group_id [String] id of the group in which jobs we're interested.
    # @return [Boolean] should we keep waiting or not
    def wait?(group_id)
+      group = @in_processing[group_id]
+
      # If it is stopping, all the previous messages that are processed at the moment need to
      # finish. Otherwise we may risk closing the client and committing offsets afterwards
-      return false if Karafka::App.stopping? &&
+      return false if Karafka::App.stopping? && group.empty?
      return false if @queue.closed?
-      return false if
+      return false if group.empty?

-
+      !group.all?(&:non_blocking?)
    end
  end
end
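The `tick` mechanism builds on plain `Queue` semantics: `pop` blocks until something is pushed, so each push wakes the waiter exactly once to re-evaluate its condition. A standalone illustration (not Karafka code):

semaphore = Queue.new
done = false

waiter = Thread.new do
  # Mirrors the wait loop: block, then re-check the condition on every tick
  semaphore.pop until done
  puts 'unblocked'
end

semaphore << true # a tick alone only forces a re-check; still waiting
sleep 0.1

done = true
semaphore << true # the next tick sees the flipped condition and unblocks
waiter.join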
data/lib/karafka/processing/worker.rb
CHANGED
@@ -4,6 +4,18 @@ module Karafka
  module Processing
    # Workers are used to run jobs in separate threads.
    # Workers are the main processing units of the Karafka framework.
+    #
+    # Each job runs in three stages:
+    # - prepare - here we can run any code that we would need to run blocking before we allow
+    #   the job to run fully async (non blocking). This will always run in a blocking
+    #   way and can be used to make sure all the resources and external dependencies
+    #   are satisfied before going async.
+    #
+    # - call - actual processing logic that can run sync or async
+    #
+    # - teardown - it should include any code that we want to run after we executed the user
+    #   code. This can be used to unlock certain resources or do other things that are
+    #   not user code but need to run after user code base is executed.
    class Worker
      extend Forwardable

@@ -33,7 +45,18 @@ module Karafka
      job = @jobs_queue.pop

      if job
+        job.prepare
+
+        # If a job is marked as non blocking, we can run a tick in the job queue and if there
+        # are no other blocking factors, the job queue will be unlocked.
+        # If this does not run, all the things will be blocking and job queue won't allow to
+        # pass it until done.
+        @jobs_queue.tick(job.group_id) if job.non_blocking?
+
        job.call
+
+        job.teardown
+
        true
      else
        false
data/lib/karafka/scheduler.rb
ADDED
@@ -0,0 +1,21 @@
+# frozen_string_literal: true
+
+module Karafka
+  # FIFO scheduler for messages coming from various topics and partitions
+  class Scheduler
+    # Yields messages from partitions in the fifo order
+    #
+    # @param messages_buffer [Karafka::Connection::MessagesBuffer] messages buffer with data from
+    #   multiple topics and partitions
+    # @yieldparam [String] topic name
+    # @yieldparam [Integer] partition number
+    # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
+    def call(messages_buffer)
+      messages_buffer.each do |topic, partitions|
+        partitions.each do |partition, messages|
+          yield(topic, partition, messages)
+        end
+      end
+    end
+  end
+end
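Because the scheduler is looked up from the config and only needs to honor this `#call` contract, other orderings can be plugged in. A hypothetical example that yields the busiest partitions first:

class BusiestFirstScheduler
  def call(messages_buffer)
    batches = []

    messages_buffer.each do |topic, partitions|
      partitions.each do |partition, messages|
        batches << [topic, partition, messages]
      end
    end

    # Largest batches get scheduled first
    batches.sort_by { |(_, _, messages)| -messages.count }.each do |batch|
      yield(*batch)
    end
  end
end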
data/lib/karafka/setup/config.rb
CHANGED
@@ -96,6 +96,8 @@ module Karafka
      # option subscription_groups_builder [Routing::SubscriptionGroupsBuilder] subscription
      #   group builder
      setting :subscription_groups_builder, default: Routing::SubscriptionGroupsBuilder.new
+      # option scheduler [Class] scheduler we will be using
+      setting :scheduler, default: Scheduler.new

      # Karafka components for ActiveJob
      setting :active_job do
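Given the new setting, the default FIFO scheduler could in principle be replaced with a custom one such as the sketch above. Illustrative only — the `internal` namespace is not a public API:

Karafka::App.config.internal.scheduler = BusiestFirstScheduler.new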
data/lib/karafka/time_trackers/pause.rb
CHANGED
@@ -41,9 +41,12 @@ module Karafka

      # Pauses the processing from now till the end of the interval (backoff or non-backoff)
      # and records the count.
-      def pause
+      # @param timeout [Integer] timeout value in milliseconds that overwrites the default timeout
+      # @note Providing this value can be useful when we explicitly want to pause for a certain
+      #   period of time, outside of any regular pausing logic
+      def pause(timeout = backoff_interval)
        @started_at = now
-        @ends_at = @started_at +
+        @ends_at = @started_at + timeout
        @count += 1
      end
@@ -53,6 +56,11 @@ module Karafka
        @ends_at = nil
      end

+      # Expires the pause, so it can be considered expired
+      def expire
+        @ends_at = nil
+      end
+
      # @return [Boolean] are we paused from processing
      def paused?
        !@started_at.nil?
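A sketch of the tracker's lifecycle after these additions; it assumes the tracker can be built with just the `timeout:` keyword, while the pauses manager visibly passes more options that are truncated in this diff:

pause = Karafka::TimeTrackers::Pause.new(timeout: 1_000)

pause.pause        # uses the backoff-derived interval and bumps the count
pause.paused?      # => true
pause.expire       # what consumer #resume relies on to end the pause early
pause.pause(5_000) # explicit timeout in ms overrides the backoff value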
data/lib/karafka/version.rb
CHANGED
data.tar.gz.sig
CHANGED
Binary file
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: karafka
 version: !ruby/object:Gem::Version
-  version: 2.0.0.alpha6
+  version: 2.0.0.beta1
 platform: ruby
 authors:
 - Maciej Mensfeld
@@ -34,7 +34,7 @@ cert_chain:
   R2P11bWoCtr70BsccVrN8jEhzwXngMyI2gVt750Y+dbTu1KgRqZKp/ECe7ZzPzXj
   pIy9vHxTANKYVyI4qj8OrFdEM5BQNu8oQpL0iQ==
   -----END CERTIFICATE-----
-date: 2022-04-17 00:00:00.000000000 Z
+date: 2022-05-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: dry-configurable
@@ -228,6 +228,7 @@ files:
 - lib/karafka/pro/active_job/dispatcher.rb
 - lib/karafka/pro/active_job/job_options_contract.rb
 - lib/karafka/pro/loader.rb
+- lib/karafka/pro/performance_tracker.rb
 - lib/karafka/process.rb
 - lib/karafka/processing/executor.rb
 - lib/karafka/processing/executors_buffer.rb
@@ -248,6 +249,7 @@ files:
 - lib/karafka/routing/subscription_groups_builder.rb
 - lib/karafka/routing/topic.rb
 - lib/karafka/runner.rb
+- lib/karafka/scheduler.rb
 - lib/karafka/serialization/json/deserializer.rb
 - lib/karafka/server.rb
 - lib/karafka/setup/config.rb
@@ -282,7 +284,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: 1.3.1
 requirements: []
-rubygems_version: 3.3.
+rubygems_version: 3.3.7
 signing_key:
 specification_version: 4
 summary: Ruby based framework for working with Apache Kafka
metadata.gz.sig
CHANGED
Binary file