karafka 2.0.0.beta3 → 2.0.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +18 -15
- data/CHANGELOG.md +37 -0
- data/CONTRIBUTING.md +0 -5
- data/Gemfile.lock +6 -6
- data/README.md +2 -10
- data/bin/benchmarks +2 -2
- data/bin/integrations +10 -3
- data/bin/{stress → stress_many} +1 -1
- data/bin/stress_one +13 -0
- data/bin/wait_for_kafka +20 -0
- data/docker-compose.yml +32 -13
- data/karafka.gemspec +1 -1
- data/lib/karafka/active_job/routing/extensions.rb +1 -1
- data/lib/karafka/app.rb +2 -1
- data/lib/karafka/base_consumer.rb +59 -46
- data/lib/karafka/connection/client.rb +60 -14
- data/lib/karafka/connection/listener.rb +37 -11
- data/lib/karafka/connection/rebalance_manager.rb +20 -19
- data/lib/karafka/contracts/config.rb +18 -4
- data/lib/karafka/contracts/server_cli_options.rb +1 -1
- data/lib/karafka/errors.rb +3 -0
- data/lib/karafka/instrumentation/logger_listener.rb +0 -3
- data/lib/karafka/instrumentation/monitor.rb +0 -1
- data/lib/karafka/pro/active_job/consumer.rb +2 -8
- data/lib/karafka/pro/base_consumer.rb +82 -0
- data/lib/karafka/pro/loader.rb +14 -8
- data/lib/karafka/pro/processing/coordinator.rb +63 -0
- data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +1 -1
- data/lib/karafka/pro/processing/jobs_builder.rb +3 -2
- data/lib/karafka/pro/processing/partitioner.rb +41 -0
- data/lib/karafka/pro/processing/scheduler.rb +56 -0
- data/lib/karafka/pro/routing/extensions.rb +6 -0
- data/lib/karafka/processing/coordinator.rb +88 -0
- data/lib/karafka/processing/coordinators_buffer.rb +54 -0
- data/lib/karafka/processing/executor.rb +16 -9
- data/lib/karafka/processing/executors_buffer.rb +46 -15
- data/lib/karafka/processing/jobs/base.rb +8 -3
- data/lib/karafka/processing/jobs/consume.rb +11 -4
- data/lib/karafka/processing/jobs_builder.rb +3 -2
- data/lib/karafka/processing/partitioner.rb +22 -0
- data/lib/karafka/processing/result.rb +29 -0
- data/lib/karafka/processing/scheduler.rb +22 -0
- data/lib/karafka/processing/worker.rb +2 -2
- data/lib/karafka/routing/consumer_group.rb +1 -1
- data/lib/karafka/routing/topic.rb +14 -0
- data/lib/karafka/setup/config.rb +20 -10
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +16 -8
- metadata.gz.sig +0 -0
- data/lib/karafka/pro/base_consumer_extensions.rb +0 -66
- data/lib/karafka/pro/scheduler.rb +0 -54
- data/lib/karafka/scheduler.rb +0 -20
data/lib/karafka/setup/config.rb
CHANGED
@@ -85,21 +85,31 @@ module Karafka
 
       # Namespace for internal settings that should not be modified directly
       setting :internal do
-        # option routing_builder [Karafka::Routing::Builder] builder instance
-        setting :routing_builder, default: Routing::Builder.new
         # option status [Karafka::Status] app status
         setting :status, default: Status.new
         # option process [Karafka::Process] process status
         # @note In the future, we need to have a single process representation for all the karafka
         #   instances
         setting :process, default: Process.new
-
-
-
-
-
-
-
+
+        setting :routing do
+          # option builder [Karafka::Routing::Builder] builder instance
+          setting :builder, default: Routing::Builder.new
+          # option subscription_groups_builder [Routing::SubscriptionGroupsBuilder] subscription
+          #   group builder
+          setting :subscription_groups_builder, default: Routing::SubscriptionGroupsBuilder.new
+        end
+
+        setting :processing do
+          # option scheduler [Object] scheduler we will be using
+          setting :scheduler, default: Processing::Scheduler.new
+          # option jobs_builder [Object] jobs builder we want to use
+          setting :jobs_builder, default: Processing::JobsBuilder.new
+          # option coordinator [Class] work coordinator we want to user for processing coordination
+          setting :coordinator_class, default: Processing::Coordinator
+          # option partitioner_class [Class] partitioner we use against a batch of data
+          setting :partitioner_class, default: Processing::Partitioner
+        end
 
       # Karafka components for ActiveJob
       setting :active_job do
@@ -109,7 +119,7 @@ module Karafka
         #   ensuring, that extra job options defined are valid
         setting :job_options_contract, default: ActiveJob::JobOptionsContract.new
         # option consumer [Class] consumer class that should be used to consume ActiveJob data
-        setting :
+        setting :consumer_class, default: ActiveJob::Consumer
       end
     end
 
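For orientation (not part of the upstream diff): after this change the routing and processing components hang off dedicated namespaces inside `internal`. Below is a minimal, hypothetical sketch of how these settings could be read, or swapped, during setup, assuming the standard `Karafka::App.setup` entry point; note that the surrounding comment still marks `internal` as not meant for direct modification, so treat any override as an advanced move.

```ruby
# Hypothetical usage sketch based on the settings introduced above.
Karafka::App.setup do |config|
  config.client_id = 'example_app'

  # The builder that previously lived under internal.routing_builder:
  config.internal.routing.builder # => Karafka::Routing::Builder instance

  # Where the new processing defaults are anchored (override only if you know why):
  config.internal.processing.scheduler = Karafka::Processing::Scheduler.new
  config.internal.processing.partitioner_class = Karafka::Processing::Partitioner
end
```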
data/lib/karafka/version.rb
CHANGED
data.tar.gz.sig
CHANGED
Binary file
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: karafka
 version: !ruby/object:Gem::Version
-  version: 2.0.0.beta3
+  version: 2.0.0.rc1
 platform: ruby
 authors:
 - Maciej Mensfeld
@@ -34,7 +34,7 @@ cert_chain:
   R2P11bWoCtr70BsccVrN8jEhzwXngMyI2gVt750Y+dbTu1KgRqZKp/ECe7ZzPzXj
   pIy9vHxTANKYVyI4qj8OrFdEM5BQNu8oQpL0iQ==
   -----END CERTIFICATE-----
-date: 2022-
+date: 2022-07-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: dry-configurable
@@ -112,7 +112,7 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 2.3.
+        version: 2.3.1
     - - "<"
       - !ruby/object:Gem::Version
         version: 3.0.0
@@ -122,7 +122,7 @@ dependencies:
     requirements:
    - - ">="
       - !ruby/object:Gem::Version
-        version: 2.3.
+        version: 2.3.1
     - - "<"
       - !ruby/object:Gem::Version
         version: 3.0.0
@@ -172,7 +172,9 @@ files:
 - bin/integrations
 - bin/karafka
 - bin/scenario
-- bin/stress
+- bin/stress_many
+- bin/stress_one
+- bin/wait_for_kafka
 - certs/karafka-pro.pem
 - certs/mensfeld.pem
 - config/errors.yml
@@ -232,14 +234,18 @@ files:
 - lib/karafka/pro/active_job/consumer.rb
 - lib/karafka/pro/active_job/dispatcher.rb
 - lib/karafka/pro/active_job/job_options_contract.rb
-- lib/karafka/pro/base_consumer_extensions.rb
+- lib/karafka/pro/base_consumer.rb
 - lib/karafka/pro/loader.rb
 - lib/karafka/pro/performance_tracker.rb
+- lib/karafka/pro/processing/coordinator.rb
 - lib/karafka/pro/processing/jobs/consume_non_blocking.rb
 - lib/karafka/pro/processing/jobs_builder.rb
+- lib/karafka/pro/processing/partitioner.rb
+- lib/karafka/pro/processing/scheduler.rb
 - lib/karafka/pro/routing/extensions.rb
-- lib/karafka/pro/scheduler.rb
 - lib/karafka/process.rb
+- lib/karafka/processing/coordinator.rb
+- lib/karafka/processing/coordinators_buffer.rb
 - lib/karafka/processing/executor.rb
 - lib/karafka/processing/executors_buffer.rb
 - lib/karafka/processing/jobs/base.rb
@@ -248,6 +254,9 @@ files:
 - lib/karafka/processing/jobs/shutdown.rb
 - lib/karafka/processing/jobs_builder.rb
 - lib/karafka/processing/jobs_queue.rb
+- lib/karafka/processing/partitioner.rb
+- lib/karafka/processing/result.rb
+- lib/karafka/processing/scheduler.rb
 - lib/karafka/processing/worker.rb
 - lib/karafka/processing/workers_batch.rb
 - lib/karafka/railtie.rb
@@ -261,7 +270,6 @@ files:
 - lib/karafka/routing/topic.rb
 - lib/karafka/routing/topics.rb
 - lib/karafka/runner.rb
-- lib/karafka/scheduler.rb
 - lib/karafka/serialization/json/deserializer.rb
 - lib/karafka/server.rb
 - lib/karafka/setup/config.rb
metadata.gz.sig
CHANGED
Binary file
data/lib/karafka/pro/base_consumer_extensions.rb
DELETED
@@ -1,66 +0,0 @@
-# frozen_string_literal: true
-
-# This Karafka component is a Pro component.
-# All of the commercial components are present in the lib/karafka/pro directory of this
-# repository and their usage requires commercial license agreement.
-#
-# Karafka has also commercial-friendly license, commercial support and commercial components.
-#
-# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
-# your code to Maciej Mensfeld.
-
-module Karafka
-  module Pro
-    # Extensions to the base consumer that make it more pro and fancy
-    #
-    # @note In case of using lrj, manual pausing may not be the best idea as resume needs to happen
-    #   after each batch is processed.
-    #
-    # They need to be added to the consumer via `#prepend`
-    module BaseConsumerExtensions
-      # Pause for tops 31 years
-      MAX_PAUSE_TIME = 1_000_000_000_000
-
-      private_constant :MAX_PAUSE_TIME
-
-      # Pauses processing of a given partition until we're done with the processing
-      # This ensures, that we can easily poll not reaching the `max.poll.interval`
-      def on_prepare
-        # Pause at the first message in a batch. That way in case of a crash, we will not loose
-        # any messages
-        pause(messages.first.offset, MAX_PAUSE_TIME) if topic.long_running_job?
-
-        super
-      end
-
-      # After user code, we seek and un-pause our partition
-      def on_consume
-        # If anything went wrong here, we should not run any partition management as it's Karafka
-        # core that will handle the backoff
-        return unless super
-
-        return unless topic.long_running_job?
-
-        # Nothing to resume if it was revoked
-        return if revoked?
-
-        # Once processing is done, we move to the new offset based on commits
-        seek(@seek_offset || messages.first.offset)
-        resume
-      end
-
-      # Marks this consumer revoked state as true
-      # This allows us for things like lrj to finish early as this state may change during lrj
-      #   execution
-      def on_revoked
-        @revoked = true
-        super
-      end
-
-      # @return [Boolean] true if partition was revoked from the current consumer
-      def revoked?
-        @revoked || false
-      end
-    end
-  end
-end
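The removed module above captures the long-running-job (lrj) flow: pause the partition at the first offset of the batch before user code runs, then seek to the committed position and resume afterwards unless the partition was revoked. According to the file listing, this responsibility moves into the new `lib/karafka/pro/base_consumer.rb`, whose contents are not shown in this diff. The condensed sketch below is purely illustrative: it restates the flow using only method names visible in the removed code (`pause`, `seek`, `resume`, `revoked?`, `messages`, `topic`), and the `LongRunningJobFlow` class and its method names are invented for illustration.

```ruby
# Illustrative restatement of the removed pause/seek/resume flow; not the actual rc1 code.
class LongRunningJobFlow
  FOREVER = 1_000_000_000_000 # "pause for tops 31 years", as in the removed constant

  # Before user code: pause on the first offset of the batch so a crash loses nothing
  def before_consume(consumer)
    return unless consumer.topic.long_running_job?

    consumer.pause(consumer.messages.first.offset, FOREVER)
  end

  # After user code: move to the committed position and resume polling,
  # unless the partition was revoked in the meantime
  def after_consume(consumer, seek_offset)
    return unless consumer.topic.long_running_job?
    return if consumer.revoked?

    consumer.seek(seek_offset || consumer.messages.first.offset)
    consumer.resume
  end
end
```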
data/lib/karafka/pro/scheduler.rb
DELETED
@@ -1,54 +0,0 @@
-# frozen_string_literal: true
-
-# This Karafka component is a Pro component.
-# All of the commercial components are present in the lib/karafka/pro directory of this
-# repository and their usage requires commercial license agreement.
-#
-# Karafka has also commercial-friendly license, commercial support and commercial components.
-#
-# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
-# your code to Maciej Mensfeld.
-
-module Karafka
-  module Pro
-    # Optimizes scheduler that takes into consideration of execution time needed to process
-    # messages from given topics partitions. It uses the non-preemptive LJF algorithm
-    #
-    # This scheduler is designed to optimize execution times on jobs that perform IO operations as
-    # when taking IO into consideration, the can achieve optimized parallel processing.
-    #
-    # This scheduler can also work with virtual partitions.
-    #
-    # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
-    # default FIFO scheduler from the default Karafka scheduler
-    class Scheduler < ::Karafka::Scheduler
-      # Schedules jobs in the LJF order for consumption
-      #
-      # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
-      # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
-      #
-      def schedule_consumption(queue, jobs_array)
-        pt = PerformanceTracker.instance
-
-        ordered = []
-
-        jobs_array.each do |job|
-          messages = job.messages
-          message = messages.first
-
-          cost = pt.processing_time_p95(message.topic, message.partition) * messages.size
-
-          ordered << [job, cost]
-        end
-
-        ordered.sort_by!(&:last)
-        ordered.reverse!
-        ordered.map!(&:first)
-
-        ordered.each do |job|
-          queue << job
-        end
-      end
-    end
-  end
-end
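To make the LJF (longest job first) ordering in the removed `schedule_consumption` concrete: each job's cost is estimated as the p95 processing time recorded for its topic/partition multiplied by the batch size, and jobs are enqueued from the most expensive down, which keeps the longest IO-bound work from trailing at the end of a batch. A standalone sketch with made-up numbers follows; the hashes and the `p95_ms`/`batch_size` keys are hypothetical and not Karafka APIs.

```ruby
# LJF ordering in isolation: estimate cost, sort descending, enqueue in that order.
jobs = [
  { id: :a, p95_ms: 12.0, batch_size: 10 },  # cost 120
  { id: :b, p95_ms: 50.0, batch_size: 5 },   # cost 250
  { id: :c, p95_ms: 3.0,  batch_size: 100 }  # cost 300
]

ordered = jobs
          .map     { |job| [job, job[:p95_ms] * job[:batch_size]] }
          .sort_by { |_job, cost| -cost }
          .map(&:first)

ordered.map { |job| job[:id] } # => [:c, :b, :a]
```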
data/lib/karafka/scheduler.rb
DELETED
@@ -1,20 +0,0 @@
-# frozen_string_literal: true
-
-module Karafka
-  # FIFO scheduler for messages coming from various topics and partitions
-  class Scheduler
-    # Schedules jobs in the fifo order
-    #
-    # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
-    # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
-    def schedule_consumption(queue, jobs_array)
-      jobs_array.each do |job|
-        queue << job
-      end
-    end
-
-    # Both revocation and shutdown jobs can also run in fifo by default
-    alias schedule_revocation schedule_consumption
-    alias schedule_shutdown schedule_consumption
-  end
-end
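With the standalone `Karafka::Scheduler` removed, the default scheduler now comes from `config.internal.processing.scheduler` (see the `setup/config.rb` hunk above, where it defaults to `Processing::Scheduler.new`). The sketch below shows how a custom scheduler could plausibly be plugged in; the `schedule_consumption(queue, jobs_array)` signature is taken from the removed class, the new `Processing::Scheduler` may expose a slightly different interface, and `ReverseFifoScheduler` is an invented example rather than anything this release ships.

```ruby
# Invented example: a scheduler that drains consumption jobs in reverse arrival order.
class ReverseFifoScheduler
  def schedule_consumption(queue, jobs_array)
    jobs_array.reverse_each { |job| queue << job }
  end

  # Revocation and shutdown jobs follow the same ordering
  alias schedule_revocation schedule_consumption
  alias schedule_shutdown schedule_consumption
end

Karafka::App.setup do |config|
  config.internal.processing.scheduler = ReverseFifoScheduler.new
end
```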