karafka 2.0.0.beta3 → 2.0.0.rc1
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +18 -15
- data/CHANGELOG.md +37 -0
- data/CONTRIBUTING.md +0 -5
- data/Gemfile.lock +6 -6
- data/README.md +2 -10
- data/bin/benchmarks +2 -2
- data/bin/integrations +10 -3
- data/bin/{stress → stress_many} +1 -1
- data/bin/stress_one +13 -0
- data/bin/wait_for_kafka +20 -0
- data/docker-compose.yml +32 -13
- data/karafka.gemspec +1 -1
- data/lib/karafka/active_job/routing/extensions.rb +1 -1
- data/lib/karafka/app.rb +2 -1
- data/lib/karafka/base_consumer.rb +59 -46
- data/lib/karafka/connection/client.rb +60 -14
- data/lib/karafka/connection/listener.rb +37 -11
- data/lib/karafka/connection/rebalance_manager.rb +20 -19
- data/lib/karafka/contracts/config.rb +18 -4
- data/lib/karafka/contracts/server_cli_options.rb +1 -1
- data/lib/karafka/errors.rb +3 -0
- data/lib/karafka/instrumentation/logger_listener.rb +0 -3
- data/lib/karafka/instrumentation/monitor.rb +0 -1
- data/lib/karafka/pro/active_job/consumer.rb +2 -8
- data/lib/karafka/pro/base_consumer.rb +82 -0
- data/lib/karafka/pro/loader.rb +14 -8
- data/lib/karafka/pro/processing/coordinator.rb +63 -0
- data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +1 -1
- data/lib/karafka/pro/processing/jobs_builder.rb +3 -2
- data/lib/karafka/pro/processing/partitioner.rb +41 -0
- data/lib/karafka/pro/processing/scheduler.rb +56 -0
- data/lib/karafka/pro/routing/extensions.rb +6 -0
- data/lib/karafka/processing/coordinator.rb +88 -0
- data/lib/karafka/processing/coordinators_buffer.rb +54 -0
- data/lib/karafka/processing/executor.rb +16 -9
- data/lib/karafka/processing/executors_buffer.rb +46 -15
- data/lib/karafka/processing/jobs/base.rb +8 -3
- data/lib/karafka/processing/jobs/consume.rb +11 -4
- data/lib/karafka/processing/jobs_builder.rb +3 -2
- data/lib/karafka/processing/partitioner.rb +22 -0
- data/lib/karafka/processing/result.rb +29 -0
- data/lib/karafka/processing/scheduler.rb +22 -0
- data/lib/karafka/processing/worker.rb +2 -2
- data/lib/karafka/routing/consumer_group.rb +1 -1
- data/lib/karafka/routing/topic.rb +14 -0
- data/lib/karafka/setup/config.rb +20 -10
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +16 -8
- metadata.gz.sig +0 -0
- data/lib/karafka/pro/base_consumer_extensions.rb +0 -66
- data/lib/karafka/pro/scheduler.rb +0 -54
- data/lib/karafka/scheduler.rb +0 -20
data/lib/karafka/setup/config.rb
CHANGED
@@ -85,21 +85,31 @@ module Karafka
 
     # Namespace for internal settings that should not be modified directly
     setting :internal do
-      # option routing_builder [Karafka::Routing::Builder] builder instance
-      setting :routing_builder, default: Routing::Builder.new
       # option status [Karafka::Status] app status
       setting :status, default: Status.new
       # option process [Karafka::Process] process status
       # @note In the future, we need to have a single process representation for all the karafka
       #   instances
       setting :process, default: Process.new
-
-
-
-
-
-
-
+
+      setting :routing do
+        # option builder [Karafka::Routing::Builder] builder instance
+        setting :builder, default: Routing::Builder.new
+        # option subscription_groups_builder [Routing::SubscriptionGroupsBuilder] subscription
+        #   group builder
+        setting :subscription_groups_builder, default: Routing::SubscriptionGroupsBuilder.new
+      end
+
+      setting :processing do
+        # option scheduler [Object] scheduler we will be using
+        setting :scheduler, default: Processing::Scheduler.new
+        # option jobs_builder [Object] jobs builder we want to use
+        setting :jobs_builder, default: Processing::JobsBuilder.new
+        # option coordinator [Class] work coordinator we want to user for processing coordination
+        setting :coordinator_class, default: Processing::Coordinator
+        # option partitioner_class [Class] partitioner we use against a batch of data
+        setting :partitioner_class, default: Processing::Partitioner
+      end
 
       # Karafka components for ActiveJob
       setting :active_job do
@@ -109,7 +119,7 @@ module Karafka
         # ensuring, that extra job options defined are valid
         setting :job_options_contract, default: ActiveJob::JobOptionsContract.new
         # option consumer [Class] consumer class that should be used to consume ActiveJob data
-        setting :
+        setting :consumer_class, default: ActiveJob::Consumer
       end
     end
 
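In practice this regrouping changes how the internal defaults are addressed. The sketch below shows the nested paths introduced by the hunk above; it assumes the usual dry-configurable access pattern that Karafka's config already uses, and the `setup` values are illustrative placeholders, not part of this diff.

```ruby
# A minimal sketch of reading the regrouped internal settings after this change.
# The setup values below are illustrative only; the nested paths mirror the hunk above.
require 'karafka'

Karafka::App.setup do |config|
  config.client_id = 'example_app'
  config.kafka = { 'bootstrap.servers': '127.0.0.1:9092' }
end

config = Karafka::App.config

# Previously config.internal.routing_builder; now grouped under `routing`:
puts config.internal.routing.builder.class        # => Karafka::Routing::Builder
# Processing-related defaults now live under `processing`:
puts config.internal.processing.scheduler.class   # => Karafka::Processing::Scheduler
puts config.internal.processing.coordinator_class # => Karafka::Processing::Coordinator
# The ActiveJob consumer setting was renamed to `consumer_class`:
puts config.internal.active_job.consumer_class    # => Karafka::ActiveJob::Consumer
```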
data/lib/karafka/version.rb
CHANGED
data.tar.gz.sig
CHANGED
Binary file
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: karafka
 version: !ruby/object:Gem::Version
-  version: 2.0.0.beta3
+  version: 2.0.0.rc1
 platform: ruby
 authors:
 - Maciej Mensfeld
@@ -34,7 +34,7 @@ cert_chain:
   R2P11bWoCtr70BsccVrN8jEhzwXngMyI2gVt750Y+dbTu1KgRqZKp/ECe7ZzPzXj
   pIy9vHxTANKYVyI4qj8OrFdEM5BQNu8oQpL0iQ==
   -----END CERTIFICATE-----
-date: 2022-
+date: 2022-07-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: dry-configurable
@@ -112,7 +112,7 @@ dependencies:
     requirements:
     - - ">="
      - !ruby/object:Gem::Version
-        version: 2.3.
+        version: 2.3.1
    - - "<"
      - !ruby/object:Gem::Version
        version: 3.0.0
@@ -122,7 +122,7 @@ dependencies:
     requirements:
    - - ">="
      - !ruby/object:Gem::Version
-        version: 2.3.
+        version: 2.3.1
    - - "<"
      - !ruby/object:Gem::Version
        version: 3.0.0
@@ -172,7 +172,9 @@ files:
 - bin/integrations
 - bin/karafka
 - bin/scenario
-- bin/stress
+- bin/stress_many
+- bin/stress_one
+- bin/wait_for_kafka
 - certs/karafka-pro.pem
 - certs/mensfeld.pem
 - config/errors.yml
@@ -232,14 +234,18 @@ files:
 - lib/karafka/pro/active_job/consumer.rb
 - lib/karafka/pro/active_job/dispatcher.rb
 - lib/karafka/pro/active_job/job_options_contract.rb
-- lib/karafka/pro/base_consumer_extensions.rb
+- lib/karafka/pro/base_consumer.rb
 - lib/karafka/pro/loader.rb
 - lib/karafka/pro/performance_tracker.rb
+- lib/karafka/pro/processing/coordinator.rb
 - lib/karafka/pro/processing/jobs/consume_non_blocking.rb
 - lib/karafka/pro/processing/jobs_builder.rb
+- lib/karafka/pro/processing/partitioner.rb
+- lib/karafka/pro/processing/scheduler.rb
 - lib/karafka/pro/routing/extensions.rb
-- lib/karafka/pro/scheduler.rb
 - lib/karafka/process.rb
+- lib/karafka/processing/coordinator.rb
+- lib/karafka/processing/coordinators_buffer.rb
 - lib/karafka/processing/executor.rb
 - lib/karafka/processing/executors_buffer.rb
 - lib/karafka/processing/jobs/base.rb
@@ -248,6 +254,9 @@ files:
 - lib/karafka/processing/jobs/shutdown.rb
 - lib/karafka/processing/jobs_builder.rb
 - lib/karafka/processing/jobs_queue.rb
+- lib/karafka/processing/partitioner.rb
+- lib/karafka/processing/result.rb
+- lib/karafka/processing/scheduler.rb
 - lib/karafka/processing/worker.rb
 - lib/karafka/processing/workers_batch.rb
 - lib/karafka/railtie.rb
@@ -261,7 +270,6 @@ files:
 - lib/karafka/routing/topic.rb
 - lib/karafka/routing/topics.rb
 - lib/karafka/runner.rb
-- lib/karafka/scheduler.rb
 - lib/karafka/serialization/json/deserializer.rb
 - lib/karafka/server.rb
 - lib/karafka/setup/config.rb
metadata.gz.sig
CHANGED
Binary file
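If one wanted to try this release candidate, the pin below is a minimal sketch; prerelease versions are not matched by a bare `gem 'karafka'` requirement, so the version string from the metadata hunk above has to be given explicitly.

```ruby
# Gemfile — minimal sketch for pulling in the release candidate shown above
source 'https://rubygems.org'

gem 'karafka', '2.0.0.rc1'
```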
data/lib/karafka/pro/base_consumer_extensions.rb
DELETED
@@ -1,66 +0,0 @@
-# frozen_string_literal: true
-
-# This Karafka component is a Pro component.
-# All of the commercial components are present in the lib/karafka/pro directory of this
-# repository and their usage requires commercial license agreement.
-#
-# Karafka has also commercial-friendly license, commercial support and commercial components.
-#
-# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
-# your code to Maciej Mensfeld.
-
-module Karafka
-  module Pro
-    # Extensions to the base consumer that make it more pro and fancy
-    #
-    # @note In case of using lrj, manual pausing may not be the best idea as resume needs to happen
-    #   after each batch is processed.
-    #
-    # They need to be added to the consumer via `#prepend`
-    module BaseConsumerExtensions
-      # Pause for tops 31 years
-      MAX_PAUSE_TIME = 1_000_000_000_000
-
-      private_constant :MAX_PAUSE_TIME
-
-      # Pauses processing of a given partition until we're done with the processing
-      # This ensures, that we can easily poll not reaching the `max.poll.interval`
-      def on_prepare
-        # Pause at the first message in a batch. That way in case of a crash, we will not loose
-        # any messages
-        pause(messages.first.offset, MAX_PAUSE_TIME) if topic.long_running_job?
-
-        super
-      end
-
-      # After user code, we seek and un-pause our partition
-      def on_consume
-        # If anything went wrong here, we should not run any partition management as it's Karafka
-        # core that will handle the backoff
-        return unless super
-
-        return unless topic.long_running_job?
-
-        # Nothing to resume if it was revoked
-        return if revoked?
-
-        # Once processing is done, we move to the new offset based on commits
-        seek(@seek_offset || messages.first.offset)
-        resume
-      end
-
-      # Marks this consumer revoked state as true
-      # This allows us for things like lrj to finish early as this state may change during lrj
-      #   execution
-      def on_revoked
-        @revoked = true
-        super
-      end
-
-      # @return [Boolean] true if partition was revoked from the current consumer
-      def revoked?
-        @revoked || false
-      end
-    end
-  end
-end
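Per the files list above, this module is superseded by lib/karafka/pro/base_consumer.rb. The flow it implemented — pause the partition before a long batch, then seek and resume once processing ends unless the partition was revoked — can be summarized with the standalone sketch below; `pause`, `seek` and `resume` are stubbed and the class name is illustrative, not Karafka API.

```ruby
# Standalone illustration of the long-running-job flow from the removed module.
# In Karafka the pause keeps `max.poll.interval.ms` from being exceeded while a
# slow batch is processed; here the partition operations are just printed.
class LongRunningJobFlow
  MAX_PAUSE_TIME = 1_000_000_000_000 # "pause until we explicitly resume"

  def initialize(first_offset)
    @first_offset = first_offset
    @revoked = false
  end

  # Before processing: pause at the first offset so a crash loses no messages
  def on_prepare
    pause(@first_offset, MAX_PAUSE_TIME)
  end

  # After processing: seek to the next offset to work on and resume polling,
  # unless the partition was taken away from us in the meantime
  def on_consume(next_offset)
    return if @revoked

    seek(next_offset)
    resume
  end

  # Rebalance callback flips the flag that short-circuits the resume above
  def on_revoked
    @revoked = true
  end

  private

  def pause(offset, timeout_ms)
    puts "pause partition at offset #{offset} for #{timeout_ms} ms"
  end

  def seek(offset)
    puts "seek to offset #{offset}"
  end

  def resume
    puts 'resume partition'
  end
end

flow = LongRunningJobFlow.new(100)
flow.on_prepare
# ... long batch processing happens here ...
flow.on_consume(105)
```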
data/lib/karafka/pro/scheduler.rb
DELETED
@@ -1,54 +0,0 @@
-# frozen_string_literal: true
-
-# This Karafka component is a Pro component.
-# All of the commercial components are present in the lib/karafka/pro directory of this
-# repository and their usage requires commercial license agreement.
-#
-# Karafka has also commercial-friendly license, commercial support and commercial components.
-#
-# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
-# your code to Maciej Mensfeld.
-
-module Karafka
-  module Pro
-    # Optimizes scheduler that takes into consideration of execution time needed to process
-    # messages from given topics partitions. It uses the non-preemptive LJF algorithm
-    #
-    # This scheduler is designed to optimize execution times on jobs that perform IO operations as
-    # when taking IO into consideration, the can achieve optimized parallel processing.
-    #
-    # This scheduler can also work with virtual partitions.
-    #
-    # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
-    # default FIFO scheduler from the default Karafka scheduler
-    class Scheduler < ::Karafka::Scheduler
-      # Schedules jobs in the LJF order for consumption
-      #
-      # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
-      # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
-      #
-      def schedule_consumption(queue, jobs_array)
-        pt = PerformanceTracker.instance
-
-        ordered = []
-
-        jobs_array.each do |job|
-          messages = job.messages
-          message = messages.first
-
-          cost = pt.processing_time_p95(message.topic, message.partition) * messages.size
-
-          ordered << [job, cost]
-        end
-
-        ordered.sort_by!(&:last)
-        ordered.reverse!
-        ordered.map!(&:first)
-
-        ordered.each do |job|
-          queue << job
-        end
-      end
-    end
-  end
-end
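The LJF ordering above (relocated to lib/karafka/pro/processing/scheduler.rb per the files list) boils down to: cost = p95 processing time of the topic partition × batch size, enqueue the costliest jobs first. A toy worked example, with stand-in structs instead of the real PerformanceTracker and job objects:

```ruby
# Toy illustration of the LJF ordering: cost = p95 processing time * batch size,
# highest cost scheduled first. Structs stand in for Karafka jobs and the tracker.
Job = Struct.new(:name, :p95_ms, :batch_size)

jobs = [
  Job.new('orders/0',   12.0, 50),  # cost: 600
  Job.new('payments/3', 80.0, 10),  # cost: 800
  Job.new('logs/7',      1.5, 200)  # cost: 300
]

ordered = jobs
          .map { |job| [job, job.p95_ms * job.batch_size] }
          .sort_by(&:last)
          .reverse
          .map(&:first)

queue = []
ordered.each { |job| queue << job }

puts queue.map(&:name).inspect # => ["payments/3", "orders/0", "logs/7"]
```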
data/lib/karafka/scheduler.rb
DELETED
@@ -1,20 +0,0 @@
-# frozen_string_literal: true
-
-module Karafka
-  # FIFO scheduler for messages coming from various topics and partitions
-  class Scheduler
-    # Schedules jobs in the fifo order
-    #
-    # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
-    # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
-    def schedule_consumption(queue, jobs_array)
-      jobs_array.each do |job|
-        queue << job
-      end
-    end
-
-    # Both revocation and shutdown jobs can also run in fifo by default
-    alias schedule_revocation schedule_consumption
-    alias schedule_shutdown schedule_consumption
-  end
-end
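Per the files list, FIFO scheduling resurfaces as lib/karafka/processing/scheduler.rb. Assuming the replacement keeps the same `schedule_consumption(queue, jobs_array)` contract shown above, its behaviour stays trivially order-preserving, as this sketch (with an Array standing in for the jobs queue) shows:

```ruby
# Minimal FIFO sketch matching the interface of the deleted class; an Array
# stands in for Karafka::Processing::JobsQueue.
class FifoScheduler
  def schedule_consumption(queue, jobs_array)
    jobs_array.each { |job| queue << job }
  end

  # Revocation and shutdown jobs can also run in FIFO order
  alias schedule_revocation schedule_consumption
  alias schedule_shutdown schedule_consumption
end

queue = []
FifoScheduler.new.schedule_consumption(queue, %w[job_a job_b job_c])
puts queue.inspect # => ["job_a", "job_b", "job_c"]
```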