karafka 2.0.0.beta3 → 2.0.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +18 -15
  4. data/CHANGELOG.md +37 -0
  5. data/CONTRIBUTING.md +0 -5
  6. data/Gemfile.lock +6 -6
  7. data/README.md +2 -10
  8. data/bin/benchmarks +2 -2
  9. data/bin/integrations +10 -3
  10. data/bin/{stress → stress_many} +1 -1
  11. data/bin/stress_one +13 -0
  12. data/bin/wait_for_kafka +20 -0
  13. data/docker-compose.yml +32 -13
  14. data/karafka.gemspec +1 -1
  15. data/lib/karafka/active_job/routing/extensions.rb +1 -1
  16. data/lib/karafka/app.rb +2 -1
  17. data/lib/karafka/base_consumer.rb +59 -46
  18. data/lib/karafka/connection/client.rb +60 -14
  19. data/lib/karafka/connection/listener.rb +37 -11
  20. data/lib/karafka/connection/rebalance_manager.rb +20 -19
  21. data/lib/karafka/contracts/config.rb +18 -4
  22. data/lib/karafka/contracts/server_cli_options.rb +1 -1
  23. data/lib/karafka/errors.rb +3 -0
  24. data/lib/karafka/instrumentation/logger_listener.rb +0 -3
  25. data/lib/karafka/instrumentation/monitor.rb +0 -1
  26. data/lib/karafka/pro/active_job/consumer.rb +2 -8
  27. data/lib/karafka/pro/base_consumer.rb +82 -0
  28. data/lib/karafka/pro/loader.rb +14 -8
  29. data/lib/karafka/pro/processing/coordinator.rb +63 -0
  30. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +1 -1
  31. data/lib/karafka/pro/processing/jobs_builder.rb +3 -2
  32. data/lib/karafka/pro/processing/partitioner.rb +41 -0
  33. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  34. data/lib/karafka/pro/routing/extensions.rb +6 -0
  35. data/lib/karafka/processing/coordinator.rb +88 -0
  36. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  37. data/lib/karafka/processing/executor.rb +16 -9
  38. data/lib/karafka/processing/executors_buffer.rb +46 -15
  39. data/lib/karafka/processing/jobs/base.rb +8 -3
  40. data/lib/karafka/processing/jobs/consume.rb +11 -4
  41. data/lib/karafka/processing/jobs_builder.rb +3 -2
  42. data/lib/karafka/processing/partitioner.rb +22 -0
  43. data/lib/karafka/processing/result.rb +29 -0
  44. data/lib/karafka/processing/scheduler.rb +22 -0
  45. data/lib/karafka/processing/worker.rb +2 -2
  46. data/lib/karafka/routing/consumer_group.rb +1 -1
  47. data/lib/karafka/routing/topic.rb +14 -0
  48. data/lib/karafka/setup/config.rb +20 -10
  49. data/lib/karafka/version.rb +1 -1
  50. data.tar.gz.sig +0 -0
  51. metadata +16 -8
  52. metadata.gz.sig +0 -0
  53. data/lib/karafka/pro/base_consumer_extensions.rb +0 -66
  54. data/lib/karafka/pro/scheduler.rb +0 -54
  55. data/lib/karafka/scheduler.rb +0 -20
@@ -85,21 +85,31 @@ module Karafka
85
85
 
86
86
  # Namespace for internal settings that should not be modified directly
87
87
  setting :internal do
88
- # option routing_builder [Karafka::Routing::Builder] builder instance
89
- setting :routing_builder, default: Routing::Builder.new
90
88
  # option status [Karafka::Status] app status
91
89
  setting :status, default: Status.new
92
90
  # option process [Karafka::Process] process status
93
91
  # @note In the future, we need to have a single process representation for all the karafka
94
92
  # instances
95
93
  setting :process, default: Process.new
96
- # option subscription_groups_builder [Routing::SubscriptionGroupsBuilder] subscription
97
- # group builder
98
- setting :subscription_groups_builder, default: Routing::SubscriptionGroupsBuilder.new
99
- # option scheduler [Class] scheduler we will be using
100
- setting :scheduler, default: Scheduler.new
101
- # option jobs_builder [Class] jobs builder we want to use
102
- setting :jobs_builder, default: Processing::JobsBuilder.new
94
+
95
+ setting :routing do
96
+ # option builder [Karafka::Routing::Builder] builder instance
97
+ setting :builder, default: Routing::Builder.new
98
+ # option subscription_groups_builder [Routing::SubscriptionGroupsBuilder] subscription
99
+ # group builder
100
+ setting :subscription_groups_builder, default: Routing::SubscriptionGroupsBuilder.new
101
+ end
102
+
103
+ setting :processing do
104
+ # option scheduler [Object] scheduler we will be using
105
+ setting :scheduler, default: Processing::Scheduler.new
106
+ # option jobs_builder [Object] jobs builder we want to use
107
+ setting :jobs_builder, default: Processing::JobsBuilder.new
108
+ # option coordinator_class [Class] work coordinator we want to use for processing coordination
109
+ setting :coordinator_class, default: Processing::Coordinator
110
+ # option partitioner_class [Class] partitioner we use against a batch of data
111
+ setting :partitioner_class, default: Processing::Partitioner
112
+ end
103
113
 
104
114
  # Karafka components for ActiveJob
105
115
  setting :active_job do
@@ -109,7 +119,7 @@ module Karafka
109
119
  # ensuring, that extra job options defined are valid
110
120
  setting :job_options_contract, default: ActiveJob::JobOptionsContract.new
111
121
  # option consumer [Class] consumer class that should be used to consume ActiveJob data
112
- setting :consumer, default: ActiveJob::Consumer
122
+ setting :consumer_class, default: ActiveJob::Consumer
113
123
  end
114
124
  end
115
125
 
@@ -3,5 +3,5 @@
3
3
  # Main module namespace
4
4
  module Karafka
5
5
  # Current Karafka version
6
- VERSION = '2.0.0.beta3'
6
+ VERSION = '2.0.0.rc1'
7
7
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: karafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0.beta3
4
+ version: 2.0.0.rc1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld
@@ -34,7 +34,7 @@ cert_chain:
34
34
  R2P11bWoCtr70BsccVrN8jEhzwXngMyI2gVt750Y+dbTu1KgRqZKp/ECe7ZzPzXj
35
35
  pIy9vHxTANKYVyI4qj8OrFdEM5BQNu8oQpL0iQ==
36
36
  -----END CERTIFICATE-----
37
- date: 2022-06-14 00:00:00.000000000 Z
37
+ date: 2022-07-08 00:00:00.000000000 Z
38
38
  dependencies:
39
39
  - !ruby/object:Gem::Dependency
40
40
  name: dry-configurable
@@ -112,7 +112,7 @@ dependencies:
112
112
  requirements:
113
113
  - - ">="
114
114
  - !ruby/object:Gem::Version
115
- version: 2.3.0
115
+ version: 2.3.1
116
116
  - - "<"
117
117
  - !ruby/object:Gem::Version
118
118
  version: 3.0.0
@@ -122,7 +122,7 @@ dependencies:
122
122
  requirements:
123
123
  - - ">="
124
124
  - !ruby/object:Gem::Version
125
- version: 2.3.0
125
+ version: 2.3.1
126
126
  - - "<"
127
127
  - !ruby/object:Gem::Version
128
128
  version: 3.0.0
@@ -172,7 +172,9 @@ files:
172
172
  - bin/integrations
173
173
  - bin/karafka
174
174
  - bin/scenario
175
- - bin/stress
175
+ - bin/stress_many
176
+ - bin/stress_one
177
+ - bin/wait_for_kafka
176
178
  - certs/karafka-pro.pem
177
179
  - certs/mensfeld.pem
178
180
  - config/errors.yml
@@ -232,14 +234,18 @@ files:
232
234
  - lib/karafka/pro/active_job/consumer.rb
233
235
  - lib/karafka/pro/active_job/dispatcher.rb
234
236
  - lib/karafka/pro/active_job/job_options_contract.rb
235
- - lib/karafka/pro/base_consumer_extensions.rb
237
+ - lib/karafka/pro/base_consumer.rb
236
238
  - lib/karafka/pro/loader.rb
237
239
  - lib/karafka/pro/performance_tracker.rb
240
+ - lib/karafka/pro/processing/coordinator.rb
238
241
  - lib/karafka/pro/processing/jobs/consume_non_blocking.rb
239
242
  - lib/karafka/pro/processing/jobs_builder.rb
243
+ - lib/karafka/pro/processing/partitioner.rb
244
+ - lib/karafka/pro/processing/scheduler.rb
240
245
  - lib/karafka/pro/routing/extensions.rb
241
- - lib/karafka/pro/scheduler.rb
242
246
  - lib/karafka/process.rb
247
+ - lib/karafka/processing/coordinator.rb
248
+ - lib/karafka/processing/coordinators_buffer.rb
243
249
  - lib/karafka/processing/executor.rb
244
250
  - lib/karafka/processing/executors_buffer.rb
245
251
  - lib/karafka/processing/jobs/base.rb
@@ -248,6 +254,9 @@ files:
248
254
  - lib/karafka/processing/jobs/shutdown.rb
249
255
  - lib/karafka/processing/jobs_builder.rb
250
256
  - lib/karafka/processing/jobs_queue.rb
257
+ - lib/karafka/processing/partitioner.rb
258
+ - lib/karafka/processing/result.rb
259
+ - lib/karafka/processing/scheduler.rb
251
260
  - lib/karafka/processing/worker.rb
252
261
  - lib/karafka/processing/workers_batch.rb
253
262
  - lib/karafka/railtie.rb
@@ -261,7 +270,6 @@ files:
261
270
  - lib/karafka/routing/topic.rb
262
271
  - lib/karafka/routing/topics.rb
263
272
  - lib/karafka/runner.rb
264
- - lib/karafka/scheduler.rb
265
273
  - lib/karafka/serialization/json/deserializer.rb
266
274
  - lib/karafka/server.rb
267
275
  - lib/karafka/setup/config.rb
metadata.gz.sig CHANGED
Binary file
@@ -1,66 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # This Karafka component is a Pro component.
4
- # All of the commercial components are present in the lib/karafka/pro directory of this
5
- # repository and their usage requires commercial license agreement.
6
- #
7
- # Karafka has also commercial-friendly license, commercial support and commercial components.
8
- #
9
- # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
- # your code to Maciej Mensfeld.
11
-
12
- module Karafka
13
- module Pro
14
- # Extensions to the base consumer that make it more pro and fancy
15
- #
16
- # @note In case of using lrj, manual pausing may not be the best idea as resume needs to happen
17
- # after each batch is processed.
18
- #
19
- # They need to be added to the consumer via `#prepend`
20
- module BaseConsumerExtensions
21
- # Pause for tops 31 years
22
- MAX_PAUSE_TIME = 1_000_000_000_000
23
-
24
- private_constant :MAX_PAUSE_TIME
25
-
26
- # Pauses processing of a given partition until we're done with the processing
27
- # This ensures, that we can easily poll not reaching the `max.poll.interval`
28
- def on_prepare
29
- # Pause at the first message in a batch. That way in case of a crash, we will not lose
30
- # any messages
31
- pause(messages.first.offset, MAX_PAUSE_TIME) if topic.long_running_job?
32
-
33
- super
34
- end
35
-
36
- # After user code, we seek and un-pause our partition
37
- def on_consume
38
- # If anything went wrong here, we should not run any partition management as it's Karafka
39
- # core that will handle the backoff
40
- return unless super
41
-
42
- return unless topic.long_running_job?
43
-
44
- # Nothing to resume if it was revoked
45
- return if revoked?
46
-
47
- # Once processing is done, we move to the new offset based on commits
48
- seek(@seek_offset || messages.first.offset)
49
- resume
50
- end
51
-
52
- # Marks this consumer revoked state as true
53
- # This allows us for things like lrj to finish early as this state may change during lrj
54
- # execution
55
- def on_revoked
56
- @revoked = true
57
- super
58
- end
59
-
60
- # @return [Boolean] true if partition was revoked from the current consumer
61
- def revoked?
62
- @revoked || false
63
- end
64
- end
65
- end
66
- end
@@ -1,54 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # This Karafka component is a Pro component.
4
- # All of the commercial components are present in the lib/karafka/pro directory of this
5
- # repository and their usage requires commercial license agreement.
6
- #
7
- # Karafka has also commercial-friendly license, commercial support and commercial components.
8
- #
9
- # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
- # your code to Maciej Mensfeld.
11
-
12
- module Karafka
13
- module Pro
14
- # Optimized scheduler that takes into consideration the execution time needed to process
15
- # messages from given topics partitions. It uses the non-preemptive LJF algorithm
16
- #
17
- # This scheduler is designed to optimize execution times on jobs that perform IO operations as
18
- # when taking IO into consideration, they can achieve optimized parallel processing.
19
- #
20
- # This scheduler can also work with virtual partitions.
21
- #
22
- # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
23
- # default FIFO scheduler from the default Karafka scheduler
24
- class Scheduler < ::Karafka::Scheduler
25
- # Schedules jobs in the LJF order for consumption
26
- #
27
- # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
28
- # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
29
- #
30
- def schedule_consumption(queue, jobs_array)
31
- pt = PerformanceTracker.instance
32
-
33
- ordered = []
34
-
35
- jobs_array.each do |job|
36
- messages = job.messages
37
- message = messages.first
38
-
39
- cost = pt.processing_time_p95(message.topic, message.partition) * messages.size
40
-
41
- ordered << [job, cost]
42
- end
43
-
44
- ordered.sort_by!(&:last)
45
- ordered.reverse!
46
- ordered.map!(&:first)
47
-
48
- ordered.each do |job|
49
- queue << job
50
- end
51
- end
52
- end
53
- end
54
- end
@@ -1,20 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Karafka
4
- # FIFO scheduler for messages coming from various topics and partitions
5
- class Scheduler
6
- # Schedules jobs in the fifo order
7
- #
8
- # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
9
- # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
10
- def schedule_consumption(queue, jobs_array)
11
- jobs_array.each do |job|
12
- queue << job
13
- end
14
- end
15
-
16
- # Both revocation and shutdown jobs can also run in fifo by default
17
- alias schedule_revocation schedule_consumption
18
- alias schedule_shutdown schedule_consumption
19
- end
20
- end