karafka 2.0.0.beta3 → 2.0.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +18 -15
  4. data/CHANGELOG.md +37 -0
  5. data/CONTRIBUTING.md +0 -5
  6. data/Gemfile.lock +6 -6
  7. data/README.md +2 -10
  8. data/bin/benchmarks +2 -2
  9. data/bin/integrations +10 -3
  10. data/bin/{stress → stress_many} +1 -1
  11. data/bin/stress_one +13 -0
  12. data/bin/wait_for_kafka +20 -0
  13. data/docker-compose.yml +32 -13
  14. data/karafka.gemspec +1 -1
  15. data/lib/karafka/active_job/routing/extensions.rb +1 -1
  16. data/lib/karafka/app.rb +2 -1
  17. data/lib/karafka/base_consumer.rb +59 -46
  18. data/lib/karafka/connection/client.rb +60 -14
  19. data/lib/karafka/connection/listener.rb +37 -11
  20. data/lib/karafka/connection/rebalance_manager.rb +20 -19
  21. data/lib/karafka/contracts/config.rb +18 -4
  22. data/lib/karafka/contracts/server_cli_options.rb +1 -1
  23. data/lib/karafka/errors.rb +3 -0
  24. data/lib/karafka/instrumentation/logger_listener.rb +0 -3
  25. data/lib/karafka/instrumentation/monitor.rb +0 -1
  26. data/lib/karafka/pro/active_job/consumer.rb +2 -8
  27. data/lib/karafka/pro/base_consumer.rb +82 -0
  28. data/lib/karafka/pro/loader.rb +14 -8
  29. data/lib/karafka/pro/processing/coordinator.rb +63 -0
  30. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +1 -1
  31. data/lib/karafka/pro/processing/jobs_builder.rb +3 -2
  32. data/lib/karafka/pro/processing/partitioner.rb +41 -0
  33. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  34. data/lib/karafka/pro/routing/extensions.rb +6 -0
  35. data/lib/karafka/processing/coordinator.rb +88 -0
  36. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  37. data/lib/karafka/processing/executor.rb +16 -9
  38. data/lib/karafka/processing/executors_buffer.rb +46 -15
  39. data/lib/karafka/processing/jobs/base.rb +8 -3
  40. data/lib/karafka/processing/jobs/consume.rb +11 -4
  41. data/lib/karafka/processing/jobs_builder.rb +3 -2
  42. data/lib/karafka/processing/partitioner.rb +22 -0
  43. data/lib/karafka/processing/result.rb +29 -0
  44. data/lib/karafka/processing/scheduler.rb +22 -0
  45. data/lib/karafka/processing/worker.rb +2 -2
  46. data/lib/karafka/routing/consumer_group.rb +1 -1
  47. data/lib/karafka/routing/topic.rb +14 -0
  48. data/lib/karafka/setup/config.rb +20 -10
  49. data/lib/karafka/version.rb +1 -1
  50. data.tar.gz.sig +0 -0
  51. metadata +16 -8
  52. metadata.gz.sig +0 -0
  53. data/lib/karafka/pro/base_consumer_extensions.rb +0 -66
  54. data/lib/karafka/pro/scheduler.rb +0 -54
  55. data/lib/karafka/scheduler.rb +0 -20
data/lib/karafka/setup/config.rb CHANGED
@@ -85,21 +85,31 @@ module Karafka
 
       # Namespace for internal settings that should not be modified directly
       setting :internal do
-        # option routing_builder [Karafka::Routing::Builder] builder instance
-        setting :routing_builder, default: Routing::Builder.new
         # option status [Karafka::Status] app status
         setting :status, default: Status.new
         # option process [Karafka::Process] process status
         # @note In the future, we need to have a single process representation for all the karafka
         #   instances
         setting :process, default: Process.new
-        # option subscription_groups_builder [Routing::SubscriptionGroupsBuilder] subscription
-        #   group builder
-        setting :subscription_groups_builder, default: Routing::SubscriptionGroupsBuilder.new
-        # option scheduler [Class] scheduler we will be using
-        setting :scheduler, default: Scheduler.new
-        # option jobs_builder [Class] jobs builder we want to use
-        setting :jobs_builder, default: Processing::JobsBuilder.new
+
+        setting :routing do
+          # option builder [Karafka::Routing::Builder] builder instance
+          setting :builder, default: Routing::Builder.new
+          # option subscription_groups_builder [Routing::SubscriptionGroupsBuilder] subscription
+          #   group builder
+          setting :subscription_groups_builder, default: Routing::SubscriptionGroupsBuilder.new
+        end
+
+        setting :processing do
+          # option scheduler [Object] scheduler we will be using
+          setting :scheduler, default: Processing::Scheduler.new
+          # option jobs_builder [Object] jobs builder we want to use
+          setting :jobs_builder, default: Processing::JobsBuilder.new
+          # option coordinator [Class] work coordinator we want to user for processing coordination
+          setting :coordinator_class, default: Processing::Coordinator
+          # option partitioner_class [Class] partitioner we use against a batch of data
+          setting :partitioner_class, default: Processing::Partitioner
+        end
 
         # Karafka components for ActiveJob
         setting :active_job do
@@ -109,7 +119,7 @@ module Karafka
           # ensuring, that extra job options defined are valid
           setting :job_options_contract, default: ActiveJob::JobOptionsContract.new
           # option consumer [Class] consumer class that should be used to consume ActiveJob data
-          setting :consumer, default: ActiveJob::Consumer
+          setting :consumer_class, default: ActiveJob::Consumer
         end
       end
 
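The internal settings above move from flat keys (internal.scheduler, internal.jobs_builder, ...) into nested routing and processing namespaces. A minimal sketch of how the relocated settings could be read or overridden after this change, assuming the access paths implied by the dry-configurable nesting above (MyCustomScheduler is a hypothetical class, not part of Karafka):

    # Reading the relocated defaults through the new nested paths
    Karafka::App.config.internal.routing.builder              # Routing::Builder instance
    Karafka::App.config.internal.processing.scheduler         # Processing::Scheduler instance
    Karafka::App.config.internal.processing.coordinator_class # Processing::Coordinator

    # Swapping in a custom scheduler now targets internal.processing.scheduler
    # instead of the old flat internal.scheduler key
    Karafka::App.config.internal.processing.scheduler = MyCustomScheduler.new
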
data/lib/karafka/version.rb CHANGED
@@ -3,5 +3,5 @@
 # Main module namespace
 module Karafka
   # Current Karafka version
-  VERSION = '2.0.0.beta3'
+  VERSION = '2.0.0.rc1'
 end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: karafka
 version: !ruby/object:Gem::Version
-  version: 2.0.0.beta3
+  version: 2.0.0.rc1
 platform: ruby
 authors:
 - Maciej Mensfeld
@@ -34,7 +34,7 @@ cert_chain:
   R2P11bWoCtr70BsccVrN8jEhzwXngMyI2gVt750Y+dbTu1KgRqZKp/ECe7ZzPzXj
   pIy9vHxTANKYVyI4qj8OrFdEM5BQNu8oQpL0iQ==
   -----END CERTIFICATE-----
-date: 2022-06-14 00:00:00.000000000 Z
+date: 2022-07-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: dry-configurable
@@ -112,7 +112,7 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 2.3.0
+        version: 2.3.1
     - - "<"
       - !ruby/object:Gem::Version
         version: 3.0.0
@@ -122,7 +122,7 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 2.3.0
+        version: 2.3.1
     - - "<"
       - !ruby/object:Gem::Version
         version: 3.0.0
@@ -172,7 +172,9 @@ files:
 - bin/integrations
 - bin/karafka
 - bin/scenario
-- bin/stress
+- bin/stress_many
+- bin/stress_one
+- bin/wait_for_kafka
 - certs/karafka-pro.pem
 - certs/mensfeld.pem
 - config/errors.yml
@@ -232,14 +234,18 @@ files:
 - lib/karafka/pro/active_job/consumer.rb
 - lib/karafka/pro/active_job/dispatcher.rb
 - lib/karafka/pro/active_job/job_options_contract.rb
-- lib/karafka/pro/base_consumer_extensions.rb
+- lib/karafka/pro/base_consumer.rb
 - lib/karafka/pro/loader.rb
 - lib/karafka/pro/performance_tracker.rb
+- lib/karafka/pro/processing/coordinator.rb
 - lib/karafka/pro/processing/jobs/consume_non_blocking.rb
 - lib/karafka/pro/processing/jobs_builder.rb
+- lib/karafka/pro/processing/partitioner.rb
+- lib/karafka/pro/processing/scheduler.rb
 - lib/karafka/pro/routing/extensions.rb
-- lib/karafka/pro/scheduler.rb
 - lib/karafka/process.rb
+- lib/karafka/processing/coordinator.rb
+- lib/karafka/processing/coordinators_buffer.rb
 - lib/karafka/processing/executor.rb
 - lib/karafka/processing/executors_buffer.rb
 - lib/karafka/processing/jobs/base.rb
@@ -248,6 +254,9 @@ files:
 - lib/karafka/processing/jobs/shutdown.rb
 - lib/karafka/processing/jobs_builder.rb
 - lib/karafka/processing/jobs_queue.rb
+- lib/karafka/processing/partitioner.rb
+- lib/karafka/processing/result.rb
+- lib/karafka/processing/scheduler.rb
 - lib/karafka/processing/worker.rb
 - lib/karafka/processing/workers_batch.rb
 - lib/karafka/railtie.rb
@@ -261,7 +270,6 @@ files:
 - lib/karafka/routing/topic.rb
 - lib/karafka/routing/topics.rb
 - lib/karafka/runner.rb
-- lib/karafka/scheduler.rb
 - lib/karafka/serialization/json/deserializer.rb
 - lib/karafka/server.rb
 - lib/karafka/setup/config.rb
metadata.gz.sig CHANGED
Binary file
data/lib/karafka/pro/base_consumer_extensions.rb DELETED
@@ -1,66 +0,0 @@
-# frozen_string_literal: true
-
-# This Karafka component is a Pro component.
-# All of the commercial components are present in the lib/karafka/pro directory of this
-# repository and their usage requires commercial license agreement.
-#
-# Karafka has also commercial-friendly license, commercial support and commercial components.
-#
-# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
-# your code to Maciej Mensfeld.
-
-module Karafka
-  module Pro
-    # Extensions to the base consumer that make it more pro and fancy
-    #
-    # @note In case of using lrj, manual pausing may not be the best idea as resume needs to happen
-    #   after each batch is processed.
-    #
-    # They need to be added to the consumer via `#prepend`
-    module BaseConsumerExtensions
-      # Pause for tops 31 years
-      MAX_PAUSE_TIME = 1_000_000_000_000
-
-      private_constant :MAX_PAUSE_TIME
-
-      # Pauses processing of a given partition until we're done with the processing
-      # This ensures, that we can easily poll not reaching the `max.poll.interval`
-      def on_prepare
-        # Pause at the first message in a batch. That way in case of a crash, we will not loose
-        # any messages
-        pause(messages.first.offset, MAX_PAUSE_TIME) if topic.long_running_job?
-
-        super
-      end
-
-      # After user code, we seek and un-pause our partition
-      def on_consume
-        # If anything went wrong here, we should not run any partition management as it's Karafka
-        # core that will handle the backoff
-        return unless super
-
-        return unless topic.long_running_job?
-
-        # Nothing to resume if it was revoked
-        return if revoked?
-
-        # Once processing is done, we move to the new offset based on commits
-        seek(@seek_offset || messages.first.offset)
-        resume
-      end
-
-      # Marks this consumer revoked state as true
-      # This allows us for things like lrj to finish early as this state may change during lrj
-      # execution
-      def on_revoked
-        @revoked = true
-        super
-      end
-
-      # @return [Boolean] true if partition was revoked from the current consumer
-      def revoked?
-        @revoked || false
-      end
-    end
-  end
-end
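The removed extensions were mixed into the consumer via `#prepend`, so each hook ran before the base implementation and delegated back with `super`; per the files list above, this behaviour now lives in the dedicated lib/karafka/pro/base_consumer.rb added in this release. A standalone sketch of the prepend pattern itself (class and module names are illustrative, not Karafka API):

    class BaseWorker
      def on_consume
        :consumed
      end
    end

    # Prepended module: its on_consume sits earlier in the lookup chain
    # and wraps the original implementation via super
    module LongRunningExtensions
      def on_consume
        result = super # pause/seek/resume bookkeeping would wrap this call
        result
      end
    end

    BaseWorker.prepend(LongRunningExtensions)
    BaseWorker.new.on_consume # => :consumed, after passing through the prepended hook
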
data/lib/karafka/pro/scheduler.rb DELETED
@@ -1,54 +0,0 @@
-# frozen_string_literal: true
-
-# This Karafka component is a Pro component.
-# All of the commercial components are present in the lib/karafka/pro directory of this
-# repository and their usage requires commercial license agreement.
-#
-# Karafka has also commercial-friendly license, commercial support and commercial components.
-#
-# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
-# your code to Maciej Mensfeld.
-
-module Karafka
-  module Pro
-    # Optimizes scheduler that takes into consideration of execution time needed to process
-    # messages from given topics partitions. It uses the non-preemptive LJF algorithm
-    #
-    # This scheduler is designed to optimize execution times on jobs that perform IO operations as
-    # when taking IO into consideration, the can achieve optimized parallel processing.
-    #
-    # This scheduler can also work with virtual partitions.
-    #
-    # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
-    # default FIFO scheduler from the default Karafka scheduler
-    class Scheduler < ::Karafka::Scheduler
-      # Schedules jobs in the LJF order for consumption
-      #
-      # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
-      # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
-      #
-      def schedule_consumption(queue, jobs_array)
-        pt = PerformanceTracker.instance
-
-        ordered = []
-
-        jobs_array.each do |job|
-          messages = job.messages
-          message = messages.first
-
-          cost = pt.processing_time_p95(message.topic, message.partition) * messages.size
-
-          ordered << [job, cost]
-        end
-
-        ordered.sort_by!(&:last)
-        ordered.reverse!
-        ordered.map!(&:first)
-
-        ordered.each do |job|
-          queue << job
-        end
-      end
-    end
-  end
-end
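The LJF ordering above boils down to: estimate each batch's cost as the p95 per-message processing time multiplied by the batch size, then enqueue the most expensive batches first. A standalone sketch with a made-up cost table in place of PerformanceTracker (all names and numbers are illustrative); per the files list, the strategy itself moves to lib/karafka/pro/processing/scheduler.rb in this release:

    jobs = [
      { id: :a, p95_ms: 2.0,  size: 50 }, # estimated cost 100
      { id: :b, p95_ms: 10.0, size: 20 }, # estimated cost 200
      { id: :c, p95_ms: 1.0,  size: 10 }  # estimated cost 10
    ]

    ordered = jobs
              .map { |job| [job, job[:p95_ms] * job[:size]] } # estimate per-batch cost
              .sort_by(&:last)                                # cheapest first...
              .reverse                                        # ...then flip to longest-job-first
              .map(&:first)

    ordered.map { |job| job[:id] } # => [:b, :a, :c]
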
data/lib/karafka/scheduler.rb DELETED
@@ -1,20 +0,0 @@
-# frozen_string_literal: true
-
-module Karafka
-  # FIFO scheduler for messages coming from various topics and partitions
-  class Scheduler
-    # Schedules jobs in the fifo order
-    #
-    # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
-    # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
-    def schedule_consumption(queue, jobs_array)
-      jobs_array.each do |job|
-        queue << job
-      end
-    end
-
-    # Both revocation and shutdown jobs can also run in fifo by default
-    alias schedule_revocation schedule_consumption
-    alias schedule_shutdown schedule_consumption
-  end
-end