karafka 2.0.0.beta4 → 2.0.0.rc2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +18 -1
  4. data/CHANGELOG.md +30 -0
  5. data/CONTRIBUTING.md +0 -5
  6. data/Gemfile.lock +12 -42
  7. data/README.md +2 -12
  8. data/bin/benchmarks +2 -2
  9. data/bin/integrations +10 -3
  10. data/bin/{stress → stress_many} +1 -1
  11. data/bin/stress_one +13 -0
  12. data/config/errors.yml +48 -5
  13. data/docker-compose.yml +27 -18
  14. data/karafka.gemspec +2 -4
  15. data/lib/karafka/active_job/job_options_contract.rb +8 -2
  16. data/lib/karafka/active_job/routing/extensions.rb +1 -1
  17. data/lib/karafka/app.rb +2 -1
  18. data/lib/karafka/base_consumer.rb +24 -19
  19. data/lib/karafka/cli/install.rb +15 -2
  20. data/lib/karafka/cli/server.rb +4 -2
  21. data/lib/karafka/connection/client.rb +40 -17
  22. data/lib/karafka/connection/listener.rb +37 -11
  23. data/lib/karafka/connection/rebalance_manager.rb +20 -19
  24. data/lib/karafka/contracts/base.rb +2 -8
  25. data/lib/karafka/contracts/config.rb +71 -38
  26. data/lib/karafka/contracts/consumer_group.rb +25 -18
  27. data/lib/karafka/contracts/consumer_group_topic.rb +30 -16
  28. data/lib/karafka/contracts/server_cli_options.rb +18 -7
  29. data/lib/karafka/errors.rb +3 -0
  30. data/lib/karafka/helpers/colorize.rb +20 -0
  31. data/lib/karafka/pro/active_job/consumer.rb +1 -8
  32. data/lib/karafka/pro/active_job/job_options_contract.rb +10 -6
  33. data/lib/karafka/pro/base_consumer.rb +27 -21
  34. data/lib/karafka/pro/loader.rb +13 -6
  35. data/lib/karafka/pro/processing/coordinator.rb +63 -0
  36. data/lib/karafka/pro/processing/jobs_builder.rb +3 -2
  37. data/lib/karafka/pro/processing/partitioner.rb +41 -0
  38. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  39. data/lib/karafka/pro/routing/extensions.rb +6 -0
  40. data/lib/karafka/processing/coordinator.rb +88 -0
  41. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  42. data/lib/karafka/processing/executor.rb +7 -17
  43. data/lib/karafka/processing/executors_buffer.rb +46 -15
  44. data/lib/karafka/processing/jobs/consume.rb +4 -2
  45. data/lib/karafka/processing/jobs_builder.rb +3 -2
  46. data/lib/karafka/processing/partitioner.rb +22 -0
  47. data/lib/karafka/processing/result.rb +0 -5
  48. data/lib/karafka/processing/scheduler.rb +22 -0
  49. data/lib/karafka/routing/consumer_group.rb +1 -1
  50. data/lib/karafka/routing/topic.rb +9 -0
  51. data/lib/karafka/setup/config.rb +26 -12
  52. data/lib/karafka/templates/example_consumer.rb.erb +2 -2
  53. data/lib/karafka/version.rb +1 -1
  54. data/lib/karafka.rb +0 -2
  55. data.tar.gz.sig +0 -0
  56. metadata +15 -36
  57. metadata.gz.sig +0 -0
  58. data/lib/karafka/pro/scheduler.rb +0 -54
  59. data/lib/karafka/scheduler.rb +0 -20
@@ -12,17 +12,19 @@ module Karafka
12
12
  # @param executor [Karafka::Processing::Executor] executor that is supposed to run a given
13
13
  # job
14
14
  # @param messages [Karafka::Messages::Messages] karafka messages batch
15
+ # @param coordinator [Karafka::Processing::Coordinator] processing coordinator
15
16
  # @return [Consume]
16
- def initialize(executor, messages)
17
+ def initialize(executor, messages, coordinator)
17
18
  @executor = executor
18
19
  @messages = messages
20
+ @coordinator = coordinator
19
21
  @created_at = Time.now
20
22
  super()
21
23
  end
22
24
 
23
25
  # Runs the before consumption preparations on the executor
24
26
  def before_call
25
- executor.before_consume(@messages, @created_at)
27
+ executor.before_consume(@messages, @created_at, @coordinator)
26
28
  end
27
29
 
28
30
  # Runs the given executor
@@ -7,9 +7,10 @@ module Karafka
7
7
  class JobsBuilder
8
8
  # @param executor [Karafka::Processing::Executor]
9
9
  # @param messages [Karafka::Messages::Messages] messages batch to be consumed
10
+ # @param coordinator [Karafka::Processing::Coordinator]
10
11
  # @return [Karafka::Processing::Jobs::Consume] consumption job
11
- def consume(executor, messages)
12
- Jobs::Consume.new(executor, messages)
12
+ def consume(executor, messages, coordinator)
13
+ Jobs::Consume.new(executor, messages, coordinator)
13
14
  end
14
15
 
15
16
  # @param executor [Karafka::Processing::Executor]
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Processing
5
+ # Basic partitioner for work division
6
+ # It does not divide any work.
7
+ class Partitioner
8
+ # @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group
9
+ def initialize(subscription_group)
10
+ @subscription_group = subscription_group
11
+ end
12
+
13
+ # @param _topic [String] topic name
14
+ # @param messages [Array<Karafka::Messages::Message>] karafka messages
15
+ # @yieldparam [Integer] group id
16
+ # @yieldparam [Array<Karafka::Messages::Message>] karafka messages
17
+ def call(_topic, messages)
18
+ yield(0, messages)
19
+ end
20
+ end
21
+ end
22
+ end
@@ -10,11 +10,6 @@ module Karafka
10
10
  @success = true
11
11
  end
12
12
 
13
- # @return [Boolean]
14
- def failure?
15
- !success?
16
- end
17
-
18
13
  # @return [Boolean]
19
14
  def success?
20
15
  @success
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Processing
5
+ # FIFO scheduler for messages coming from various topics and partitions
6
+ class Scheduler
7
+ # Schedules jobs in the fifo order
8
+ #
9
+ # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
10
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
11
+ def schedule_consumption(queue, jobs_array)
12
+ jobs_array.each do |job|
13
+ queue << job
14
+ end
15
+ end
16
+
17
+ # Both revocation and shutdown jobs can also run in fifo by default
18
+ alias schedule_revocation schedule_consumption
19
+ alias schedule_shutdown schedule_consumption
20
+ end
21
+ end
22
+ end
@@ -38,7 +38,7 @@ module Karafka
38
38
  # @return [Array<Routing::SubscriptionGroup>] all the subscription groups built based on
39
39
  # the consumer group topics
40
40
  def subscription_groups
41
- App.config.internal.subscription_groups_builder.call(topics)
41
+ App.config.internal.routing.subscription_groups_builder.call(topics)
42
42
  end
43
43
 
44
44
  # Hashed version of consumer group that can be used for validation purposes
@@ -66,6 +66,15 @@ module Karafka
66
66
  end
67
67
  end
68
68
 
69
+ # @return [Class] consumer class that we should use
70
+ # @note This is just an alias to the `#consumer` method. We however want to use it internally
71
+ # instead of referencing the `#consumer`. We use this to indicate that this method returns
72
+ # class and not an instance. In the routing we want to keep the `#consumer Consumer`
73
+ # routing syntax, but for references outside, we should use this one.
74
+ def consumer_class
75
+ consumer
76
+ end
77
+
69
78
  # @return [Boolean] true if this topic offset is handled by the end user
70
79
  def manual_offset_management?
71
80
  manual_offset_management
@@ -12,7 +12,7 @@ module Karafka
12
12
  # enough and will still keep the code simple
13
13
  # @see Karafka::Setup::Configurators::Base for more details about configurators api
14
14
  class Config
15
- extend Dry::Configurable
15
+ extend ::WaterDrop::Configurable
16
16
 
17
17
  # Defaults for kafka settings, that will be overwritten only if not present already
18
18
  KAFKA_DEFAULTS = {
@@ -62,7 +62,7 @@ module Karafka
62
62
  # options max_messages [Integer] how many messages do we want to fetch from Kafka in one go
63
63
  setting :max_messages, default: 1_000
64
64
  # option max_wait_time [Integer] number of milliseconds we can wait while fetching data
65
- setting :max_wait_time, default: 10_000
65
+ setting :max_wait_time, default: 5_000
66
66
  # option shutdown_timeout [Integer] the number of milliseconds after which Karafka no
67
67
  # longer waits for the consumers to stop gracefully but instead we force terminate
68
68
  # everything.
@@ -85,21 +85,31 @@ module Karafka
85
85
 
86
86
  # Namespace for internal settings that should not be modified directly
87
87
  setting :internal do
88
- # option routing_builder [Karafka::Routing::Builder] builder instance
89
- setting :routing_builder, default: Routing::Builder.new
90
88
  # option status [Karafka::Status] app status
91
89
  setting :status, default: Status.new
92
90
  # option process [Karafka::Process] process status
93
91
  # @note In the future, we need to have a single process representation for all the karafka
94
92
  # instances
95
93
  setting :process, default: Process.new
96
- # option subscription_groups_builder [Routing::SubscriptionGroupsBuilder] subscription
97
- # group builder
98
- setting :subscription_groups_builder, default: Routing::SubscriptionGroupsBuilder.new
99
- # option scheduler [Class] scheduler we will be using
100
- setting :scheduler, default: Scheduler.new
101
- # option jobs_builder [Class] jobs builder we want to use
102
- setting :jobs_builder, default: Processing::JobsBuilder.new
94
+
95
+ setting :routing do
96
+ # option builder [Karafka::Routing::Builder] builder instance
97
+ setting :builder, default: Routing::Builder.new
98
+ # option subscription_groups_builder [Routing::SubscriptionGroupsBuilder] subscription
99
+ # group builder
100
+ setting :subscription_groups_builder, default: Routing::SubscriptionGroupsBuilder.new
101
+ end
102
+
103
+ setting :processing do
104
+ # option scheduler [Object] scheduler we will be using
105
+ setting :scheduler, default: Processing::Scheduler.new
106
+ # option jobs_builder [Object] jobs builder we want to use
107
+ setting :jobs_builder, default: Processing::JobsBuilder.new
108
+ # option coordinator_class [Class] work coordinator we want to use for processing coordination
109
+ setting :coordinator_class, default: Processing::Coordinator
110
+ # option partitioner_class [Class] partitioner we use against a batch of data
111
+ setting :partitioner_class, default: Processing::Partitioner
112
+ end
103
113
 
104
114
  # Karafka components for ActiveJob
105
115
  setting :active_job do
@@ -109,10 +119,14 @@ module Karafka
109
119
  # ensuring, that extra job options defined are valid
110
120
  setting :job_options_contract, default: ActiveJob::JobOptionsContract.new
111
121
  # option consumer [Class] consumer class that should be used to consume ActiveJob data
112
- setting :consumer, default: ActiveJob::Consumer
122
+ setting :consumer_class, default: ActiveJob::Consumer
113
123
  end
114
124
  end
115
125
 
126
+ # This will load all the defaults that can be later overwritten.
127
+ # Thanks to that we have an initial state out of the box.
128
+ configure
129
+
116
130
  class << self
117
131
  # Configuring method
118
132
  # @param block [Proc] block we want to execute with the config instance
@@ -7,10 +7,10 @@ class ExampleConsumer < ApplicationConsumer
7
7
  end
8
8
 
9
9
  # Run anything upon partition being revoked
10
- # def on_revoked
10
+ # def revoked
11
11
  # end
12
12
 
13
13
  # Define here any teardown things you want when Karafka server stops
14
- # def on_shutdown
14
+ # def shutdown
15
15
  # end
16
16
  end
@@ -3,5 +3,5 @@
3
3
  # Main module namespace
4
4
  module Karafka
5
5
  # Current Karafka version
6
- VERSION = '2.0.0.beta4'
6
+ VERSION = '2.0.0.rc2'
7
7
  end
data/lib/karafka.rb CHANGED
@@ -12,8 +12,6 @@
12
12
  openssl
13
13
  base64
14
14
  date
15
- dry-configurable
16
- dry-validation
17
15
  dry/events/publisher
18
16
  dry/monitor/notifications
19
17
  zeitwerk
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: karafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0.beta4
4
+ version: 2.0.0.rc2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld
@@ -34,22 +34,8 @@ cert_chain:
34
34
  R2P11bWoCtr70BsccVrN8jEhzwXngMyI2gVt750Y+dbTu1KgRqZKp/ECe7ZzPzXj
35
35
  pIy9vHxTANKYVyI4qj8OrFdEM5BQNu8oQpL0iQ==
36
36
  -----END CERTIFICATE-----
37
- date: 2022-06-20 00:00:00.000000000 Z
37
+ date: 2022-07-19 00:00:00.000000000 Z
38
38
  dependencies:
39
- - !ruby/object:Gem::Dependency
40
- name: dry-configurable
41
- requirement: !ruby/object:Gem::Requirement
42
- requirements:
43
- - - "~>"
44
- - !ruby/object:Gem::Version
45
- version: '0.13'
46
- type: :runtime
47
- prerelease: false
48
- version_requirements: !ruby/object:Gem::Requirement
49
- requirements:
50
- - - "~>"
51
- - !ruby/object:Gem::Version
52
- version: '0.13'
53
39
  - !ruby/object:Gem::Dependency
54
40
  name: dry-monitor
55
41
  requirement: !ruby/object:Gem::Requirement
@@ -64,20 +50,6 @@ dependencies:
64
50
  - - "~>"
65
51
  - !ruby/object:Gem::Version
66
52
  version: '0.5'
67
- - !ruby/object:Gem::Dependency
68
- name: dry-validation
69
- requirement: !ruby/object:Gem::Requirement
70
- requirements:
71
- - - "~>"
72
- - !ruby/object:Gem::Version
73
- version: '1.7'
74
- type: :runtime
75
- prerelease: false
76
- version_requirements: !ruby/object:Gem::Requirement
77
- requirements:
78
- - - "~>"
79
- - !ruby/object:Gem::Version
80
- version: '1.7'
81
53
  - !ruby/object:Gem::Dependency
82
54
  name: rdkafka
83
55
  requirement: !ruby/object:Gem::Requirement
@@ -112,7 +84,7 @@ dependencies:
112
84
  requirements:
113
85
  - - ">="
114
86
  - !ruby/object:Gem::Version
115
- version: 2.3.1
87
+ version: 2.3.3
116
88
  - - "<"
117
89
  - !ruby/object:Gem::Version
118
90
  version: 3.0.0
@@ -122,7 +94,7 @@ dependencies:
122
94
  requirements:
123
95
  - - ">="
124
96
  - !ruby/object:Gem::Version
125
- version: 2.3.1
97
+ version: 2.3.3
126
98
  - - "<"
127
99
  - !ruby/object:Gem::Version
128
100
  version: 3.0.0
@@ -172,7 +144,8 @@ files:
172
144
  - bin/integrations
173
145
  - bin/karafka
174
146
  - bin/scenario
175
- - bin/stress
147
+ - bin/stress_many
148
+ - bin/stress_one
176
149
  - bin/wait_for_kafka
177
150
  - certs/karafka-pro.pem
178
151
  - certs/mensfeld.pem
@@ -211,6 +184,7 @@ files:
211
184
  - lib/karafka/env.rb
212
185
  - lib/karafka/errors.rb
213
186
  - lib/karafka/helpers/async.rb
187
+ - lib/karafka/helpers/colorize.rb
214
188
  - lib/karafka/helpers/multi_delegator.rb
215
189
  - lib/karafka/instrumentation.rb
216
190
  - lib/karafka/instrumentation/callbacks/error.rb
@@ -236,11 +210,15 @@ files:
236
210
  - lib/karafka/pro/base_consumer.rb
237
211
  - lib/karafka/pro/loader.rb
238
212
  - lib/karafka/pro/performance_tracker.rb
213
+ - lib/karafka/pro/processing/coordinator.rb
239
214
  - lib/karafka/pro/processing/jobs/consume_non_blocking.rb
240
215
  - lib/karafka/pro/processing/jobs_builder.rb
216
+ - lib/karafka/pro/processing/partitioner.rb
217
+ - lib/karafka/pro/processing/scheduler.rb
241
218
  - lib/karafka/pro/routing/extensions.rb
242
- - lib/karafka/pro/scheduler.rb
243
219
  - lib/karafka/process.rb
220
+ - lib/karafka/processing/coordinator.rb
221
+ - lib/karafka/processing/coordinators_buffer.rb
244
222
  - lib/karafka/processing/executor.rb
245
223
  - lib/karafka/processing/executors_buffer.rb
246
224
  - lib/karafka/processing/jobs/base.rb
@@ -249,7 +227,9 @@ files:
249
227
  - lib/karafka/processing/jobs/shutdown.rb
250
228
  - lib/karafka/processing/jobs_builder.rb
251
229
  - lib/karafka/processing/jobs_queue.rb
230
+ - lib/karafka/processing/partitioner.rb
252
231
  - lib/karafka/processing/result.rb
232
+ - lib/karafka/processing/scheduler.rb
253
233
  - lib/karafka/processing/worker.rb
254
234
  - lib/karafka/processing/workers_batch.rb
255
235
  - lib/karafka/railtie.rb
@@ -263,7 +243,6 @@ files:
263
243
  - lib/karafka/routing/topic.rb
264
244
  - lib/karafka/routing/topics.rb
265
245
  - lib/karafka/runner.rb
266
- - lib/karafka/scheduler.rb
267
246
  - lib/karafka/serialization/json/deserializer.rb
268
247
  - lib/karafka/server.rb
269
248
  - lib/karafka/setup/config.rb
@@ -302,5 +281,5 @@ requirements: []
302
281
  rubygems_version: 3.3.7
303
282
  signing_key:
304
283
  specification_version: 4
305
- summary: Ruby based framework for working with Apache Kafka
284
+ summary: Ruby framework for working with Apache Kafka
306
285
  test_files: []
metadata.gz.sig CHANGED
Binary file
@@ -1,54 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # This Karafka component is a Pro component.
4
- # All of the commercial components are present in the lib/karafka/pro directory of this
5
- # repository and their usage requires commercial license agreement.
6
- #
7
- # Karafka has also commercial-friendly license, commercial support and commercial components.
8
- #
9
- # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
- # your code to Maciej Mensfeld.
11
-
12
- module Karafka
13
- module Pro
14
- # Optimizes scheduler that takes into consideration of execution time needed to process
15
- # messages from given topics partitions. It uses the non-preemptive LJF algorithm
16
- #
17
- # This scheduler is designed to optimize execution times on jobs that perform IO operations as
18
- # when taking IO into consideration, the can achieve optimized parallel processing.
19
- #
20
- # This scheduler can also work with virtual partitions.
21
- #
22
- # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
23
- # default FIFO scheduler from the default Karafka scheduler
24
- class Scheduler < ::Karafka::Scheduler
25
- # Schedules jobs in the LJF order for consumption
26
- #
27
- # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
28
- # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
29
- #
30
- def schedule_consumption(queue, jobs_array)
31
- pt = PerformanceTracker.instance
32
-
33
- ordered = []
34
-
35
- jobs_array.each do |job|
36
- messages = job.messages
37
- message = messages.first
38
-
39
- cost = pt.processing_time_p95(message.topic, message.partition) * messages.size
40
-
41
- ordered << [job, cost]
42
- end
43
-
44
- ordered.sort_by!(&:last)
45
- ordered.reverse!
46
- ordered.map!(&:first)
47
-
48
- ordered.each do |job|
49
- queue << job
50
- end
51
- end
52
- end
53
- end
54
- end
@@ -1,20 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Karafka
4
- # FIFO scheduler for messages coming from various topics and partitions
5
- class Scheduler
6
- # Schedules jobs in the fifo order
7
- #
8
- # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
9
- # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
10
- def schedule_consumption(queue, jobs_array)
11
- jobs_array.each do |job|
12
- queue << job
13
- end
14
- end
15
-
16
- # Both revocation and shutdown jobs can also run in fifo by default
17
- alias schedule_revocation schedule_consumption
18
- alias schedule_shutdown schedule_consumption
19
- end
20
- end