karafka 2.0.0.beta4 → 2.0.0.rc2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +18 -1
  4. data/CHANGELOG.md +30 -0
  5. data/CONTRIBUTING.md +0 -5
  6. data/Gemfile.lock +12 -42
  7. data/README.md +2 -12
  8. data/bin/benchmarks +2 -2
  9. data/bin/integrations +10 -3
  10. data/bin/{stress → stress_many} +1 -1
  11. data/bin/stress_one +13 -0
  12. data/config/errors.yml +48 -5
  13. data/docker-compose.yml +27 -18
  14. data/karafka.gemspec +2 -4
  15. data/lib/karafka/active_job/job_options_contract.rb +8 -2
  16. data/lib/karafka/active_job/routing/extensions.rb +1 -1
  17. data/lib/karafka/app.rb +2 -1
  18. data/lib/karafka/base_consumer.rb +24 -19
  19. data/lib/karafka/cli/install.rb +15 -2
  20. data/lib/karafka/cli/server.rb +4 -2
  21. data/lib/karafka/connection/client.rb +40 -17
  22. data/lib/karafka/connection/listener.rb +37 -11
  23. data/lib/karafka/connection/rebalance_manager.rb +20 -19
  24. data/lib/karafka/contracts/base.rb +2 -8
  25. data/lib/karafka/contracts/config.rb +71 -38
  26. data/lib/karafka/contracts/consumer_group.rb +25 -18
  27. data/lib/karafka/contracts/consumer_group_topic.rb +30 -16
  28. data/lib/karafka/contracts/server_cli_options.rb +18 -7
  29. data/lib/karafka/errors.rb +3 -0
  30. data/lib/karafka/helpers/colorize.rb +20 -0
  31. data/lib/karafka/pro/active_job/consumer.rb +1 -8
  32. data/lib/karafka/pro/active_job/job_options_contract.rb +10 -6
  33. data/lib/karafka/pro/base_consumer.rb +27 -21
  34. data/lib/karafka/pro/loader.rb +13 -6
  35. data/lib/karafka/pro/processing/coordinator.rb +63 -0
  36. data/lib/karafka/pro/processing/jobs_builder.rb +3 -2
  37. data/lib/karafka/pro/processing/partitioner.rb +41 -0
  38. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  39. data/lib/karafka/pro/routing/extensions.rb +6 -0
  40. data/lib/karafka/processing/coordinator.rb +88 -0
  41. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  42. data/lib/karafka/processing/executor.rb +7 -17
  43. data/lib/karafka/processing/executors_buffer.rb +46 -15
  44. data/lib/karafka/processing/jobs/consume.rb +4 -2
  45. data/lib/karafka/processing/jobs_builder.rb +3 -2
  46. data/lib/karafka/processing/partitioner.rb +22 -0
  47. data/lib/karafka/processing/result.rb +0 -5
  48. data/lib/karafka/processing/scheduler.rb +22 -0
  49. data/lib/karafka/routing/consumer_group.rb +1 -1
  50. data/lib/karafka/routing/topic.rb +9 -0
  51. data/lib/karafka/setup/config.rb +26 -12
  52. data/lib/karafka/templates/example_consumer.rb.erb +2 -2
  53. data/lib/karafka/version.rb +1 -1
  54. data/lib/karafka.rb +0 -2
  55. data.tar.gz.sig +0 -0
  56. metadata +15 -36
  57. metadata.gz.sig +0 -0
  58. data/lib/karafka/pro/scheduler.rb +0 -54
  59. data/lib/karafka/scheduler.rb +0 -20
@@ -12,17 +12,19 @@ module Karafka
12
12
  # @param executor [Karafka::Processing::Executor] executor that is supposed to run a given
13
13
  # job
14
14
  # @param messages [Karafka::Messages::Messages] karafka messages batch
15
+ # @param coordinator [Karafka::Processing::Coordinator] processing coordinator
15
16
  # @return [Consume]
16
- def initialize(executor, messages)
17
+ def initialize(executor, messages, coordinator)
17
18
  @executor = executor
18
19
  @messages = messages
20
+ @coordinator = coordinator
19
21
  @created_at = Time.now
20
22
  super()
21
23
  end
22
24
 
23
25
  # Runs the before consumption preparations on the executor
24
26
  def before_call
25
- executor.before_consume(@messages, @created_at)
27
+ executor.before_consume(@messages, @created_at, @coordinator)
26
28
  end
27
29
 
28
30
  # Runs the given executor
@@ -7,9 +7,10 @@ module Karafka
7
7
  class JobsBuilder
8
8
  # @param executor [Karafka::Processing::Executor]
9
9
  # @param messages [Karafka::Messages::Messages] messages batch to be consumed
10
+ # @param coordinator [Karafka::Processing::Coordinator]
10
11
  # @return [Karafka::Processing::Jobs::Consume] consumption job
11
- def consume(executor, messages)
12
- Jobs::Consume.new(executor, messages)
12
+ def consume(executor, messages, coordinator)
13
+ Jobs::Consume.new(executor, messages, coordinator)
13
14
  end
14
15
 
15
16
  # @param executor [Karafka::Processing::Executor]
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Processing
5
+ # Basic partitioner for work division
6
+ # It does not divide any work.
7
+ class Partitioner
8
+ # @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group
9
+ def initialize(subscription_group)
10
+ @subscription_group = subscription_group
11
+ end
12
+
13
+ # @param _topic [String] topic name
14
+ # @param messages [Array<Karafka::Messages::Message>] karafka messages
15
+ # @yieldparam [Integer] group id
16
+ # @yieldparam [Array<Karafka::Messages::Message>] karafka messages
17
+ def call(_topic, messages)
18
+ yield(0, messages)
19
+ end
20
+ end
21
+ end
22
+ end
@@ -10,11 +10,6 @@ module Karafka
10
10
  @success = true
11
11
  end
12
12
 
13
- # @return [Boolean]
14
- def failure?
15
- !success?
16
- end
17
-
18
13
  # @return [Boolean]
19
14
  def success?
20
15
  @success
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Processing
5
+ # FIFO scheduler for messages coming from various topics and partitions
6
+ class Scheduler
7
+ # Schedules jobs in the fifo order
8
+ #
9
+ # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
10
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
11
+ def schedule_consumption(queue, jobs_array)
12
+ jobs_array.each do |job|
13
+ queue << job
14
+ end
15
+ end
16
+
17
+ # Both revocation and shutdown jobs can also run in fifo by default
18
+ alias schedule_revocation schedule_consumption
19
+ alias schedule_shutdown schedule_consumption
20
+ end
21
+ end
22
+ end
@@ -38,7 +38,7 @@ module Karafka
38
38
  # @return [Array<Routing::SubscriptionGroup>] all the subscription groups build based on
39
39
  # the consumer group topics
40
40
  def subscription_groups
41
- App.config.internal.subscription_groups_builder.call(topics)
41
+ App.config.internal.routing.subscription_groups_builder.call(topics)
42
42
  end
43
43
 
44
44
  # Hashed version of consumer group that can be used for validation purposes
@@ -66,6 +66,15 @@ module Karafka
66
66
  end
67
67
  end
68
68
 
69
+ # @return [Class] consumer class that we should use
70
+ # @note This is just an alias to the `#consumer` method. We however want to use it internally
71
+ # instead of referencing the `#consumer`. We use this to indicate that this method returns
72
+ # class and not an instance. In the routing we want to keep the `#consumer Consumer`
73
+ # routing syntax, but for references outside, we should use this one.
74
+ def consumer_class
75
+ consumer
76
+ end
77
+
69
78
  # @return [Boolean] true if this topic offset is handled by the end user
70
79
  def manual_offset_management?
71
80
  manual_offset_management
@@ -12,7 +12,7 @@ module Karafka
12
12
  # enough and will still keep the code simple
13
13
  # @see Karafka::Setup::Configurators::Base for more details about configurators api
14
14
  class Config
15
- extend Dry::Configurable
15
+ extend ::WaterDrop::Configurable
16
16
 
17
17
  # Defaults for kafka settings, that will be overwritten only if not present already
18
18
  KAFKA_DEFAULTS = {
@@ -62,7 +62,7 @@ module Karafka
62
62
  # options max_messages [Integer] how many messages do we want to fetch from Kafka in one go
63
63
  setting :max_messages, default: 1_000
64
64
  # option [Integer] number of milliseconds we can wait while fetching data
65
- setting :max_wait_time, default: 10_000
65
+ setting :max_wait_time, default: 5_000
66
66
  # option shutdown_timeout [Integer] the number of milliseconds after which Karafka no
67
67
  # longer waits for the consumers to stop gracefully but instead we force terminate
68
68
  # everything.
@@ -85,21 +85,31 @@ module Karafka
85
85
 
86
86
  # Namespace for internal settings that should not be modified directly
87
87
  setting :internal do
88
- # option routing_builder [Karafka::Routing::Builder] builder instance
89
- setting :routing_builder, default: Routing::Builder.new
90
88
  # option status [Karafka::Status] app status
91
89
  setting :status, default: Status.new
92
90
  # option process [Karafka::Process] process status
93
91
  # @note In the future, we need to have a single process representation for all the karafka
94
92
  # instances
95
93
  setting :process, default: Process.new
96
- # option subscription_groups_builder [Routing::SubscriptionGroupsBuilder] subscription
97
- # group builder
98
- setting :subscription_groups_builder, default: Routing::SubscriptionGroupsBuilder.new
99
- # option scheduler [Class] scheduler we will be using
100
- setting :scheduler, default: Scheduler.new
101
- # option jobs_builder [Class] jobs builder we want to use
102
- setting :jobs_builder, default: Processing::JobsBuilder.new
94
+
95
+ setting :routing do
96
+ # option builder [Karafka::Routing::Builder] builder instance
97
+ setting :builder, default: Routing::Builder.new
98
+ # option subscription_groups_builder [Routing::SubscriptionGroupsBuilder] subscription
99
+ # group builder
100
+ setting :subscription_groups_builder, default: Routing::SubscriptionGroupsBuilder.new
101
+ end
102
+
103
+ setting :processing do
104
+ # option scheduler [Object] scheduler we will be using
105
+ setting :scheduler, default: Processing::Scheduler.new
106
+ # option jobs_builder [Object] jobs builder we want to use
107
+ setting :jobs_builder, default: Processing::JobsBuilder.new
108
+ # option coordinator_class [Class] work coordinator we want to use for processing coordination
109
+ setting :coordinator_class, default: Processing::Coordinator
110
+ # option partitioner_class [Class] partitioner we use against a batch of data
111
+ setting :partitioner_class, default: Processing::Partitioner
112
+ end
103
113
 
104
114
  # Karafka components for ActiveJob
105
115
  setting :active_job do
@@ -109,10 +119,14 @@ module Karafka
109
119
  # ensuring, that extra job options defined are valid
110
120
  setting :job_options_contract, default: ActiveJob::JobOptionsContract.new
111
121
  # option consumer [Class] consumer class that should be used to consume ActiveJob data
112
- setting :consumer, default: ActiveJob::Consumer
122
+ setting :consumer_class, default: ActiveJob::Consumer
113
123
  end
114
124
  end
115
125
 
126
+ # This will load all the defaults that can be later overwritten.
127
+ # Thanks to that we have an initial state out of the box.
128
+ configure
129
+
116
130
  class << self
117
131
  # Configuring method
118
132
  # @param block [Proc] block we want to execute with the config instance
@@ -7,10 +7,10 @@ class ExampleConsumer < ApplicationConsumer
7
7
  end
8
8
 
9
9
  # Run anything upon partition being revoked
10
- # def on_revoked
10
+ # def revoked
11
11
  # end
12
12
 
13
13
  # Define here any teardown things you want when Karafka server stops
14
- # def on_shutdown
14
+ # def shutdown
15
15
  # end
16
16
  end
@@ -3,5 +3,5 @@
3
3
  # Main module namespace
4
4
  module Karafka
5
5
  # Current Karafka version
6
- VERSION = '2.0.0.beta4'
6
+ VERSION = '2.0.0.rc2'
7
7
  end
data/lib/karafka.rb CHANGED
@@ -12,8 +12,6 @@
12
12
  openssl
13
13
  base64
14
14
  date
15
- dry-configurable
16
- dry-validation
17
15
  dry/events/publisher
18
16
  dry/monitor/notifications
19
17
  zeitwerk
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: karafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0.beta4
4
+ version: 2.0.0.rc2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld
@@ -34,22 +34,8 @@ cert_chain:
34
34
  R2P11bWoCtr70BsccVrN8jEhzwXngMyI2gVt750Y+dbTu1KgRqZKp/ECe7ZzPzXj
35
35
  pIy9vHxTANKYVyI4qj8OrFdEM5BQNu8oQpL0iQ==
36
36
  -----END CERTIFICATE-----
37
- date: 2022-06-20 00:00:00.000000000 Z
37
+ date: 2022-07-19 00:00:00.000000000 Z
38
38
  dependencies:
39
- - !ruby/object:Gem::Dependency
40
- name: dry-configurable
41
- requirement: !ruby/object:Gem::Requirement
42
- requirements:
43
- - - "~>"
44
- - !ruby/object:Gem::Version
45
- version: '0.13'
46
- type: :runtime
47
- prerelease: false
48
- version_requirements: !ruby/object:Gem::Requirement
49
- requirements:
50
- - - "~>"
51
- - !ruby/object:Gem::Version
52
- version: '0.13'
53
39
  - !ruby/object:Gem::Dependency
54
40
  name: dry-monitor
55
41
  requirement: !ruby/object:Gem::Requirement
@@ -64,20 +50,6 @@ dependencies:
64
50
  - - "~>"
65
51
  - !ruby/object:Gem::Version
66
52
  version: '0.5'
67
- - !ruby/object:Gem::Dependency
68
- name: dry-validation
69
- requirement: !ruby/object:Gem::Requirement
70
- requirements:
71
- - - "~>"
72
- - !ruby/object:Gem::Version
73
- version: '1.7'
74
- type: :runtime
75
- prerelease: false
76
- version_requirements: !ruby/object:Gem::Requirement
77
- requirements:
78
- - - "~>"
79
- - !ruby/object:Gem::Version
80
- version: '1.7'
81
53
  - !ruby/object:Gem::Dependency
82
54
  name: rdkafka
83
55
  requirement: !ruby/object:Gem::Requirement
@@ -112,7 +84,7 @@ dependencies:
112
84
  requirements:
113
85
  - - ">="
114
86
  - !ruby/object:Gem::Version
115
- version: 2.3.1
87
+ version: 2.3.3
116
88
  - - "<"
117
89
  - !ruby/object:Gem::Version
118
90
  version: 3.0.0
@@ -122,7 +94,7 @@ dependencies:
122
94
  requirements:
123
95
  - - ">="
124
96
  - !ruby/object:Gem::Version
125
- version: 2.3.1
97
+ version: 2.3.3
126
98
  - - "<"
127
99
  - !ruby/object:Gem::Version
128
100
  version: 3.0.0
@@ -172,7 +144,8 @@ files:
172
144
  - bin/integrations
173
145
  - bin/karafka
174
146
  - bin/scenario
175
- - bin/stress
147
+ - bin/stress_many
148
+ - bin/stress_one
176
149
  - bin/wait_for_kafka
177
150
  - certs/karafka-pro.pem
178
151
  - certs/mensfeld.pem
@@ -211,6 +184,7 @@ files:
211
184
  - lib/karafka/env.rb
212
185
  - lib/karafka/errors.rb
213
186
  - lib/karafka/helpers/async.rb
187
+ - lib/karafka/helpers/colorize.rb
214
188
  - lib/karafka/helpers/multi_delegator.rb
215
189
  - lib/karafka/instrumentation.rb
216
190
  - lib/karafka/instrumentation/callbacks/error.rb
@@ -236,11 +210,15 @@ files:
236
210
  - lib/karafka/pro/base_consumer.rb
237
211
  - lib/karafka/pro/loader.rb
238
212
  - lib/karafka/pro/performance_tracker.rb
213
+ - lib/karafka/pro/processing/coordinator.rb
239
214
  - lib/karafka/pro/processing/jobs/consume_non_blocking.rb
240
215
  - lib/karafka/pro/processing/jobs_builder.rb
216
+ - lib/karafka/pro/processing/partitioner.rb
217
+ - lib/karafka/pro/processing/scheduler.rb
241
218
  - lib/karafka/pro/routing/extensions.rb
242
- - lib/karafka/pro/scheduler.rb
243
219
  - lib/karafka/process.rb
220
+ - lib/karafka/processing/coordinator.rb
221
+ - lib/karafka/processing/coordinators_buffer.rb
244
222
  - lib/karafka/processing/executor.rb
245
223
  - lib/karafka/processing/executors_buffer.rb
246
224
  - lib/karafka/processing/jobs/base.rb
@@ -249,7 +227,9 @@ files:
249
227
  - lib/karafka/processing/jobs/shutdown.rb
250
228
  - lib/karafka/processing/jobs_builder.rb
251
229
  - lib/karafka/processing/jobs_queue.rb
230
+ - lib/karafka/processing/partitioner.rb
252
231
  - lib/karafka/processing/result.rb
232
+ - lib/karafka/processing/scheduler.rb
253
233
  - lib/karafka/processing/worker.rb
254
234
  - lib/karafka/processing/workers_batch.rb
255
235
  - lib/karafka/railtie.rb
@@ -263,7 +243,6 @@ files:
263
243
  - lib/karafka/routing/topic.rb
264
244
  - lib/karafka/routing/topics.rb
265
245
  - lib/karafka/runner.rb
266
- - lib/karafka/scheduler.rb
267
246
  - lib/karafka/serialization/json/deserializer.rb
268
247
  - lib/karafka/server.rb
269
248
  - lib/karafka/setup/config.rb
@@ -302,5 +281,5 @@ requirements: []
302
281
  rubygems_version: 3.3.7
303
282
  signing_key:
304
283
  specification_version: 4
305
- summary: Ruby based framework for working with Apache Kafka
284
+ summary: Ruby framework for working with Apache Kafka
306
285
  test_files: []
metadata.gz.sig CHANGED
Binary file
@@ -1,54 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # This Karafka component is a Pro component.
4
- # All of the commercial components are present in the lib/karafka/pro directory of this
5
- # repository and their usage requires commercial license agreement.
6
- #
7
- # Karafka has also commercial-friendly license, commercial support and commercial components.
8
- #
9
- # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
- # your code to Maciej Mensfeld.
11
-
12
- module Karafka
13
- module Pro
14
 - # Optimizing scheduler that takes into consideration the execution time needed to process
15
- # messages from given topics partitions. It uses the non-preemptive LJF algorithm
16
- #
17
- # This scheduler is designed to optimize execution times on jobs that perform IO operations as
18
 - # when taking IO into consideration, it can achieve optimized parallel processing.
19
- #
20
- # This scheduler can also work with virtual partitions.
21
- #
22
- # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
23
- # default FIFO scheduler from the default Karafka scheduler
24
- class Scheduler < ::Karafka::Scheduler
25
- # Schedules jobs in the LJF order for consumption
26
- #
27
- # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
28
- # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
29
- #
30
- def schedule_consumption(queue, jobs_array)
31
- pt = PerformanceTracker.instance
32
-
33
- ordered = []
34
-
35
- jobs_array.each do |job|
36
- messages = job.messages
37
- message = messages.first
38
-
39
- cost = pt.processing_time_p95(message.topic, message.partition) * messages.size
40
-
41
- ordered << [job, cost]
42
- end
43
-
44
- ordered.sort_by!(&:last)
45
- ordered.reverse!
46
- ordered.map!(&:first)
47
-
48
- ordered.each do |job|
49
- queue << job
50
- end
51
- end
52
- end
53
- end
54
- end
@@ -1,20 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Karafka
4
- # FIFO scheduler for messages coming from various topics and partitions
5
- class Scheduler
6
- # Schedules jobs in the fifo order
7
- #
8
- # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
9
- # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
10
- def schedule_consumption(queue, jobs_array)
11
- jobs_array.each do |job|
12
- queue << job
13
- end
14
- end
15
-
16
- # Both revocation and shutdown jobs can also run in fifo by default
17
- alias schedule_revocation schedule_consumption
18
- alias schedule_shutdown schedule_consumption
19
- end
20
- end