karafka 2.0.0.alpha6 → 2.0.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 862df94b26c24809f82e07f71c39433b90ef08f68f053a004a87aa466b872dac
-   data.tar.gz: 2c533cbd6c271fe282f59c2030d9cc885555242bb8bc9316d0264a5ccfd694a0
+   metadata.gz: f108cb4288d0ed0510381f51c77d49e052b947f6180c9b9c0b06e0ac2b599894
+   data.tar.gz: 3d79066d0107c08f450ca9f4c3b5c4a39aae497836c80bf8380c65f1406b82c0
  SHA512:
-   metadata.gz: 8ce3720e535d65f121bcbfd957286cbc41404f2aff2751622183367862db55f8231c286d5bbede4a18649eafcd085952a7fc8e7569a74dbd3633c9db906114e5
-   data.tar.gz: e77819ccd2be263b02958fcee71ce9228c93c7dee62fabbea61711fbbdbffa997bb9a85a7f668cf8bb596b7a98a773ace560ad5075c46e70f20cd302b354b0fe
+   metadata.gz: 4aae257010c992c59ce4b01ead54ff2cfd4e8ccd8cbe6b52214b3cedf8f879690e0d577f2b41f44b1ab6888d7e27bbc92f3ba4a69e8b127687fb4c43bff51fbc
+   data.tar.gz: f65e425cb84152d20a055bdb9a94fd98280597cdf5e431337cb8604040534cacbfdd03efd6dc23b86c9ecf25721c860bd55ca75ad3f98e4c66136a88c1efc4e7
checksums.yaml.gz.sig CHANGED
Binary file
data/.ruby-version CHANGED
@@ -1 +1 @@
- 3.1.0
+ 3.1.2
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
  # Karafka framework changelog

+ ## 2.0.0-beta1 (2022-05-22)
+ - Update the jobs queue blocking engine and allow for non-blocking jobs execution
+ - Provide `#prepared` hook that always runs before the fetching loop is unblocked
+ - [Pro] Introduce performance tracker for scheduling optimizer
+ - Provide ability to pause (`#pause`) and resume (`#resume`) given partitions from the consumers
+ - Small integration specs refactoring + specs for pausing scenarios
+
  ## 2.0.0-alpha6 (2022-04-17)
  - Fix a bug, where upon missing boot file and Rails, railtie would fail with a generic exception (#818)
  - Fix an issue with parallel pristine specs colliding with each other during `bundle install` (#820)
@@ -26,12 +33,12 @@
  ## 2.0.0-alpha2 (2022-02-19)
  - Require `kafka` keys to be symbols
- - Added ActiveJob Pro adapter
+ - [Pro] Added ActiveJob Pro adapter
  - Small updates to the license and docs

  ## 2.0.0-alpha1 (2022-01-30)
  - Change license to `LGPL-3.0`
- - Introduce a Pro subscription
+ - [Pro] Introduce a Pro subscription
  - Switch from `ruby-kafka` to `librdkafka` as an underlying driver
  - Introduce fully automatic integration tests that go through the whole server lifecycle
  - Integrate WaterDrop tightly with autoconfiguration inheritance and an option to redefine it
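The three consumer-facing entries above (`#prepared`, `#pause`, `#resume`) combine roughly as follows. A minimal sketch, assuming a topic already routed to the consumer; `OrdersConsumer`, `overloaded?` and `process` are hypothetical names and the 10-second timeout is purely illustrative:

    # frozen_string_literal: true

    require 'karafka'

    # Hypothetical consumer exercising the flow-control API added in beta1
    class OrdersConsumer < Karafka::BaseConsumer
      def consume
        messages.each do |message|
          if overloaded?
            # Pause this topic partition at the current offset for 10 seconds;
            # without an explicit timeout, the exponential backoff strategy
            # defined for retries is used instead
            pause(message.offset, 10_000)
            return
          end

          process(message)
          mark_as_consumed(message)
        end
      end

      private

      # Placeholders for real application logic
      def overloaded?
        false
      end

      def process(message)
        puts message.raw_payload
      end
    end

A paused partition continues once the pause expires, and `#resume` (shown later in this diff) can lift it earlier.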
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
    remote: .
    specs:
-     karafka (2.0.0.alpha6)
+     karafka (2.0.0.beta1)
        dry-configurable (~> 0.13)
        dry-monitor (~> 0.5)
        dry-validation (~> 1.7)
@@ -13,10 +13,10 @@ PATH
  GEM
    remote: https://rubygems.org/
    specs:
-     activejob (7.0.2.3)
-       activesupport (= 7.0.2.3)
+     activejob (7.0.3)
+       activesupport (= 7.0.3)
        globalid (>= 0.3.6)
-     activesupport (7.0.2.3)
+     activesupport (7.0.3)
        concurrent-ruby (~> 1.0, >= 1.0.2)
        i18n (>= 1.6, < 2)
        minitest (>= 5.1)
@@ -25,7 +25,7 @@ GEM
      concurrent-ruby (1.1.10)
      diff-lcs (1.5.0)
      docile (1.4.0)
-     dry-configurable (0.14.0)
+     dry-configurable (0.15.0)
        concurrent-ruby (~> 1.0)
        dry-core (~> 0.6)
      dry-container (0.9.0)
@@ -121,4 +121,4 @@ DEPENDENCIES
    simplecov

  BUNDLED WITH
-    2.3.10
+    2.3.11
data/docker-compose.yml CHANGED
@@ -16,6 +16,7 @@ services:
        KAFKA_CREATE_TOPICS:
          "integrations_0_02:2:1,\
          integrations_1_02:2:1,\
+         integrations_2_02:2:1,\
          integrations_0_03:3:1,\
          integrations_1_03:3:1,\
          integrations_2_03:3:1,\
data/lib/karafka/base_consumer.rb CHANGED
@@ -10,8 +10,8 @@ module Karafka
      attr_accessor :messages
      # @return [Karafka::Connection::Client] kafka connection client
      attr_accessor :client
-     # @return [Karafka::TimeTrackers::Pause] current topic partition pause
-     attr_accessor :pause
+     # @return [Karafka::TimeTrackers::Pause] current topic partition pause tracker
+     attr_accessor :pause_tracker
      # @return [Waterdrop::Producer] producer instance
      attr_accessor :producer

@@ -24,7 +24,7 @@ module Karafka
      Karafka.monitor.instrument('consumer.consumed', caller: self) do
        consume

-       pause.reset
+       pause_tracker.reset

        # Mark as consumed only if manual offset management is not on
        return if topic.manual_offset_management
@@ -40,8 +40,8 @@ module Karafka
        caller: self,
        type: 'consumer.consume.error'
      )
-     client.pause(topic.name, messages.first.partition, @seek_offset || messages.first.offset)
-     pause.pause
+
+     pause(@seek_offset || messages.first.offset)
    end

    # Trigger method for running on shutdown.
@@ -76,8 +76,31 @@ module Karafka
      )
    end

+   # Can be used to run preparation code
+   #
+   # @private
+   # @note This should not be used by end users, as it is part of the lifecycle of things and
+   #   not part of the public API. It can act as a hook when creating non-blocking consumers
+   #   and doing other advanced stuff.
+   def on_prepared
+     Karafka.monitor.instrument('consumer.prepared', caller: self) do
+       prepared
+     end
+   rescue StandardError => e
+     Karafka.monitor.instrument(
+       'error.occurred',
+       error: e,
+       caller: self,
+       type: 'consumer.prepared.error'
+     )
+   end
+
    private

+   # Method that gets called in the blocking flow, allowing to set up any type of resources
+   # or to send additional commands to Kafka before the proper execution starts.
+   def prepared; end
+
    # Method that will perform business logic on data received from Kafka (it will consume
    # the data)
    # @note This method needs to be implemented in a subclass. We stub it here as a failover if
@@ -97,6 +120,10 @@ module Karafka
    # Marks message as consumed in an async way.
    #
    # @param message [Messages::Message] last successfully processed message.
+   # @note We keep track of this offset in case we would mark as consumed and get an error
+   #   when processing another message. In a case like this we do not pause on the message
+   #   we've already processed but rather on the next one. This applies to both sync and
+   #   async versions of this method.
    def mark_as_consumed(message)
      client.mark_as_consumed(message)
      @seek_offset = message.offset + 1
@@ -110,6 +137,32 @@ module Karafka
      @seek_offset = message.offset + 1
    end

+   # Pauses processing on a given offset for the current topic partition
+   #
+   # After the given partition is resumed, it will continue processing from the given offset
+   # @param offset [Integer] offset from which we want to restart the processing
+   # @param timeout [Integer, nil] how long in milliseconds we want to pause, or nil to use
+   #   the default exponential pausing strategy defined for retries
+   def pause(offset, timeout = nil)
+     client.pause(
+       messages.metadata.topic,
+       messages.metadata.partition,
+       offset
+     )
+
+     timeout ? pause_tracker.pause(timeout) : pause_tracker.pause
+   end
+
+   # Resumes processing of the current topic partition
+   def resume
+     client.resume(
+       messages.metadata.topic,
+       messages.metadata.partition
+     )
+
+     pause_tracker.expire
+   end
+
    # Seeks in the context of current topic and partition
    #
    # @param offset [Integer] offset where we want to seek
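As a usage sketch of the hook introduced above: `#on_prepared` is framework-internal, while the private `#prepared` is the piece meant to be redefined. `EventsConsumer` below is a hypothetical example, not part of this release:

    # Hypothetical consumer redefining the private #prepared hook
    class EventsConsumer < Karafka::BaseConsumer
      def consume
        messages.each { |message| puts message.raw_payload }
      end

      private

      # Runs in the blocking phase, before the fetching loop is unblocked,
      # so it is a good place for batch-scoped setup
      def prepared
        @batch_started_at = Time.now.utc
      end
    end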
data/lib/karafka/connection/listener.rb CHANGED
@@ -15,6 +15,8 @@ module Karafka
        @pauses_manager = PausesManager.new
        @client = Client.new(@subscription_group)
        @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
+       # We reference the scheduler here, as it is much faster than fetching it each time
+       @scheduler = ::Karafka::App.config.internal.scheduler
      end

      # Runs the main listener fetch loop.
@@ -66,9 +68,9 @@ module Karafka
        # distributing consuming jobs as upon revoking, we might get assigned to the same
        # partitions, thus getting their jobs. The revoking jobs need to finish before
        # appropriate consumers are taken down and re-created
-       wait(@subscription_group) if distribute_revoke_lost_partitions_jobs
+       wait(@subscription_group) if schedule_revoke_lost_partitions_jobs

-       distribute_partitions_jobs(messages_buffer)
+       schedule_partitions_jobs(messages_buffer)

        # We wait only on jobs from our subscription group. Other groups are independent.
        wait(@subscription_group)
@@ -103,15 +105,17 @@ module Karafka

      # Enqueues revoking jobs for partitions that were taken away from the running process.
      # @return [Boolean] was there anything to revoke
-     def distribute_revoke_lost_partitions_jobs
+     # @note We do not use the scheduler here, as those jobs are not meant to be
+     #   order-optimized in any way. Since they run only occasionally, it is irrelevant.
+     def schedule_revoke_lost_partitions_jobs
        revoked_partitions = @client.rebalance_manager.revoked_partitions

        return false if revoked_partitions.empty?

        revoked_partitions.each do |topic, partitions|
          partitions.each do |partition|
-           pause = @pauses_manager.fetch(topic, partition)
-           executor = @executors.fetch(topic, partition, pause)
+           pause_tracker = @pauses_manager.fetch(topic, partition)
+           executor = @executors.fetch(topic, partition, pause_tracker)
            @jobs_queue << Processing::Jobs::Revoked.new(executor)
          end
        end
@@ -122,8 +126,8 @@ module Karafka
      # Takes the messages per topic partition and enqueues processing jobs in threads.
      #
      # @param messages_buffer [Karafka::Connection::MessagesBuffer] buffer with messages
-     def distribute_partitions_jobs(messages_buffer)
-       messages_buffer.each do |topic, partition, messages|
+     def schedule_partitions_jobs(messages_buffer)
+       @scheduler.call(messages_buffer) do |topic, partition, messages|
          pause = @pauses_manager.fetch(topic, partition)

          next if pause.paused?
data/lib/karafka/connection/messages_buffer.rb CHANGED
@@ -10,6 +10,10 @@ module Karafka
      class MessagesBuffer
        attr_reader :size

+       extend Forwardable
+
+       def_delegators :@groups, :each
+
        # @return [Karafka::Connection::MessagesBuffer] buffer instance
        def initialize
          @size = 0
@@ -20,19 +24,6 @@ module Karafka
          end
        end

-       # Iterates over aggregated data providing messages per topic partition.
-       #
-       # @yieldparam [String] topic name
-       # @yieldparam [Integer] partition number
-       # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
-       def each
-         @groups.each do |topic, partitions|
-           partitions.each do |partition, messages|
-             yield(topic, partition, messages)
-           end
-         end
-       end
-
        # Adds a message to the buffer.
        #
        # @param message [Rdkafka::Consumer::Message] raw rdkafka message
data/lib/karafka/connection/pauses_manager.rb CHANGED
@@ -12,11 +12,11 @@ module Karafka
        end
      end

-     # Creates or fetches pause of a given topic partition.
+     # Creates or fetches the pause tracker of a given topic partition.
      #
      # @param topic [String] topic name
      # @param partition [Integer] partition number
-     # @return [Karafka::TimeTrackers::Pause] pause instance
+     # @return [Karafka::TimeTrackers::Pause] pause tracker instance
      def fetch(topic, partition)
        @pauses[topic][partition] ||= TimeTrackers::Pause.new(
          timeout: Karafka::App.config.pause_timeout,
data/lib/karafka/contracts/config.rb CHANGED
@@ -32,6 +32,7 @@ module Karafka
        required(:routing_builder)
        required(:status)
        required(:process)
+       required(:scheduler)
        required(:subscription_groups_builder)
      end
    end
data/lib/karafka/instrumentation/monitor.rb CHANGED
@@ -22,6 +22,7 @@ module Karafka
        app.stopping
        app.stopped

+       consumer.prepared
        consumer.consumed
        consumer.revoked
        consumer.shutdown
data/lib/karafka/pro/active_job/dispatcher.rb CHANGED
@@ -1,18 +1,18 @@
  # frozen_string_literal: true

- # This Karafka component is a Pro component.
- # All of the commercial components are present in the lib/karafka/pro directory of this repository
- # and their usage requires commercial license agreement.
- #
- # Karafka has also commercial-friendly license, commercial support and commercial components.
- #
- # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
- # your code to Maciej Mensfeld.
-
  module Karafka
    module Pro
      # Karafka Pro ActiveJob components
      module ActiveJob
+       # This Karafka component is a Pro component.
+       # All of the commercial components are present in the lib/karafka/pro directory of this
+       # repository and their usage requires commercial license agreement.
+       #
+       # Karafka has also commercial-friendly license, commercial support and commercial components.
+       #
+       # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+       # of your code to Maciej Mensfeld.
+
        # Pro dispatcher that sends the ActiveJob job to a proper topic based on the queue name
        # and that allows to inject additional options into the producer, effectively allowing for a
        # much better and more granular control over the dispatch and consumption process.
data/lib/karafka/pro/active_job/job_options_contract.rb CHANGED
@@ -1,17 +1,17 @@
  # frozen_string_literal: true

- # This Karafka component is a Pro component.
- # All of the commercial components are present in the lib/karafka/pro directory of this repository
- # and their usage requires commercial license agreement.
- #
- # Karafka has also commercial-friendly license, commercial support and commercial components.
- #
- # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
- # your code to Maciej Mensfeld.
-
  module Karafka
    module Pro
      module ActiveJob
+       # This Karafka component is a Pro component.
+       # All of the commercial components are present in the lib/karafka/pro directory of this
+       # repository and their usage requires commercial license agreement.
+       #
+       # Karafka has also commercial-friendly license, commercial support and commercial components.
+       #
+       # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+       # of your code to Maciej Mensfeld.
+
        # Contract for validating the options that can be altered with `#karafka_options` per job
        # class that works with Pro features.
        class JobOptionsContract < ::Karafka::ActiveJob::JobOptionsContract
data/lib/karafka/pro/loader.rb CHANGED
@@ -1,15 +1,16 @@
  # frozen_string_literal: true

- # This Karafka component is a Pro component.
- # All of the commercial components are present in the lib/karafka/pro directory of this repository
- # and their usage requires commercial license agreement.
- #
- # Karafka has also commercial-friendly license, commercial support and commercial components.
- #
- # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
- # your code to Maciej Mensfeld.
-
  module Karafka
    module Pro
+     # This Karafka component is a Pro component.
+     # All of the commercial components are present in the lib/karafka/pro directory of this
+     # repository and their usage requires commercial license agreement.
+     #
+     # Karafka has also commercial-friendly license, commercial support and commercial components.
+     #
+     # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+     # of your code to Maciej Mensfeld.
+
      # Loader requires and loads all the pro components only when they are needed
      class Loader
        class << self
@@ -17,11 +18,15 @@ module Karafka
          # @param config [Dry::Configurable::Config] whole app config that we can alter with pro
          #   components
          def setup(config)
+           require_relative 'performance_tracker'
            require_relative 'active_job/dispatcher'
            require_relative 'active_job/job_options_contract'

            config.internal.active_job.dispatcher = ActiveJob::Dispatcher.new
            config.internal.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
+
+           # Monitor time needed to process each message from a single partition
+           config.monitor.subscribe(PerformanceTracker.instance)
          end
        end
      end
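The `config.monitor.subscribe(PerformanceTracker.instance)` line relies on the monitor's listener convention: a `#on_<event name with dots as underscores>` method receives each published event. A sketch of a hand-rolled listener in the same style; `TimingListener` is hypothetical, and treating `event[:time]` as milliseconds is an assumption based on how the tracker below uses it:

    # Hypothetical listener following the PerformanceTracker subscription style
    class TimingListener
      # Maps to the 'consumer.consumed' event
      def on_consumer_consumed(event)
        metadata = event[:caller].messages.metadata

        # Unit of event[:time] (assumed ms) mirrors the tracker's usage
        puts "#{metadata.topic}/#{metadata.partition}: #{event[:time]} ms"
      end
    end

    Karafka.monitor.subscribe(TimingListener.new)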
data/lib/karafka/pro/performance_tracker.rb ADDED
@@ -0,0 +1,80 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Pro
+     # This Karafka component is a Pro component.
+     # All of the commercial components are present in the lib/karafka/pro directory of this
+     # repository and their usage requires commercial license agreement.
+     #
+     # Karafka has also commercial-friendly license, commercial support and commercial components.
+     #
+     # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+     # of your code to Maciej Mensfeld.
+
+     # Tracker used to keep track of performance metrics
+     # It provides insights that can be used to optimize the processing flow
+     class PerformanceTracker
+       include Singleton
+
+       # How many samples do we collect per topic partition
+       SAMPLES_COUNT = 200
+
+       private_constant :SAMPLES_COUNT
+
+       # Builds up a nested concurrent hash for data tracking
+       def initialize
+         @processing_times = Concurrent::Hash.new do |topics_hash, topic|
+           topics_hash[topic] = Concurrent::Hash.new do |partitions_hash, partition|
+             # This array does not have to be concurrent, because we always access single
+             # partition data via instrumentation that operates in a single thread via consumer
+             partitions_hash[partition] = []
+           end
+         end
+       end
+
+       # @param topic [String]
+       # @param partition [Integer]
+       # @return [Float] p95 processing time of a single message from a single topic partition
+       def processing_time_p95(topic, partition)
+         values = @processing_times[topic][partition]
+
+         return 0 if values.empty?
+         return values.first if values.size == 1
+
+         percentile(0.95, values)
+       end
+
+       # @private
+       # @param event [Dry::Events::Event] event details
+       # Tracks the time taken to process a single message of a given topic partition
+       def on_consumer_consumed(event)
+         consumer = event[:caller]
+         messages = consumer.messages
+         topic = messages.metadata.topic
+         partition = messages.metadata.partition
+
+         samples = @processing_times[topic][partition]
+         samples << event[:time] / messages.count
+
+         return unless samples.size > SAMPLES_COUNT
+
+         samples.shift
+       end
+
+       private
+
+       # Computes the requested percentile out of the provided values
+       # @param percentile [Float]
+       # @param values [Array<Numeric>] all the values based on which we should compute the
+       #   percentile
+       # @return [Float] computed percentile
+       def percentile(percentile, values)
+         values_sorted = values.sort
+
+         floor = (percentile * (values_sorted.length - 1) + 1).floor - 1
+         mod = (percentile * (values_sorted.length - 1) + 1).modulo(1)
+
+         values_sorted[floor] + (mod * (values_sorted[floor + 1] - values_sorted[floor]))
+       end
+     end
+   end
+ end
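The interpolation in `#percentile` is the standard linear method over the sorted samples. A standalone restatement with a worked value, independent of any Karafka API:

    # Same math as PerformanceTracker#percentile, extracted for illustration
    def percentile(fraction, values)
      sorted = values.sort
      rank = fraction * (sorted.length - 1) + 1

      floor = rank.floor - 1
      mod = rank.modulo(1)

      sorted[floor] + (mod * (sorted[floor + 1] - sorted[floor]))
    end

    # rank = 0.95 * 3 + 1 = 3.85, so p95 sits 85% of the way from 30 to 40
    puts percentile(0.95, [10, 20, 30, 40]) # => 38.5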
data/lib/karafka/processing/executor.rb CHANGED
@@ -4,10 +4,10 @@ module Karafka
    # Namespace that encapsulates all the logic related to processing data.
    module Processing
      # Executors:
-     # - run consumers code with provided messages batch (for `#call`) or run given teardown
-     #   operations when needed from separate threads.
-     # - they re-create consumer instances in case of partitions that were revoked
-     #   and assigned back.
+     # - run consumers code (for `#call`) or run given preparation / teardown operations when
+     #   needed from separate threads.
+     # - they re-create consumer instances in case of partitions that were revoked and assigned
+     #   back.
      #
      # @note Executors are not removed after partition is revoked. They are not that big and will
      #   be re-used in case of a re-claim
@@ -21,21 +21,21 @@ module Karafka
      # @param group_id [String] id of the subscription group to which the executor belongs
      # @param client [Karafka::Connection::Client] kafka client
      # @param topic [Karafka::Routing::Topic] topic for which this executor will run
-     # @param pause [Karafka::TimeTrackers::Pause] fetch pause object for crash pausing
-     def initialize(group_id, client, topic, pause)
+     # @param pause_tracker [Karafka::TimeTrackers::Pause] fetch pause tracker for pausing
+     def initialize(group_id, client, topic, pause_tracker)
        @id = SecureRandom.uuid
        @group_id = group_id
        @client = client
        @topic = topic
-       @pause = pause
+       @pause_tracker = pause_tracker
      end

-     # Runs consumer data processing against given batch and handles failures and errors.
+     # Builds the consumer instance and sets up all that is needed to run the user consumption logic
      #
      # @param messages [Array<Rdkafka::Consumer::Message>] raw rdkafka messages
      # @param received_at [Time] the moment we've received the batch (actually the moment we've
      #   enqueued it, but good enough)
-     def consume(messages, received_at)
+     def prepare(messages, received_at)
        # Recreate consumer with each batch if persistence is not enabled
        # We reload the consumers with each batch instead of relying on some external signals
        # when needed for consistency. That way devs may have it on or off and not in this
@@ -49,6 +49,11 @@ module Karafka
          received_at
        )

+       consumer.on_prepared
+     end
+
+     # Runs consumer data processing against given batch and handles failures and errors.
+     def consume
        # We run the consumer client logic...
        consumer.on_consume
      end
@@ -86,7 +91,7 @@ module Karafka
        consumer = @topic.consumer.new
        consumer.topic = @topic
        consumer.client = @client
-       consumer.pause = @pause
+       consumer.pause_tracker = @pause_tracker
        consumer.producer = ::Karafka::App.producer
        consumer
      end
data/lib/karafka/processing/jobs/base.rb CHANGED
@@ -5,6 +5,8 @@ module Karafka
      # Namespace for all the jobs that are supposed to run in workers.
      module Jobs
        # Base class for all the job types that are supposed to run in workers threads.
+       # Each job can have 3 main entry-points: `#prepare`, `#call` and `#teardown`.
+       # Only `#call` is required.
        class Base
          extend Forwardable

@@ -12,6 +14,20 @@ module Karafka
          def_delegators :executor, :id, :group_id

          attr_reader :executor
+
+         # When redefined, can run any code that should run before executing the proper code
+         def prepare; end
+
+         # When redefined, can run any code that should run after executing the proper code
+         def teardown; end
+
+         # @return [Boolean] is this a non-blocking job
+         # @note A blocking job is a job that will cause the job queue to wait until it is
+         #   finished before removing the lock on new jobs being added
+         # @note All the jobs are blocking by default
+         def non_blocking?
+           false
+         end
        end
      end
    end
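To make the contract concrete, here is a hypothetical job type flipping `#non_blocking?`; this release itself ships only blocking jobs, so the class below is a sketch rather than framework code:

    # Hypothetical non-blocking job built on the entry points above
    class LongRunningJob < Karafka::Processing::Jobs::Base
      # Blocking stage: runs before the jobs queue may be unlocked
      def prepare
        # e.g. set up state that must exist before polling resumes
      end

      # Because non_blocking? is true, the worker ticks the jobs queue
      # before this runs, letting the fetch loop continue meanwhile
      def call
        # potentially slow processing
      end

      # Cleanup stage: runs after #call
      def teardown; end

      def non_blocking?
        true
      end
    end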
data/lib/karafka/processing/jobs/consume.rb CHANGED
@@ -18,9 +18,14 @@ module Karafka
            super()
          end

-         # Runs the given executor.
+         # Runs the preparations on the executor
+         def prepare
+           executor.prepare(@messages, @created_at)
+         end
+
+         # Runs the given executor
          def call
-           executor.consume(@messages, @created_at)
+           executor.consume
          end
        end
      end
    end
data/lib/karafka/processing/jobs_queue.rb CHANGED
@@ -21,7 +21,7 @@ module Karafka
        # We cannot use a single semaphore, as it could potentially block in listeners that should
        # process with their data and also could unlock when a given group needs to remain locked
        @semaphores = Hash.new { |h, k| h[k] = Queue.new }
-       @in_processing = Hash.new { |h, k| h[k] = {} }
+       @in_processing = Hash.new { |h, k| h[k] = [] }
        @mutex = Mutex.new
      end

@@ -44,9 +44,9 @@ module Karafka
        @mutex.synchronize do
          group = @in_processing[job.group_id]

-         raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.key?(job.id)
+         raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.include?(job)

-         group[job.id] = true
+         group << job
        end

        @queue << job
@@ -60,14 +60,21 @@ module Karafka
          @queue.pop
        end

+       # Causes the wait lock to re-check the lock conditions and potentially unlock.
+       # @param group_id [String] id of the group we want to unlock for one tick
+       # @note This does not release the wait lock. It just causes a recheck of the conditions.
+       def tick(group_id)
+         @semaphores[group_id] << true
+       end
+
        # Marks a given job from a given group as completed. When there are no more jobs from a given
        # group to be executed, we won't wait.
        #
        # @param [Jobs::Base] job that was completed
        def complete(job)
          @mutex.synchronize do
-           @in_processing[job.group_id].delete(job.id)
-           @semaphores[job.group_id] << true
+           @in_processing[job.group_id].delete(job)
+           tick(job.group_id)
          end
        end

@@ -79,7 +86,7 @@ module Karafka
        @mutex.synchronize do
          @in_processing[group_id].clear
          # We unlock it just in case it was blocked when clearing started
-         @semaphores[group_id] << true
+         tick(group_id)
        end
      end

@@ -108,13 +115,15 @@ module Karafka
      # @param group_id [String] id of the group in which jobs we're interested.
      # @return [Boolean] should we keep waiting or not
      def wait?(group_id)
+       group = @in_processing[group_id]
+
        # If it is stopping, all the previous messages that are processed at the moment need to
        # finish. Otherwise we may risk closing the client and committing offsets afterwards
-       return false if Karafka::App.stopping? && @in_processing[group_id].empty?
+       return false if Karafka::App.stopping? && group.empty?
        return false if @queue.closed?
-       return false if @in_processing[group_id].empty?
+       return false if group.empty?

-       true
+       !group.all?(&:non_blocking?)
      end
    end
  end
data/lib/karafka/processing/worker.rb CHANGED
@@ -4,6 +4,18 @@ module Karafka
    module Processing
      # Workers are used to run jobs in separate threads.
      # Workers are the main processing units of the Karafka framework.
+     #
+     # Each job runs in three stages:
+     #   - prepare - here we can run any code that we would need to run blocking before we
+     #               allow the job to run fully async (non-blocking). This will always run in
+     #               a blocking way and can be used to make sure all the resources and external
+     #               dependencies are satisfied before going async.
+     #
+     #   - call - the actual processing logic that can run sync or async
+     #
+     #   - teardown - should include any code that we want to run after we executed the user
+     #                code. This can be used to unlock certain resources or do other things
+     #                that are not user code but need to run after the user code is executed.
      class Worker
        extend Forwardable

@@ -33,7 +45,18 @@ module Karafka
        job = @jobs_queue.pop

        if job
+         job.prepare
+
+         # If a job is marked as non-blocking, we can run a tick in the job queue and, if
+         # there are no other blocking factors, the job queue will be unlocked.
+         # If this does not run, everything stays blocking and the job queue won't let
+         # processing pass until done.
+         @jobs_queue.tick(job.group_id) if job.non_blocking?
+
          job.call
+
+         job.teardown
+
          true
        else
          false
data/lib/karafka/scheduler.rb ADDED
@@ -0,0 +1,21 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # FIFO scheduler for messages coming from various topics and partitions
+   class Scheduler
+     # Yields messages from partitions in FIFO order
+     #
+     # @param messages_buffer [Karafka::Connection::MessagesBuffer] messages buffer with data
+     #   from multiple topics and partitions
+     # @yieldparam [String] topic name
+     # @yieldparam [Integer] partition number
+     # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
+     def call(messages_buffer)
+       messages_buffer.each do |topic, partitions|
+         partitions.each do |partition, messages|
+           yield(topic, partition, messages)
+         end
+       end
+     end
+   end
+ end
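Since the listener resolves its scheduler from `config.internal.scheduler` (see the `setup/config.rb` hunk below), this looks like a swappable extension point. A sketch under that assumption; `SmallestBatchFirstScheduler` is hypothetical and merely reorders what the FIFO version would yield:

    # Hypothetical scheduler that yields the smallest batches first
    class SmallestBatchFirstScheduler
      # Same signature and yield contract as Karafka::Scheduler#call
      def call(messages_buffer)
        batches = []

        messages_buffer.each do |topic, partitions|
          partitions.each do |partition, messages|
            batches << [topic, partition, messages]
          end
        end

        batches
          .sort_by { |(_, _, messages)| messages.size }
          .each { |batch| yield(*batch) }
      end
    end

    # Assumed wiring via the new internal setting
    Karafka::App.setup do |config|
      config.internal.scheduler = SmallestBatchFirstScheduler.new
    end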
data/lib/karafka/setup/config.rb CHANGED
@@ -96,6 +96,8 @@ module Karafka
        # option subscription_groups_builder [Routing::SubscriptionGroupsBuilder] subscription
        #   group builder
        setting :subscription_groups_builder, default: Routing::SubscriptionGroupsBuilder.new
+       # option scheduler [Karafka::Scheduler] scheduler we will be using
+       setting :scheduler, default: Scheduler.new

        # Karafka components for ActiveJob
        setting :active_job do
data/lib/karafka/time_trackers/pause.rb CHANGED
@@ -41,9 +41,12 @@ module Karafka

      # Pauses the processing from now till the end of the interval (backoff or non-backoff)
      # and records the count.
-     def pause
+     # @param timeout [Integer] timeout value in milliseconds that overwrites the default one
+     # @note Providing this value can be useful when we explicitly want to pause for a certain
+     #   period of time, outside of any regular pausing logic
+     def pause(timeout = backoff_interval)
        @started_at = now
-       @ends_at = @started_at + backoff_interval
+       @ends_at = @started_at + timeout
        @count += 1
      end

@@ -53,6 +56,11 @@ module Karafka
        @ends_at = nil
      end

+     # Expires the pause, so it is considered expired right away
+     def expire
+       @ends_at = nil
+     end
+
      # @return [Boolean] are we paused from processing
      def paused?
        !@started_at.nil?
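Putting the tracker changes together: a short behavioral sketch. The `timeout:` keyword is visible in the `PausesManager` hunk earlier; `max_timeout:` and `exponential_backoff:` are assumed constructor keywords not shown in this diff:

    require 'karafka'

    pause = Karafka::TimeTrackers::Pause.new(
      timeout: 1_000,
      max_timeout: 10_000,      # assumed keyword, not visible in this diff
      exponential_backoff: true # assumed keyword, not visible in this diff
    )

    pause.pause          # uses the (possibly exponential) backoff interval
    pause.paused?        # => true
    pause.pause(30_000)  # explicit 30s pause, bypassing the backoff value
    pause.expire         # force-expires so the partition can resume at once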
data/lib/karafka/version.rb CHANGED
@@ -3,5 +3,5 @@
  # Main module namespace
  module Karafka
    # Current Karafka version
-   VERSION = '2.0.0.alpha6'
+   VERSION = '2.0.0.beta1'
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: karafka
  version: !ruby/object:Gem::Version
-   version: 2.0.0.alpha6
+   version: 2.0.0.beta1
  platform: ruby
  authors:
  - Maciej Mensfeld
@@ -34,7 +34,7 @@ cert_chain:
    R2P11bWoCtr70BsccVrN8jEhzwXngMyI2gVt750Y+dbTu1KgRqZKp/ECe7ZzPzXj
    pIy9vHxTANKYVyI4qj8OrFdEM5BQNu8oQpL0iQ==
    -----END CERTIFICATE-----
- date: 2022-04-17 00:00:00.000000000 Z
+ date: 2022-05-22 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: dry-configurable
@@ -228,6 +228,7 @@ files:
  - lib/karafka/pro/active_job/dispatcher.rb
  - lib/karafka/pro/active_job/job_options_contract.rb
  - lib/karafka/pro/loader.rb
+ - lib/karafka/pro/performance_tracker.rb
  - lib/karafka/process.rb
  - lib/karafka/processing/executor.rb
  - lib/karafka/processing/executors_buffer.rb
@@ -248,6 +249,7 @@ files:
  - lib/karafka/routing/subscription_groups_builder.rb
  - lib/karafka/routing/topic.rb
  - lib/karafka/runner.rb
+ - lib/karafka/scheduler.rb
  - lib/karafka/serialization/json/deserializer.rb
  - lib/karafka/server.rb
  - lib/karafka/setup/config.rb
@@ -282,7 +284,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
    version: 1.3.1
  requirements: []
- rubygems_version: 3.3.3
+ rubygems_version: 3.3.7
  signing_key:
  specification_version: 4
  summary: Ruby based framework for working with Apache Kafka
metadata.gz.sig CHANGED
Binary file