karafka 2.0.0.alpha6 → 2.0.0.beta1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 862df94b26c24809f82e07f71c39433b90ef08f68f053a004a87aa466b872dac
- data.tar.gz: 2c533cbd6c271fe282f59c2030d9cc885555242bb8bc9316d0264a5ccfd694a0
+ metadata.gz: f108cb4288d0ed0510381f51c77d49e052b947f6180c9b9c0b06e0ac2b599894
+ data.tar.gz: 3d79066d0107c08f450ca9f4c3b5c4a39aae497836c80bf8380c65f1406b82c0
  SHA512:
- metadata.gz: 8ce3720e535d65f121bcbfd957286cbc41404f2aff2751622183367862db55f8231c286d5bbede4a18649eafcd085952a7fc8e7569a74dbd3633c9db906114e5
- data.tar.gz: e77819ccd2be263b02958fcee71ce9228c93c7dee62fabbea61711fbbdbffa997bb9a85a7f668cf8bb596b7a98a773ace560ad5075c46e70f20cd302b354b0fe
+ metadata.gz: 4aae257010c992c59ce4b01ead54ff2cfd4e8ccd8cbe6b52214b3cedf8f879690e0d577f2b41f44b1ab6888d7e27bbc92f3ba4a69e8b127687fb4c43bff51fbc
+ data.tar.gz: f65e425cb84152d20a055bdb9a94fd98280597cdf5e431337cb8604040534cacbfdd03efd6dc23b86c9ecf25721c860bd55ca75ad3f98e4c66136a88c1efc4e7
checksums.yaml.gz.sig CHANGED
Binary file
data/.ruby-version CHANGED
@@ -1 +1 @@
- 3.1.0
+ 3.1.2
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
  # Karafka framework changelog

+ ## 2.0.0-beta1 (2022-05-22)
+ - Update the jobs queue blocking engine and allow for non-blocking jobs execution
+ - Provide `#prepared` hook that always runs before the fetching loop is unblocked
+ - [Pro] Introduce performance tracker for scheduling optimizer
+ - Provide ability to pause (`#pause`) and resume (`#resume`) given partitions from the consumers
+ - Small integration specs refactoring + specs for pausing scenarios
+
  ## 2.0.0-alpha6 (2022-04-17)
  - Fix a bug, where upon missing boot file and Rails, railtie would fail with a generic exception (#818)
  - Fix an issue with parallel pristine specs colliding with each other during `bundle install` (#820)
@@ -26,12 +33,12 @@

  ## 2.0.0-alpha2 (2022-02-19)
  - Require `kafka` keys to be symbols
- - Added ActiveJob Pro adapter
+ - [Pro] Added ActiveJob Pro adapter
  - Small updates to the license and docs

  ## 2.0.0-alpha1 (2022-01-30)
  - Change license to `LGPL-3.0`
- - Introduce a Pro subscription
+ - [Pro] Introduce a Pro subscription
  - Switch from `ruby-kafka` to `librdkafka` as an underlying driver
  - Introduce fully automatic integration tests that go through the whole server lifecycle
  - Integrate WaterDrop tightly with autoconfiguration inheritance and an option to redefine it
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
  remote: .
  specs:
- karafka (2.0.0.alpha6)
+ karafka (2.0.0.beta1)
  dry-configurable (~> 0.13)
  dry-monitor (~> 0.5)
  dry-validation (~> 1.7)
@@ -13,10 +13,10 @@ PATH
  GEM
  remote: https://rubygems.org/
  specs:
- activejob (7.0.2.3)
-   activesupport (= 7.0.2.3)
+ activejob (7.0.3)
+   activesupport (= 7.0.3)
    globalid (>= 0.3.6)
- activesupport (7.0.2.3)
+ activesupport (7.0.3)
    concurrent-ruby (~> 1.0, >= 1.0.2)
    i18n (>= 1.6, < 2)
    minitest (>= 5.1)
@@ -25,7 +25,7 @@ GEM
  concurrent-ruby (1.1.10)
  diff-lcs (1.5.0)
  docile (1.4.0)
- dry-configurable (0.14.0)
+ dry-configurable (0.15.0)
    concurrent-ruby (~> 1.0)
    dry-core (~> 0.6)
  dry-container (0.9.0)
@@ -121,4 +121,4 @@ DEPENDENCIES
  simplecov

  BUNDLED WITH
- 2.3.10
+ 2.3.11
data/docker-compose.yml CHANGED
@@ -16,6 +16,7 @@ services:
  KAFKA_CREATE_TOPICS:
  "integrations_0_02:2:1,\
  integrations_1_02:2:1,\
+ integrations_2_02:2:1,\
  integrations_0_03:3:1,\
  integrations_1_03:3:1,\
  integrations_2_03:3:1,\
@@ -10,8 +10,8 @@ module Karafka
  attr_accessor :messages
  # @return [Karafka::Connection::Client] kafka connection client
  attr_accessor :client
- # @return [Karafka::TimeTrackers::Pause] current topic partition pause
- attr_accessor :pause
+ # @return [Karafka::TimeTrackers::Pause] current topic partition pause tracker
+ attr_accessor :pause_tracker
  # @return [Waterdrop::Producer] producer instance
  attr_accessor :producer

@@ -24,7 +24,7 @@ module Karafka
  Karafka.monitor.instrument('consumer.consumed', caller: self) do
    consume

-   pause.reset
+   pause_tracker.reset

    # Mark as consumed only if manual offset management is not on
    return if topic.manual_offset_management
@@ -40,8 +40,8 @@ module Karafka
    caller: self,
    type: 'consumer.consume.error'
  )
- client.pause(topic.name, messages.first.partition, @seek_offset || messages.first.offset)
- pause.pause
+
+ pause(@seek_offset || messages.first.offset)
  end

  # Trigger method for running on shutdown.
@@ -76,8 +76,31 @@ module Karafka
  )
  end

+ # Can be used to run preparation code
+ #
+ # @private
+ # @note This should not be used by the end users as it is part of the lifecycle of things,
+ #   not part of the public api. This can act as a hook when creating non-blocking
+ #   consumers and doing other advanced stuff
+ def on_prepared
+   Karafka.monitor.instrument('consumer.prepared', caller: self) do
+     prepared
+   end
+ rescue StandardError => e
+   Karafka.monitor.instrument(
+     'error.occurred',
+     error: e,
+     caller: self,
+     type: 'consumer.prepared.error'
+   )
+ end
+
  private

+ # Method that gets called in the blocking flow allowing to setup any type of resources or to
+ # send additional commands to Kafka before the proper execution starts.
+ def prepared; end
+
  # Method that will perform business logic on data received from Kafka (it will consume
  # the data)
  # @note This method needs to be implemented in a subclass. We stub it here as a failover if
@@ -97,6 +120,10 @@ module Karafka
  # Marks message as consumed in an async way.
  #
  # @param message [Messages::Message] last successfully processed message.
+ # @note We keep track of this offset in case we mark as consumed and then get an error when
+ #   processing another message. In a case like this we do not pause on the message we've
+ #   already processed but rather on the next one. This applies to both sync and async
+ #   versions of this method.
  def mark_as_consumed(message)
    client.mark_as_consumed(message)
    @seek_offset = message.offset + 1
@@ -110,6 +137,32 @@ module Karafka
    @seek_offset = message.offset + 1
  end

+ # Pauses processing on a given offset for the current topic partition
+ #
+ # After given partition is resumed, it will continue processing from the given offset
+ # @param offset [Integer] offset from which we want to restart the processing
+ # @param timeout [Integer, nil] how long in milliseconds do we want to pause or nil to use the
+ #   default exponential pausing strategy defined for retries
+ def pause(offset, timeout = nil)
+   client.pause(
+     messages.metadata.topic,
+     messages.metadata.partition,
+     offset
+   )
+
+   timeout ? pause_tracker.pause(timeout) : pause_tracker.pause
+ end
+
+ # Resumes processing of the current topic partition
+ def resume
+   client.resume(
+     messages.metadata.topic,
+     messages.metadata.partition
+   )
+
+   pause_tracker.expire
+ end
+
  # Seeks in the context of current topic and partition
  #
  # @param offset [Integer] offset where we want to seek
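For context, here is a minimal consumer sketch showing how the new `#pause`, `#resume` and `#prepared` APIs fit together. The topic, the `overloaded?` predicate and the 5_000 ms timeout are hypothetical, not part of this release:

```ruby
# Hypothetical consumer built on the APIs added above; only #consume is required
class VisitsConsumer < Karafka::BaseConsumer
  def consume
    messages.each do |message|
      # Back off this partition; processing restarts from this message's offset
      # once the pause expires (or once #resume is called from elsewhere)
      return pause(message.offset, 5_000) if overloaded?

      puts message.payload
      mark_as_consumed(message)
    end
  end

  private

  # The new #prepared hook runs in the blocking phase, before the fetching
  # loop is unblocked
  def prepared
    @batch_started_at = Time.now
  end

  # Stand-in for any real backpressure check
  def overloaded?
    false
  end
end
```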
@@ -15,6 +15,8 @@ module Karafka
  @pauses_manager = PausesManager.new
  @client = Client.new(@subscription_group)
  @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
+ # We reference scheduler here as it is much faster than fetching this each time
+ @scheduler = ::Karafka::App.config.internal.scheduler
  end

  # Runs the main listener fetch loop.
@@ -66,9 +68,9 @@
  # distributing consuming jobs as upon revoking, we might get assigned to the same
  # partitions, thus getting their jobs. The revoking jobs need to finish before
  # appropriate consumers are taken down and re-created
- wait(@subscription_group) if distribute_revoke_lost_partitions_jobs
+ wait(@subscription_group) if schedule_revoke_lost_partitions_jobs

- distribute_partitions_jobs(messages_buffer)
+ schedule_partitions_jobs(messages_buffer)

  # We wait only on jobs from our subscription group. Other groups are independent.
  wait(@subscription_group)
@@ -103,15 +105,17 @@

  # Enqueues revoking jobs for partitions that were taken away from the running process.
  # @return [Boolean] was there anything to revoke
- def distribute_revoke_lost_partitions_jobs
+ # @note We do not use scheduler here as those jobs are not meant to be order optimized in
+ #   any way. Since they operate occasionally it is irrelevant.
+ def schedule_revoke_lost_partitions_jobs
  revoked_partitions = @client.rebalance_manager.revoked_partitions

  return false if revoked_partitions.empty?

  revoked_partitions.each do |topic, partitions|
    partitions.each do |partition|
-     pause = @pauses_manager.fetch(topic, partition)
-     executor = @executors.fetch(topic, partition, pause)
+     pause_tracker = @pauses_manager.fetch(topic, partition)
+     executor = @executors.fetch(topic, partition, pause_tracker)
      @jobs_queue << Processing::Jobs::Revoked.new(executor)
    end
  end
@@ -122,8 +126,8 @@
  # Takes the messages per topic partition and enqueues processing jobs in threads.
  #
  # @param messages_buffer [Karafka::Connection::MessagesBuffer] buffer with messages
- def distribute_partitions_jobs(messages_buffer)
-   messages_buffer.each do |topic, partition, messages|
+ def schedule_partitions_jobs(messages_buffer)
+   @scheduler.call(messages_buffer) do |topic, partition, messages|
    pause = @pauses_manager.fetch(topic, partition)

    next if pause.paused?
@@ -10,6 +10,10 @@ module Karafka
  class MessagesBuffer
  attr_reader :size

+ extend Forwardable
+
+ def_delegators :@groups, :each
+
  # @return [Karafka::Connection::MessagesBuffer] buffer instance
  def initialize
    @size = 0
@@ -20,19 +24,6 @@
  end
  end

- # Iterates over aggregated data providing messages per topic partition.
- #
- # @yieldparam [String] topic name
- # @yieldparam [Integer] partition number
- # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
- def each
-   @groups.each do |topic, partitions|
-     partitions.each do |partition, messages|
-       yield(topic, partition, messages)
-     end
-   end
- end
-
  # Adds a message to the buffer.
  #
  # @param message [Rdkafka::Consumer::Message] raw rdkafka message
@@ -12,11 +12,11 @@ module Karafka
  end
  end

- # Creates or fetches pause of a given topic partition.
+ # Creates or fetches pause tracker of a given topic partition.
  #
  # @param topic [String] topic name
  # @param partition [Integer] partition number
- # @return [Karafka::TimeTrackers::Pause] pause instance
+ # @return [Karafka::TimeTrackers::Pause] pause tracker instance
  def fetch(topic, partition)
    @pauses[topic][partition] ||= TimeTrackers::Pause.new(
      timeout: Karafka::App.config.pause_timeout,
@@ -32,6 +32,7 @@ module Karafka
  required(:routing_builder)
  required(:status)
  required(:process)
+ required(:scheduler)
  required(:subscription_groups_builder)
  end
  end
@@ -22,6 +22,7 @@ module Karafka
  app.stopping
  app.stopped

+ consumer.prepared
  consumer.consumed
  consumer.revoked
  consumer.shutdown
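Since `consumer.prepared` is now a registered event, instrumentation can hook into it. A short sketch (the log line is illustrative):

```ruby
# Subscribing to the newly registered consumer.prepared event; the payload
# carries the consumer instance under :caller, as instrumented in BaseConsumer
Karafka.monitor.subscribe('consumer.prepared') do |event|
  consumer = event[:caller]
  puts "prepared #{consumer.topic.name}/#{consumer.messages.metadata.partition}"
end
```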
@@ -1,18 +1,18 @@
  # frozen_string_literal: true

- # This Karafka component is a Pro component.
- # All of the commercial components are present in the lib/karafka/pro directory of this repository
- # and their usage requires commercial license agreement.
- #
- # Karafka has also commercial-friendly license, commercial support and commercial components.
- #
- # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
- # your code to Maciej Mensfeld.
-
  module Karafka
  module Pro
  # Karafka Pro ActiveJob components
  module ActiveJob
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+ # of your code to Maciej Mensfeld.
+
  # Pro dispatcher that sends the ActiveJob job to a proper topic based on the queue name
  # and that allows to inject additional options into the producer, effectively allowing for a
  # much better and more granular control over the dispatch and consumption process.
@@ -1,17 +1,17 @@
  # frozen_string_literal: true

- # This Karafka component is a Pro component.
- # All of the commercial components are present in the lib/karafka/pro directory of this repository
- # and their usage requires commercial license agreement.
- #
- # Karafka has also commercial-friendly license, commercial support and commercial components.
- #
- # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
- # your code to Maciej Mensfeld.
-
  module Karafka
  module Pro
  module ActiveJob
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+ # of your code to Maciej Mensfeld.
+
  # Contract for validating the options that can be altered with `#karafka_options` per job
  # class that works with Pro features.
  class JobOptionsContract < ::Karafka::ActiveJob::JobOptionsContract
@@ -1,15 +1,16 @@
  # frozen_string_literal: true

- # This Karafka component is a Pro component.
- # All of the commercial components are present in the lib/karafka/pro directory of this repository
- # and their usage requires commercial license agreement.
- #
- # Karafka has also commercial-friendly license, commercial support and commercial components.
- #
- # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
- # your code to Maciej Mensfeld.
  module Karafka
  module Pro
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+ # of your code to Maciej Mensfeld.
+
  # Loader requires and loads all the pro components only when they are needed
  class Loader
  class << self
@@ -17,11 +18,15 @@ module Karafka
  # @param config [Dry::Configurable::Config] whole app config that we can alter with pro
  #   components
  def setup(config)
+   require_relative 'performance_tracker'
    require_relative 'active_job/dispatcher'
    require_relative 'active_job/job_options_contract'

    config.internal.active_job.dispatcher = ActiveJob::Dispatcher.new
    config.internal.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
+
+   # Monitor time needed to process each message from a single partition
+   config.monitor.subscribe(PerformanceTracker.instance)
  end
  end
  end
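Once the loader has subscribed the tracker to the monitor, collected percentiles can be read back. A sketch with illustrative topic and partition values (the time unit follows whatever the monitor reports in `event[:time]`):

```ruby
# Reading back the p95 per-message processing time gathered by the tracker
tracker = Karafka::Pro::PerformanceTracker.instance
p tracker.processing_time_p95('visits', 0) # => 0 until samples are collected
```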
@@ -0,0 +1,80 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module Pro
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+ # of your code to Maciej Mensfeld.
+
+ # Tracker used to keep track of performance metrics
+ # It provides insights that can be used to optimize processing flow
+ class PerformanceTracker
+ include Singleton
+
+ # How many samples do we collect per topic partition
+ SAMPLES_COUNT = 200
+
+ private_constant :SAMPLES_COUNT
+
+ # Builds up nested concurrent hash for data tracking
+ def initialize
+   @processing_times = Concurrent::Hash.new do |topics_hash, topic|
+     topics_hash[topic] = Concurrent::Hash.new do |partitions_hash, partition|
+       # This array does not have to be concurrent because we always access single partition
+       # data via instrumentation that operates in a single thread via consumer
+       partitions_hash[partition] = []
+     end
+   end
+ end
+
+ # @param topic [String]
+ # @param partition [Integer]
+ # @return [Float] p95 processing time of a single message from a single topic partition
+ def processing_time_p95(topic, partition)
+   values = @processing_times[topic][partition]
+
+   return 0 if values.empty?
+   return values.first if values.size == 1
+
+   percentile(0.95, values)
+ end
+
+ # @private
+ # @param event [Dry::Events::Event] event details
+ # Tracks time taken to process a single message of a given topic partition
+ def on_consumer_consumed(event)
+   consumer = event[:caller]
+   messages = consumer.messages
+   topic = messages.metadata.topic
+   partition = messages.metadata.partition
+
+   samples = @processing_times[topic][partition]
+   samples << event[:time] / messages.count
+
+   return unless samples.size > SAMPLES_COUNT
+
+   samples.shift
+ end
+
+ private
+
+ # Computes the requested percentile out of provided values
+ # @param percentile [Float]
+ # @param values [Array<Numeric>] all the values based on which we should compute the percentile
+ # @return [Float] computed percentile
+ def percentile(percentile, values)
+   values_sorted = values.sort
+
+   floor = (percentile * (values_sorted.length - 1) + 1).floor - 1
+   mod = (percentile * (values_sorted.length - 1) + 1).modulo(1)
+
+   values_sorted[floor] + (mod * (values_sorted[floor + 1] - values_sorted[floor]))
+ end
+ end
+ end
+ end
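A quick worked instance of the percentile formula above, using hypothetical sample values:

```ruby
# p95 over [10.0, 20.0, 30.0, 40.0]:
#   floor = (0.95 * 3 + 1).floor - 1  # => 2
#   mod   = (0.95 * 3 + 1).modulo(1)  # => 0.85
#   30.0 + 0.85 * (40.0 - 30.0)       # => 38.5
values = [40.0, 10.0, 30.0, 20.0].sort
floor = (0.95 * (values.length - 1) + 1).floor - 1
mod = (0.95 * (values.length - 1) + 1).modulo(1)
p values[floor] + (mod * (values[floor + 1] - values[floor])) # => 38.5
```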
@@ -4,10 +4,10 @@ module Karafka
  # Namespace that encapsulates all the logic related to processing data.
  module Processing
  # Executors:
- # - run consumers code with provided messages batch (for `#call`) or run given teardown
- #   operations when needed from separate threads.
- # - they re-create consumer instances in case of partitions that were revoked
- #   and assigned back.
+ # - run consumers code (for `#call`) or run given preparation / teardown operations when needed
+ #   from separate threads.
+ # - they re-create consumer instances in case of partitions that were revoked and assigned
+ #   back.
  #
  # @note Executors are not removed after partition is revoked. They are not that big and will
  #   be re-used in case of a re-claim
@@ -21,21 +21,21 @@ module Karafka
  # @param group_id [String] id of the subscription group to which the executor belongs
  # @param client [Karafka::Connection::Client] kafka client
  # @param topic [Karafka::Routing::Topic] topic for which this executor will run
- # @param pause [Karafka::TimeTrackers::Pause] fetch pause object for crash pausing
- def initialize(group_id, client, topic, pause)
+ # @param pause_tracker [Karafka::TimeTrackers::Pause] fetch pause tracker for pausing
+ def initialize(group_id, client, topic, pause_tracker)
  @id = SecureRandom.uuid
  @group_id = group_id
  @client = client
  @topic = topic
- @pause = pause
+ @pause_tracker = pause_tracker
  end

- # Runs consumer data processing against given batch and handles failures and errors.
+ # Builds the consumer instance and sets all that is needed to run the user consumption logic
  #
  # @param messages [Array<Rdkafka::Consumer::Message>] raw rdkafka messages
  # @param received_at [Time] the moment we've received the batch (actually the moment we've
  #   enqueued it, but good enough)
- def consume(messages, received_at)
+ def prepare(messages, received_at)
  # Recreate consumer with each batch if persistence is not enabled
  # We reload the consumers with each batch instead of relying on some external signals
  # when needed for consistency. That way devs may have it on or off and not in this
@@ -49,6 +49,11 @@ module Karafka
    received_at
  )

+ consumer.on_prepared
+ end
+
+ # Runs consumer data processing against given batch and handles failures and errors.
+ def consume
  # We run the consumer client logic...
  consumer.on_consume
  end
@@ -86,7 +91,7 @@ module Karafka
  consumer = @topic.consumer.new
  consumer.topic = @topic
  consumer.client = @client
- consumer.pause = @pause
+ consumer.pause_tracker = @pause_tracker
  consumer.producer = ::Karafka::App.producer
  consumer
  end
@@ -5,6 +5,8 @@ module Karafka
  # Namespace for all the jobs that are supposed to run in workers.
  module Jobs
  # Base class for all the job types that are supposed to run in worker threads.
+ # Each job can have 3 main entry-points: `#prepare`, `#call` and `#teardown`
+ # Only `#call` is required.
  class Base
  extend Forwardable

@@ -12,6 +14,20 @@ module Karafka
  def_delegators :executor, :id, :group_id

  attr_reader :executor
+
+ # When redefined can run any code that should run before executing the proper code
+ def prepare; end
+
+ # When redefined can run any code that should run after executing the proper code
+ def teardown; end
+
+ # @return [Boolean] is this a non-blocking job
+ # @note A blocking job is a job that will cause the job queue to wait until it is finished
+ #   before removing the lock on new jobs being added
+ # @note All the jobs are blocking by default
+ def non_blocking?
+   false
+ end
  end
  end
  end
@@ -18,9 +18,14 @@ module Karafka
  super()
  end

- # Runs the given executor.
+ # Runs the preparations on the executor
+ def prepare
+   executor.prepare(@messages, @created_at)
+ end
+
+ # Runs the given executor
  def call
-   executor.consume(@messages, @created_at)
+   executor.consume
  end
  end
  end
@@ -21,7 +21,7 @@ module Karafka
  # We cannot use a single semaphore as it could potentially block in listeners that should
  # process with their data and also could unlock when a given group needs to remain locked
  @semaphores = Hash.new { |h, k| h[k] = Queue.new }
- @in_processing = Hash.new { |h, k| h[k] = {} }
+ @in_processing = Hash.new { |h, k| h[k] = [] }
  @mutex = Mutex.new
  end

@@ -44,9 +44,9 @@
  @mutex.synchronize do
    group = @in_processing[job.group_id]

-   raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.key?(job.id)
+   raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.include?(job)

-   group[job.id] = true
+   group << job
  end

  @queue << job
@@ -60,14 +60,21 @@
  @queue.pop
  end

+ # Causes the wait lock to re-check the lock conditions and potentially unlock.
+ # @param group_id [String] id of the group we want to unlock for one tick
+ # @note This does not release the wait lock. It just causes a conditions recheck
+ def tick(group_id)
+   @semaphores[group_id] << true
+ end
+
  # Marks a given job from a given group as completed. When there are no more jobs from a given
  # group to be executed, we won't wait.
  #
  # @param [Jobs::Base] job that was completed
  def complete(job)
    @mutex.synchronize do
-     @in_processing[job.group_id].delete(job.id)
-     @semaphores[job.group_id] << true
+     @in_processing[job.group_id].delete(job)
+     tick(job.group_id)
    end
  end

@@ -79,7 +86,7 @@
  @mutex.synchronize do
    @in_processing[group_id].clear
    # We unlock it just in case it was blocked when clearing started
-   @semaphores[group_id] << true
+   tick(group_id)
  end
  end

@@ -108,13 +115,15 @@
  # @param group_id [String] id of the group in which jobs we're interested.
  # @return [Boolean] should we keep waiting or not
  def wait?(group_id)
+   group = @in_processing[group_id]
+
    # If it is stopping, all the previous messages that are processed at the moment need to
    # finish. Otherwise we may risk closing the client and committing offsets afterwards
-   return false if Karafka::App.stopping? && @in_processing[group_id].empty?
+   return false if Karafka::App.stopping? && group.empty?
    return false if @queue.closed?
-   return false if @in_processing[group_id].empty?
+   return false if group.empty?

-   true
+   !group.all?(&:non_blocking?)
  end
  end
  end
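A self-contained illustration of the final predicate above, with stand-in structs rather than real Karafka job classes: waiting continues as long as at least one enqueued job is blocking.

```ruby
# Stand-in jobs exposing only the non_blocking? flag
Job = Struct.new(:blocking) do
  def non_blocking?
    !blocking
  end
end

[[Job.new(true)], [Job.new(false)], [Job.new(true), Job.new(false)]].each do |group|
  # Mirrors `!group.all?(&:non_blocking?)` from JobsQueue#wait?
  puts "blocking: #{group.map(&:blocking).inspect} => wait? #{!group.all?(&:non_blocking?)}"
end
# blocking: [true]        => wait? true
# blocking: [false]       => wait? false
# blocking: [true, false] => wait? true
```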
@@ -4,6 +4,18 @@ module Karafka
  module Processing
  # Workers are used to run jobs in separate threads.
  # Workers are the main processing units of the Karafka framework.
+ #
+ # Each job runs in three stages:
+ # - prepare - here we can run any code that we would need to run blocking before we allow
+ #   the job to run fully async (non blocking). This will always run in a blocking
+ #   way and can be used to make sure all the resources and external dependencies
+ #   are satisfied before going async.
+ #
+ # - call - actual processing logic that can run sync or async
+ #
+ # - teardown - it should include any code that we want to run after we executed the user
+ #   code. This can be used to unlock certain resources or do other things that are
+ #   not user code but need to run after the user code is executed.
  class Worker
  extend Forwardable

@@ -33,7 +45,18 @@ module Karafka
  job = @jobs_queue.pop

  if job
+   job.prepare
+
+   # If a job is marked as non blocking, we can run a tick in the job queue and if there
+   # are no other blocking factors, the job queue will be unlocked.
+   # If this does not run, everything stays blocking and the job queue won't let
+   # other work pass until this job is done.
+   @jobs_queue.tick(job.group_id) if job.non_blocking?
+
    job.call
+
+   job.teardown
+
    true
  else
    false
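Given this loop, a non-blocking job only needs to flip the flag. A hypothetical sketch (this release ships no such subclass itself):

```ruby
# Hypothetical job: #prepare still runs in the blocking phase, but once the
# queue is ticked, the fetch loop may proceed while #call is still executing
module Karafka
  module Processing
    module Jobs
      class NonBlockingConsume < Consume
        # Flipping this releases the listener wait earlier (see JobsQueue#wait?)
        def non_blocking?
          true
        end
      end
    end
  end
end
```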
@@ -0,0 +1,21 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ # FIFO scheduler for messages coming from various topics and partitions
+ class Scheduler
+ # Yields messages from partitions in the fifo order
+ #
+ # @param messages_buffer [Karafka::Connection::MessagesBuffer] messages buffer with data from
+ #   multiple topics and partitions
+ # @yieldparam [String] topic name
+ # @yieldparam [Integer] partition number
+ # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
+ def call(messages_buffer)
+   messages_buffer.each do |topic, partitions|
+     partitions.each do |partition, messages|
+       yield(topic, partition, messages)
+     end
+   end
+ end
+ end
+ end
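Because the listener only invokes `#call(messages_buffer)` with a block, alternative orderings can be plugged in. A sketch of a hypothetical largest-batch-first scheduler honouring the same contract:

```ruby
# Hypothetical scheduler yielding the biggest batches first instead of FIFO.
# MessagesBuffer#each yields topic => partitions pairs (see the delegation above)
class LargestBatchFirstScheduler
  def call(messages_buffer)
    batches = []

    messages_buffer.each do |topic, partitions|
      partitions.each do |partition, messages|
        batches << [topic, partition, messages]
      end
    end

    # Start the longest-running partitions as early as possible
    batches.sort_by { |_, _, messages| -messages.size }.each do |batch|
      yield(*batch)
    end
  end
end
```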
@@ -96,6 +96,8 @@ module Karafka
  # option subscription_groups_builder [Routing::SubscriptionGroupsBuilder] subscription
  #   group builder
  setting :subscription_groups_builder, default: Routing::SubscriptionGroupsBuilder.new
+ # option scheduler [Class] scheduler we will be using
+ setting :scheduler, default: Scheduler.new

  # Karafka components for ActiveJob
  setting :active_job do
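A sketch of swapping the default scheduler in an app's setup block. The class name is hypothetical, and note that the setting lives under the non-public `internal` namespace (the listener reads `config.internal.scheduler`), so this may change between releases:

```ruby
class KarafkaApp < Karafka::App
  setup do |config|
    config.kafka = { 'bootstrap.servers': '127.0.0.1:9092' }
    # Wires in the custom scheduler sketched above
    config.internal.scheduler = LargestBatchFirstScheduler.new
  end
end
```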
@@ -41,9 +41,12 @@ module Karafka
  # Pauses the processing from now till the end of the interval (backoff or non-backoff)
  # and records the count.
- def pause
+ # @param timeout [Integer] timeout value in milliseconds that overwrites the default timeout
+ # @note Providing this value can be useful when we explicitly want to pause for a certain
+ #   period of time, outside of any regular pausing logic
+ def pause(timeout = backoff_interval)
  @started_at = now
- @ends_at = @started_at + backoff_interval
+ @ends_at = @started_at + timeout
  @count += 1
  end

@@ -53,6 +56,11 @@
  @ends_at = nil
  end

+ # Expires the pause, so the paused partition can be processed again
+ def expire
+   @ends_at = nil
+ end
+
  # @return [Boolean] are we paused from processing
  def paused?
    !@started_at.nil?
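A short usage sketch of the extended tracker. The constructor keywords are an assumption based on how the pauses manager builds trackers (only `timeout:` is visible in this diff); the values are illustrative:

```ruby
pause = Karafka::TimeTrackers::Pause.new(
  timeout: 1_000,     # assumed keywords mirroring the pause_* settings
  max_timeout: 10_000,
  exponential_backoff: true
)

pause.pause        # pause using the computed backoff interval
pause.pause(5_000) # new in beta1: explicit 5_000 ms pause
pause.expire       # new in beta1: force-expire so processing can resume at once
```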
@@ -3,5 +3,5 @@
  # Main module namespace
  module Karafka
  # Current Karafka version
- VERSION = '2.0.0.alpha6'
+ VERSION = '2.0.0.beta1'
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: karafka
  version: !ruby/object:Gem::Version
- version: 2.0.0.alpha6
+ version: 2.0.0.beta1
  platform: ruby
  authors:
  - Maciej Mensfeld
@@ -34,7 +34,7 @@ cert_chain:
  R2P11bWoCtr70BsccVrN8jEhzwXngMyI2gVt750Y+dbTu1KgRqZKp/ECe7ZzPzXj
  pIy9vHxTANKYVyI4qj8OrFdEM5BQNu8oQpL0iQ==
  -----END CERTIFICATE-----
- date: 2022-04-17 00:00:00.000000000 Z
+ date: 2022-05-22 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: dry-configurable
@@ -228,6 +228,7 @@ files:
  - lib/karafka/pro/active_job/dispatcher.rb
  - lib/karafka/pro/active_job/job_options_contract.rb
  - lib/karafka/pro/loader.rb
+ - lib/karafka/pro/performance_tracker.rb
  - lib/karafka/process.rb
  - lib/karafka/processing/executor.rb
  - lib/karafka/processing/executors_buffer.rb
@@ -248,6 +249,7 @@ files:
  - lib/karafka/routing/subscription_groups_builder.rb
  - lib/karafka/routing/topic.rb
  - lib/karafka/runner.rb
+ - lib/karafka/scheduler.rb
  - lib/karafka/serialization/json/deserializer.rb
  - lib/karafka/server.rb
  - lib/karafka/setup/config.rb
@@ -282,7 +284,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: 1.3.1
  requirements: []
- rubygems_version: 3.3.3
+ rubygems_version: 3.3.7
  signing_key:
  specification_version: 4
  summary: Ruby based framework for working with Apache Kafka
metadata.gz.sig CHANGED
Binary file