karafka 2.0.0.alpha5 → 2.0.0.beta2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +35 -2
- data/Gemfile.lock +6 -6
- data/bin/integrations +55 -43
- data/config/errors.yml +1 -0
- data/docker-compose.yml +4 -1
- data/lib/active_job/karafka.rb +2 -2
- data/lib/karafka/active_job/routing/extensions.rb +21 -0
- data/lib/karafka/base_consumer.rb +65 -12
- data/lib/karafka/connection/client.rb +36 -6
- data/lib/karafka/connection/listener.rb +92 -27
- data/lib/karafka/connection/listeners_batch.rb +24 -0
- data/lib/karafka/connection/messages_buffer.rb +49 -22
- data/lib/karafka/connection/pauses_manager.rb +2 -2
- data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
- data/lib/karafka/connection/rebalance_manager.rb +35 -20
- data/lib/karafka/contracts/config.rb +8 -0
- data/lib/karafka/helpers/async.rb +33 -0
- data/lib/karafka/instrumentation/monitor.rb +2 -1
- data/lib/karafka/messages/batch_metadata.rb +26 -3
- data/lib/karafka/messages/builders/batch_metadata.rb +17 -29
- data/lib/karafka/messages/builders/message.rb +1 -0
- data/lib/karafka/messages/builders/messages.rb +4 -12
- data/lib/karafka/pro/active_job/consumer.rb +21 -0
- data/lib/karafka/pro/active_job/dispatcher.rb +10 -10
- data/lib/karafka/pro/active_job/job_options_contract.rb +9 -9
- data/lib/karafka/pro/loader.rb +17 -8
- data/lib/karafka/pro/performance_tracker.rb +80 -0
- data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +38 -0
- data/lib/karafka/pro/scheduler.rb +54 -0
- data/lib/karafka/processing/executor.rb +19 -11
- data/lib/karafka/processing/executors_buffer.rb +15 -7
- data/lib/karafka/processing/jobs/base.rb +28 -0
- data/lib/karafka/processing/jobs/consume.rb +11 -4
- data/lib/karafka/processing/jobs_queue.rb +28 -16
- data/lib/karafka/processing/worker.rb +30 -9
- data/lib/karafka/processing/workers_batch.rb +5 -0
- data/lib/karafka/railtie.rb +12 -0
- data/lib/karafka/routing/consumer_group.rb +1 -1
- data/lib/karafka/routing/subscription_group.rb +1 -1
- data/lib/karafka/routing/subscription_groups_builder.rb +3 -2
- data/lib/karafka/routing/topics.rb +38 -0
- data/lib/karafka/runner.rb +19 -27
- data/lib/karafka/scheduler.rb +20 -0
- data/lib/karafka/server.rb +24 -23
- data/lib/karafka/setup/config.rb +4 -1
- data/lib/karafka/time_trackers/pause.rb +10 -2
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +13 -4
- metadata.gz.sig +0 -0
- data/lib/karafka/active_job/routing_extensions.rb +0 -18
data/lib/karafka/messages/builders/batch_metadata.rb
CHANGED
@@ -8,42 +8,30 @@ module Karafka
       class << self
         # Creates metadata based on the kafka batch data.
         #
-        # @param
+        # @param messages [Array<Karafka::Messages::Message>] messages array
         # @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
         # @param scheduled_at [Time] moment when the batch was scheduled for processing
         # @return [Karafka::Messages::BatchMetadata] batch metadata object
-        #
-        #
-        #
-        def call(
-          now = Time.now
-
+        #
+        # @note We do not set `processed_at` as this needs to be assigned when the batch is
+        #   picked up for processing.
+        def call(messages, topic, scheduled_at)
           Karafka::Messages::BatchMetadata.new(
-            size:
-            first_offset:
-            last_offset:
+            size: messages.count,
+            first_offset: messages.first.offset,
+            last_offset: messages.last.offset,
             deserializer: topic.deserializer,
-            partition:
+            partition: messages.first.partition,
             topic: topic.name,
+            # We go with the assumption that the creation of the whole batch is the last message
+            # creation time
+            created_at: messages.last.timestamp,
+            # When this batch was built and scheduled for execution
             scheduled_at: scheduled_at,
-            #
-            #
-
-
-            # one of the workers
-            processing_lag: time_distance_in_ms(now, scheduled_at)
-          ).freeze
-        end
-
-        private
-
-        # Computes time distance in between two times in ms
-        #
-        # @param time1 [Time]
-        # @param time2 [Time]
-        # @return [Integer] distance in between two times in ms
-        def time_distance_in_ms(time1, time2)
-          ((time1 - time2) * 1_000).round
+            # We build the batch metadata when we pick up the job in the worker, thus we can use
+            # current time here
+            processed_at: Time.now
+          )
         end
       end
     end
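For orientation, a minimal sketch of how the reworked builder is invoked after this change. The `messages`, `topic` and `scheduled_at` objects are assumed to already exist; this is an illustration, not code from the gem:

    metadata = Karafka::Messages::Builders::BatchMetadata.call(
      messages,     # Array<Karafka::Messages::Message>
      topic,        # Karafka::Routing::Topic
      scheduled_at  # Time the batch was scheduled for processing
    )

    metadata.created_at   # timestamp of the last message in the batch
    metadata.processed_at # Time.now captured when the worker built the batch

Because the builder now runs when the worker picks the batch up, `processed_at` can simply be `Time.now`, and the old `processing_lag` / `time_distance_in_ms` helpers are gone.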
data/lib/karafka/messages/builders/messages.rb
CHANGED
@@ -9,27 +9,19 @@ module Karafka
         # Creates messages batch with messages inside based on the incoming messages and the
         # topic from which it comes.
         #
-        # @param
+        # @param messages [Array<Karafka::Messages::Message>] karafka messages array
         # @param topic [Karafka::Routing::Topic] topic for which we're received messages
         # @param received_at [Time] moment in time when the messages were received
         # @return [Karafka::Messages::Messages] messages batch object
-        def call(
-          messages_array = kafka_messages.map do |message|
-            Karafka::Messages::Builders::Message.call(
-              message,
-              topic,
-              received_at
-            )
-          end
-
+        def call(messages, topic, received_at)
           metadata = BatchMetadata.call(
-
+            messages,
             topic,
             received_at
           ).freeze
 
           Karafka::Messages::Messages.new(
-
+            messages,
             metadata
           ).freeze
         end
data/lib/karafka/pro/active_job/consumer.rb
ADDED
@@ -0,0 +1,21 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Pro
+    module ActiveJob
+      # This Karafka component is a Pro component.
+      # All of the commercial components are present in the lib/karafka/pro directory of this
+      # repository and their usage requires commercial license agreement.
+      #
+      # Karafka has also commercial-friendly license, commercial support and commercial components.
+      #
+      # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+      # of your code to Maciej Mensfeld.
+
+      # Pro ActiveJob consumer that is suppose to handle long-running jobs as well as short
+      # running jobs
+      class Consumer < Karafka::ActiveJob::Consumer
+      end
+    end
+  end
+end
data/lib/karafka/pro/active_job/dispatcher.rb
CHANGED
@@ -1,24 +1,24 @@
 # frozen_string_literal: true
 
-# This Karafka component is a Pro component.
-# All of the commercial components are present in the lib/karafka/pro directory of this repository
-# and their usage requires commercial license agreement.
-#
-# Karafka has also commercial-friendly license, commercial support and commercial components.
-#
-# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
-# your code to Maciej Mensfeld.
-
 module Karafka
   module Pro
     # Karafka Pro ActiveJob components
     module ActiveJob
+      # This Karafka component is a Pro component.
+      # All of the commercial components are present in the lib/karafka/pro directory of this
+      # repository and their usage requires commercial license agreement.
+      #
+      # Karafka has also commercial-friendly license, commercial support and commercial components.
+      #
+      # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+      # of your code to Maciej Mensfeld.
+
       # Pro dispatcher that sends the ActiveJob job to a proper topic based on the queue name
       # and that allows to inject additional options into the producer, effectively allowing for a
       # much better and more granular control over the dispatch and consumption process.
       class Dispatcher < ::Karafka::ActiveJob::Dispatcher
         # Defaults for dispatching
-        #
+        # They can be updated by using `#karafka_options` on the job
         DEFAULTS = {
           dispatch_method: :produce_async,
           # We don't create a dummy proc based partitioner as we would have to evaluate it with
data/lib/karafka/pro/active_job/job_options_contract.rb
CHANGED
@@ -1,17 +1,17 @@
 # frozen_string_literal: true
 
-# This Karafka component is a Pro component.
-# All of the commercial components are present in the lib/karafka/pro directory of this repository
-# and their usage requires commercial license agreement.
-#
-# Karafka has also commercial-friendly license, commercial support and commercial components.
-#
-# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
-# your code to Maciej Mensfeld.
-
 module Karafka
   module Pro
     module ActiveJob
+      # This Karafka component is a Pro component.
+      # All of the commercial components are present in the lib/karafka/pro directory of this
+      # repository and their usage requires commercial license agreement.
+      #
+      # Karafka has also commercial-friendly license, commercial support and commercial components.
+      #
+      # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+      # of your code to Maciej Mensfeld.
+
       # Contract for validating the options that can be altered with `#karafka_options` per job
       # class that works with Pro features.
       class JobOptionsContract < ::Karafka::ActiveJob::JobOptionsContract
data/lib/karafka/pro/loader.rb
CHANGED
@@ -1,15 +1,16 @@
 # frozen_string_literal: true
 
-# This Karafka component is a Pro component.
-# All of the commercial components are present in the lib/karafka/pro directory of this repository
-# and their usage requires commercial license agreement.
-#
-# Karafka has also commercial-friendly license, commercial support and commercial components.
-#
-# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
-# your code to Maciej Mensfeld.
 module Karafka
   module Pro
+    # This Karafka component is a Pro component.
+    # All of the commercial components are present in the lib/karafka/pro directory of this
+    # repository and their usage requires commercial license agreement.
+    #
+    # Karafka has also commercial-friendly license, commercial support and commercial components.
+    #
+    # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+    # of your code to Maciej Mensfeld.
+
     # Loader requires and loads all the pro components only when they are needed
     class Loader
       class << self
@@ -17,11 +18,19 @@ module Karafka
         # @param config [Dry::Configurable::Config] whole app config that we can alter with pro
         #   components
         def setup(config)
+          require_relative 'performance_tracker'
+          require_relative 'scheduler'
+          require_relative 'processing/jobs/consume_non_blocking'
+          require_relative 'active_job/consumer'
           require_relative 'active_job/dispatcher'
           require_relative 'active_job/job_options_contract'
 
+          config.internal.scheduler = Scheduler.new
+          config.internal.active_job.consumer = ActiveJob::Consumer
           config.internal.active_job.dispatcher = ActiveJob::Dispatcher.new
           config.internal.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
+
+          config.monitor.subscribe(PerformanceTracker.instance)
         end
       end
     end
data/lib/karafka/pro/performance_tracker.rb
ADDED
@@ -0,0 +1,80 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Pro
+    # This Karafka component is a Pro component.
+    # All of the commercial components are present in the lib/karafka/pro directory of this
+    # repository and their usage requires commercial license agreement.
+    #
+    # Karafka has also commercial-friendly license, commercial support and commercial components.
+    #
+    # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+    # of your code to Maciej Mensfeld.
+
+    # Tracker used to keep track of performance metrics
+    # It provides insights that can be used to optimize processing flow
+    class PerformanceTracker
+      include Singleton
+
+      # How many samples do we collect per topic partition
+      SAMPLES_COUNT = 200
+
+      private_constant :SAMPLES_COUNT
+
+      # Builds up nested concurrent hash for data tracking
+      def initialize
+        @processing_times = Concurrent::Hash.new do |topics_hash, topic|
+          topics_hash[topic] = Concurrent::Hash.new do |partitions_hash, partition|
+            # This array does not have to be concurrent because we always access single partition
+            # data via instrumentation that operates in a single thread via consumer
+            partitions_hash[partition] = []
+          end
+        end
+      end
+
+      # @param topic [String]
+      # @param partition [Integer]
+      # @return [Float] p95 processing time of a single message from a single topic partition
+      def processing_time_p95(topic, partition)
+        values = @processing_times[topic][partition]
+
+        return 0 if values.empty?
+        return values.first if values.size == 1
+
+        percentile(0.95, values)
+      end
+
+      # @private
+      # @param event [Dry::Events::Event] event details
+      # Tracks time taken to process a single message of a given topic partition
+      def on_consumer_consumed(event)
+        consumer = event[:caller]
+        messages = consumer.messages
+        topic = messages.metadata.topic
+        partition = messages.metadata.partition
+
+        samples = @processing_times[topic][partition]
+        samples << event[:time] / messages.count
+
+        return unless samples.size > SAMPLES_COUNT
+
+        samples.shift
+      end
+
+      private
+
+      # Computers the requested percentile out of provided values
+      # @param percentile [Float]
+      # @param values [Array<String>] all the values based on which we should
+      # @return [Float] computed percentile
+      def percentile(percentile, values)
+        values_sorted = values.sort
+
+        floor = (percentile * (values_sorted.length - 1) + 1).floor - 1
+        mod = (percentile * (values_sorted.length - 1) + 1).modulo(1)
+
+        values_sorted[floor] + (mod * (values_sorted[floor + 1] - values_sorted[floor]))
+      end
+    end
+  end
+end
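The `#percentile` method above uses rank-based linear interpolation. A small worked example (the sample values are made up for illustration) shows what it computes for p95 over five samples:

    values_sorted = [10.0, 20.0, 30.0, 40.0, 50.0]
    percentile = 0.95

    rank  = percentile * (values_sorted.length - 1) + 1 # => 4.8
    floor = rank.floor - 1                              # => 3
    mod   = rank.modulo(1)                              # => ~0.8

    # Interpolate between the 4th and 5th sorted samples:
    values_sorted[floor] + (mod * (values_sorted[floor + 1] - values_sorted[floor]))
    # => ~48.0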
data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb
ADDED
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Pro
+    # Pro components related to processing part of Karafka
+    module Processing
+      # Pro jobs
+      module Jobs
+        # This Karafka component is a Pro component.
+        # All of the commercial components are present in the lib/karafka/pro directory of this
+        # repository and their usage requires commercial license agreement.
+        #
+        # Karafka has also commercial-friendly license, commercial support and commercial
+        # components.
+        #
+        # By sending a pull request to the pro components, you are agreeing to transfer the
+        # copyright of your code to Maciej Mensfeld.
+
+        # The main job type in a non-blocking variant.
+        # This variant works "like" the regular consumption but pauses the partition for as long
+        # as it is needed until a job is done.
+        #
+        # It can be useful when having long lasting jobs that would exceed `max.poll.interval`
+        # if would block.
+        #
+        # @note It needs to be working with a proper consumer that will handle the partition
+        #   management. This layer of the framework knows nothing about Kafka messages consumption.
+        class ConsumeNonBlocking < ::Karafka::Processing::Jobs::Consume
+          # Releases the blocking lock after it is done with the preparation phase for this job
+          def prepare
+            super
+            @non_blocking = true
+          end
+        end
+      end
+    end
+  end
+end
data/lib/karafka/pro/scheduler.rb
ADDED
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Pro
+    # This Karafka component is a Pro component.
+    # All of the commercial components are present in the lib/karafka/pro directory of this
+    # repository and their usage requires commercial license agreement.
+    #
+    # Karafka has also commercial-friendly license, commercial support and commercial components.
+    #
+    # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+    # of your code to Maciej Mensfeld.
+
+    # Optimizes scheduler that takes into consideration of execution time needed to process
+    # messages from given topics partitions. It uses the non-preemptive LJF algorithm
+    #
+    # This scheduler is designed to optimize execution times on jobs that perform IO operations as
+    # when taking IO into consideration, the can achieve optimized parallel processing.
+    #
+    # This scheduler can also work with virtual partitions.
+    #
+    # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
+    # default FIFO scheduler from the default Karafka scheduler
+    class Scheduler < ::Karafka::Scheduler
+      # Schedules jobs in the LJF order for consumption
+      #
+      # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
+      # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
+      #
+      def schedule_consumption(queue, jobs_array)
+        pt = PerformanceTracker.instance
+
+        ordered = []
+
+        jobs_array.each do |job|
+          messages = job.messages
+          message = messages.first
+
+          cost = pt.processing_time_p95(message.topic, message.partition) * messages.size
+
+          ordered << [job, cost]
+        end
+
+        ordered.sort_by!(&:last)
+        ordered.reverse!
+        ordered.map!(&:first)
+
+        ordered.each do |job|
+          queue << job
+        end
+      end
+    end
+  end
+end
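To see the LJF ordering in isolation, here is a self-contained snippet mirroring the sorting done in `#schedule_consumption` (the `Job` struct and the costs are invented for the example; real costs come from `PerformanceTracker#processing_time_p95` multiplied by the batch size):

    Job = Struct.new(:name, :cost)

    jobs = [Job.new(:a, 5.0), Job.new(:b, 20.0), Job.new(:c, 1.0)]

    ordered = jobs.map { |job| [job, job.cost] }
    ordered.sort_by!(&:last)
    ordered.reverse!
    ordered.map!(&:first)

    ordered.map(&:name) # => [:b, :a, :c] - the longest estimated job is enqueued first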
data/lib/karafka/processing/executor.rb
CHANGED
@@ -4,10 +4,10 @@ module Karafka
   # Namespace that encapsulates all the logic related to processing data.
   module Processing
     # Executors:
-    # - run consumers code
-    #
-    # - they re-create consumer instances in case of partitions that were revoked
-    #
+    # - run consumers code (for `#call`) or run given preparation / teardown operations when needed
+    #   from separate threads.
+    # - they re-create consumer instances in case of partitions that were revoked and assigned
+    #   back.
     #
     # @note Executors are not removed after partition is revoked. They are not that big and will
     #   be re-used in case of a re-claim
@@ -18,24 +18,27 @@ module Karafka
       # @return [String] subscription group id to which a given executor belongs
       attr_reader :group_id
 
+      attr_reader :messages
+
       # @param group_id [String] id of the subscription group to which the executor belongs
       # @param client [Karafka::Connection::Client] kafka client
       # @param topic [Karafka::Routing::Topic] topic for which this executor will run
-      # @param
-      def initialize(group_id, client, topic,
+      # @param pause_tracker [Karafka::TimeTrackers::Pause] fetch pause tracker for pausing
+      def initialize(group_id, client, topic, pause_tracker)
         @id = SecureRandom.uuid
         @group_id = group_id
         @client = client
         @topic = topic
-        @
+        @pause_tracker = pause_tracker
       end
 
-      #
+      # Builds the consumer instance, builds messages batch and sets all that is needed to run the
+      # user consumption logic
       #
-      # @param messages [Array<
+      # @param messages [Array<Karafka::Messages::Message>]
       # @param received_at [Time] the moment we've received the batch (actually the moment we've)
       #   enqueued it, but good enough
-      def
+      def prepare(messages, received_at)
        # Recreate consumer with each batch if persistence is not enabled
        # We reload the consumers with each batch instead of relying on some external signals
        # when needed for consistency. That way devs may have it on or off and not in this
@@ -49,6 +52,11 @@ module Karafka
           received_at
         )
 
+        consumer.on_prepared
+      end
+
+      # Runs consumer data processing against given batch and handles failures and errors.
+      def consume
         # We run the consumer client logic...
         consumer.on_consume
       end
@@ -86,7 +94,7 @@ module Karafka
         consumer = @topic.consumer.new
         consumer.topic = @topic
         consumer.client = @client
-        consumer.
+        consumer.pause_tracker = @pause_tracker
         consumer.producer = ::Karafka::App.producer
         consumer
       end
data/lib/karafka/processing/executors_buffer.rb
CHANGED
@@ -23,21 +23,29 @@ module Karafka
           partition,
           pause
         )
-
+        ktopic = @subscription_group.topics.find(topic)
 
-
+        ktopic || raise(Errors::TopicNotFoundError, topic)
 
-        @buffer[
+        @buffer[ktopic][partition] ||= Executor.new(
           @subscription_group.id,
           @client,
-
+          ktopic,
           pause
         )
       end
 
-      #
-
-
+      # Iterates over all available executors and yields them together with topic and partition
+      #   info
+      # @yieldparam [Routing::Topic] karafka routing topic object
+      # @yieldparam [Integer] partition number
+      # @yieldparam [Executor] given executor
+      def each
+        @buffer.each do |ktopic, partitions|
+          partitions.each do |partition, executor|
+            yield(ktopic, partition, executor)
+          end
+        end
       end
 
       # Clears the executors buffer. Useful for critical errors recovery.
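A minimal sketch of how the new `#each` may be consumed, for example by a listener iterating its executors (the surrounding wiring is an assumption; only the yielded triplet comes from the code above):

    executors_buffer.each do |topic, partition, executor|
      puts "#{topic.name}/#{partition} -> executor #{executor.id}"
    end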
data/lib/karafka/processing/jobs/base.rb
CHANGED
@@ -5,6 +5,8 @@ module Karafka
     # Namespace for all the jobs that are suppose to run in workers.
     module Jobs
       # Base class for all the jobs types that are suppose to run in workers threads.
+      # Each job can have 3 main entry-points: `#prepare`, `#call` and `#teardown`
+      # Only `#call` is required.
       class Base
         extend Forwardable
 
@@ -12,6 +14,32 @@ module Karafka
         def_delegators :executor, :id, :group_id
 
         attr_reader :executor
+
+        # Creates a new job instance
+        def initialize
+          # All jobs are blocking by default and they can release the lock when blocking operations
+          # are done (if needed)
+          @non_blocking = false
+        end
+
+        # When redefined can run any code that should run before executing the proper code
+        def prepare; end
+
+        # When redefined can run any code that should run after executing the proper code
+        def teardown; end
+
+        # @return [Boolean] is this a non-blocking job
+        #
+        # @note Blocking job is a job, that will cause the job queue to wait until it is finished
+        #   before removing the lock on new jobs being added
+        #
+        # @note All the jobs are blocking by default
+        #
+        # @note Job **needs** to mark itself as non-blocking only **after** it is done with all
+        #   the blocking things (pausing partition, etc).
+        def non_blocking?
+          @non_blocking
+        end
       end
     end
   end
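Putting the new `Base` API together, a hypothetical custom job could look like this (class name and method bodies are illustrative only; `#call` is the single required entry-point):

    class LoggedJob < Karafka::Processing::Jobs::Base
      # Optional: runs before the proper code
      def prepare
        Karafka.logger.info('job starting')
      end

      # Required entry-point with the actual work
      def call
        # processing would happen here
      end

      # Optional: runs after the proper code
      def teardown
        Karafka.logger.info('job done')
      end
    end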
data/lib/karafka/processing/jobs/consume.rb
CHANGED
@@ -6,10 +6,12 @@ module Karafka
       # The main job type. It runs the executor that triggers given topic partition messages
       # processing in an underlying consumer instance.
       class Consume < Base
+        # @return [Array<Rdkafka::Consumer::Message>] array with messages
+        attr_reader :messages
+
         # @param executor [Karafka::Processing::Executor] executor that is suppose to run a given
         #   job
-        # @param messages [
-        #   which we are suppose to work
+        # @param messages [Karafka::Messages::Messages] karafka messages batch
         # @return [Consume]
         def initialize(executor, messages)
           @executor = executor
@@ -18,9 +20,14 @@ module Karafka
           super()
         end
 
-        # Runs the
+        # Runs the preparations on the executor
+        def prepare
+          executor.prepare(@messages, @created_at)
+        end
+
+        # Runs the given executor
         def call
-          executor.consume
+          executor.consume
         end
       end
     end
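Taken together with the executor changes above, the per-batch flow of a consume job now splits into two phases. The wiring below is a simplified assumption of how a worker drives a job, with the delegation chain taken from the diffs in this file and in executor.rb:

    job = Karafka::Processing::Jobs::Consume.new(executor, messages)

    job.prepare  # -> executor.prepare(...) -> consumer.on_prepared
    job.call     # -> executor.consume      -> consumer.on_consume
    job.teardown # no-op unless a job subclass redefines it

For the Pro `ConsumeNonBlocking` variant, `#prepare` additionally flips `@non_blocking`, so the jobs queue stops blocking on this job once the preparation phase is over.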