karafka 2.0.0.beta1 → 2.0.0.beta2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +13 -0
- data/Gemfile.lock +1 -1
- data/config/errors.yml +1 -0
- data/lib/active_job/karafka.rb +2 -2
- data/lib/karafka/active_job/routing/extensions.rb +21 -0
- data/lib/karafka/base_consumer.rb +1 -1
- data/lib/karafka/connection/client.rb +1 -1
- data/lib/karafka/connection/listener.rb +88 -27
- data/lib/karafka/connection/listeners_batch.rb +24 -0
- data/lib/karafka/connection/messages_buffer.rb +50 -54
- data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
- data/lib/karafka/contracts/config.rb +7 -0
- data/lib/karafka/helpers/async.rb +33 -0
- data/lib/karafka/messages/batch_metadata.rb +26 -3
- data/lib/karafka/messages/builders/batch_metadata.rb +17 -29
- data/lib/karafka/messages/builders/message.rb +1 -0
- data/lib/karafka/messages/builders/messages.rb +4 -12
- data/lib/karafka/pro/active_job/consumer.rb +21 -0
- data/lib/karafka/pro/active_job/dispatcher.rb +1 -1
- data/lib/karafka/pro/loader.rb +5 -1
- data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +38 -0
- data/lib/karafka/pro/scheduler.rb +54 -0
- data/lib/karafka/processing/executor.rb +5 -2
- data/lib/karafka/processing/executors_buffer.rb +15 -7
- data/lib/karafka/processing/jobs/base.rb +13 -1
- data/lib/karafka/processing/jobs/consume.rb +4 -2
- data/lib/karafka/processing/jobs_queue.rb +15 -12
- data/lib/karafka/processing/worker.rb +7 -9
- data/lib/karafka/processing/workers_batch.rb +5 -0
- data/lib/karafka/routing/consumer_group.rb +1 -1
- data/lib/karafka/routing/subscription_group.rb +1 -1
- data/lib/karafka/routing/subscription_groups_builder.rb +3 -2
- data/lib/karafka/routing/topics.rb +38 -0
- data/lib/karafka/runner.rb +19 -27
- data/lib/karafka/scheduler.rb +10 -11
- data/lib/karafka/server.rb +24 -23
- data/lib/karafka/setup/config.rb +1 -0
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +1 -3
- metadata +10 -3
- metadata.gz.sig +0 -0
- data/lib/karafka/active_job/routing_extensions.rb +0 -18
data/lib/karafka/messages/builders/batch_metadata.rb
CHANGED
@@ -8,42 +8,30 @@ module Karafka
       class << self
         # Creates metadata based on the kafka batch data.
         #
-        # @param
+        # @param messages [Array<Karafka::Messages::Message>] messages array
         # @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
         # @param scheduled_at [Time] moment when the batch was scheduled for processing
         # @return [Karafka::Messages::BatchMetadata] batch metadata object
-        #
-        #
-        #
-        def call(
-          now = Time.now
-
+        #
+        # @note We do not set `processed_at` as this needs to be assigned when the batch is
+        #   picked up for processing.
+        def call(messages, topic, scheduled_at)
           Karafka::Messages::BatchMetadata.new(
-            size:
-            first_offset:
-            last_offset:
+            size: messages.count,
+            first_offset: messages.first.offset,
+            last_offset: messages.last.offset,
             deserializer: topic.deserializer,
-            partition:
+            partition: messages.first.partition,
             topic: topic.name,
+            # We go with the assumption that the creation of the whole batch is the last message
+            # creation time
+            created_at: messages.last.timestamp,
+            # When this batch was built and scheduled for execution
             scheduled_at: scheduled_at,
-            #
-            #
-
-
-            # one of the workers
-            processing_lag: time_distance_in_ms(now, scheduled_at)
-          ).freeze
-        end
-
-        private
-
-        # Computes time distance in between two times in ms
-        #
-        # @param time1 [Time]
-        # @param time2 [Time]
-        # @return [Integer] distance in between two times in ms
-        def time_distance_in_ms(time1, time2)
-          ((time1 - time2) * 1_000).round
+            # We build the batch metadata when we pick up the job in the worker, thus we can use
+            # current time here
+            processed_at: Time.now
+          )
         end
       end
     end
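The net effect of this hunk: instead of precomputing `processing_lag` at build time, beta2 stores `scheduled_at` and `processed_at` on the metadata. A minimal sketch (not gem code; the helper name is illustrative) of recovering the removed lag value from those two timestamps:

# Hypothetical helper, mirroring the removed time_distance_in_ms logic
def processing_lag_ms(metadata)
  # processed_at is set when a worker picks the batch up, scheduled_at when it
  # was enqueued, so their distance in ms matches the old processing_lag
  ((metadata.processed_at - metadata.scheduled_at) * 1_000).round
end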
data/lib/karafka/messages/builders/messages.rb
CHANGED
@@ -9,27 +9,19 @@ module Karafka
         # Creates messages batch with messages inside based on the incoming messages and the
         # topic from which it comes.
         #
-        # @param
+        # @param messages [Array<Karafka::Messages::Message>] karafka messages array
         # @param topic [Karafka::Routing::Topic] topic for which we've received messages
         # @param received_at [Time] moment in time when the messages were received
         # @return [Karafka::Messages::Messages] messages batch object
-        def call(
-          messages_array = kafka_messages.map do |message|
-            Karafka::Messages::Builders::Message.call(
-              message,
-              topic,
-              received_at
-            )
-          end
-
+        def call(messages, topic, received_at)
          metadata = BatchMetadata.call(
-
+            messages,
            topic,
            received_at
          ).freeze

          Karafka::Messages::Messages.new(
-
+            messages,
            metadata
          ).freeze
        end
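After this change the builder expects Karafka messages that were already built from the raw rdkafka data elsewhere (beta2 introduces raw_messages_buffer.rb for that), so it only wraps them together with frozen batch metadata. A hedged usage sketch, with all inputs assumed to exist:

# messages: Array<Karafka::Messages::Message>, topic: Karafka::Routing::Topic
batch = Karafka::Messages::Builders::Messages.call(messages, topic, Time.now)
batch.metadata.size # => messages.count, per the BatchMetadata builder above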
data/lib/karafka/pro/active_job/consumer.rb
ADDED
@@ -0,0 +1,21 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Pro
+    module ActiveJob
+      # This Karafka component is a Pro component.
+      # All of the commercial components are present in the lib/karafka/pro directory of this
+      # repository and their usage requires commercial license agreement.
+      #
+      # Karafka has also commercial-friendly license, commercial support and commercial components.
+      #
+      # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+      # of your code to Maciej Mensfeld.
+
+      # Pro ActiveJob consumer that is supposed to handle long-running jobs as well as short
+      # running jobs
+      class Consumer < Karafka::ActiveJob::Consumer
+      end
+    end
+  end
+end
data/lib/karafka/pro/active_job/dispatcher.rb
CHANGED
@@ -18,7 +18,7 @@ module Karafka
       # much better and more granular control over the dispatch and consumption process.
       class Dispatcher < ::Karafka::ActiveJob::Dispatcher
         # Defaults for dispatching
-        #
+        # They can be updated by using `#karafka_options` on the job
         DEFAULTS = {
           dispatch_method: :produce_async,
           # We don't create a dummy proc based partitioner as we would have to evaluate it with
data/lib/karafka/pro/loader.rb
CHANGED
@@ -19,13 +19,17 @@ module Karafka
       # components
       def setup(config)
         require_relative 'performance_tracker'
+        require_relative 'scheduler'
+        require_relative 'processing/jobs/consume_non_blocking'
+        require_relative 'active_job/consumer'
         require_relative 'active_job/dispatcher'
         require_relative 'active_job/job_options_contract'

+        config.internal.scheduler = Scheduler.new
+        config.internal.active_job.consumer = ActiveJob::Consumer
         config.internal.active_job.dispatcher = ActiveJob::Dispatcher.new
         config.internal.active_job.job_options_contract = ActiveJob::JobOptionsContract.new

-        # Monitor time needed to process each message from a single partition
         config.monitor.subscribe(PerformanceTracker.instance)
       end
     end
data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb
ADDED
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Pro
+    # Pro components related to processing part of Karafka
+    module Processing
+      # Pro jobs
+      module Jobs
+        # This Karafka component is a Pro component.
+        # All of the commercial components are present in the lib/karafka/pro directory of this
+        # repository and their usage requires commercial license agreement.
+        #
+        # Karafka has also commercial-friendly license, commercial support and commercial
+        # components.
+        #
+        # By sending a pull request to the pro components, you are agreeing to transfer the
+        # copyright of your code to Maciej Mensfeld.
+
+        # The main job type in a non-blocking variant.
+        # This variant works "like" the regular consumption but pauses the partition for as long
+        # as it is needed until a job is done.
+        #
+        # It can be useful when having long lasting jobs that would exceed `max.poll.interval`
+        # if they would block.
+        #
+        # @note It needs to be working with a proper consumer that will handle the partition
+        #   management. This layer of the framework knows nothing about Kafka messages consumption.
+        class ConsumeNonBlocking < ::Karafka::Processing::Jobs::Consume
+          # Releases the blocking lock after it is done with the preparation phase for this job
+          def prepare
+            super
+            @non_blocking = true
+          end
+        end
+      end
+    end
+  end
+end
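The contract visible in this file and in processing/jobs/base.rb further down: a job starts blocking and may flip `@non_blocking` only once all blocking steps are done. A sketch (not gem code) of a custom variant following that contract:

class MyNonBlockingJob < Karafka::Processing::Jobs::Consume
  def prepare
    super # regular (blocking) preparation, e.g. building the messages batch
    # any partition pausing or other blocking precautions would happen here
    @non_blocking = true # from now on JobsQueue#wait? will not wait on this job
  end
end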
data/lib/karafka/pro/scheduler.rb
ADDED
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Pro
+    # This Karafka component is a Pro component.
+    # All of the commercial components are present in the lib/karafka/pro directory of this
+    # repository and their usage requires commercial license agreement.
+    #
+    # Karafka has also commercial-friendly license, commercial support and commercial components.
+    #
+    # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+    # of your code to Maciej Mensfeld.
+
+    # Optimizes scheduler that takes into consideration the execution time needed to process
+    # messages from given topics partitions. It uses the non-preemptive LJF algorithm
+    #
+    # This scheduler is designed to optimize execution times on jobs that perform IO operations as
+    # when taking IO into consideration, we can achieve optimized parallel processing.
+    #
+    # This scheduler can also work with virtual partitions.
+    #
+    # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
+    # default FIFO scheduler from the default Karafka scheduler
+    class Scheduler < ::Karafka::Scheduler
+      # Schedules jobs in the LJF order for consumption
+      #
+      # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
+      # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
+      #
+      def schedule_consumption(queue, jobs_array)
+        pt = PerformanceTracker.instance
+
+        ordered = []
+
+        jobs_array.each do |job|
+          messages = job.messages
+          message = messages.first
+
+          cost = pt.processing_time_p95(message.topic, message.partition) * messages.size
+
+          ordered << [job, cost]
+        end
+
+        ordered.sort_by!(&:last)
+        ordered.reverse!
+        ordered.map!(&:first)
+
+        ordered.each do |job|
+          queue << job
+        end
+      end
+    end
+  end
+end
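A toy illustration of the LJF (longest job first) ordering implemented above: each job's cost is the p95 processing time observed for its topic partition multiplied by the batch size, and the most expensive jobs are enqueued first:

jobs = { fast: 12.0, mid: 90.0, slow: 480.0 } # job => estimated cost in ms
# sort ascending by cost, then reverse, exactly as the scheduler does
jobs.sort_by { |_, cost| cost }.reverse.map(&:first) # => [:slow, :mid, :fast]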
data/lib/karafka/processing/executor.rb
CHANGED
@@ -18,6 +18,8 @@ module Karafka
       # @return [String] subscription group id to which a given executor belongs
       attr_reader :group_id

+      attr_reader :messages
+
       # @param group_id [String] id of the subscription group to which the executor belongs
       # @param client [Karafka::Connection::Client] kafka client
       # @param topic [Karafka::Routing::Topic] topic for which this executor will run
@@ -30,9 +32,10 @@ module Karafka
         @pause_tracker = pause_tracker
       end

-      # Builds the consumer instance and sets all that is needed to run the
+      # Builds the consumer instance, builds messages batch and sets all that is needed to run the
+      # user consumption logic
       #
-      # @param messages [Array<
+      # @param messages [Array<Karafka::Messages::Message>]
       # @param received_at [Time] the moment we've received the batch (actually the moment we've
       #   enqueued it, but good enough)
       def prepare(messages, received_at)
data/lib/karafka/processing/executors_buffer.rb
CHANGED
@@ -23,21 +23,29 @@ module Karafka
           partition,
           pause
         )
-
+        ktopic = @subscription_group.topics.find(topic)

-
+        ktopic || raise(Errors::TopicNotFoundError, topic)

-        @buffer[
+        @buffer[ktopic][partition] ||= Executor.new(
           @subscription_group.id,
           @client,
-
+          ktopic,
           pause
         )
       end

-      #
-
-
+      # Iterates over all available executors and yields them together with topic and partition
+      #   info
+      # @yieldparam [Routing::Topic] karafka routing topic object
+      # @yieldparam [Integer] partition number
+      # @yieldparam [Executor] given executor
+      def each
+        @buffer.each do |ktopic, partitions|
+          partitions.each do |partition, executor|
+            yield(ktopic, partition, executor)
+          end
+        end
       end

       # Clears the executors buffer. Useful for critical errors recovery.
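The new `#each` is what lets other parts of the framework walk all active executors, for example to run per-partition shutdown or revocation work. Illustrative use only (buffer construction omitted):

executors_buffer.each do |topic, partition, executor|
  Karafka.logger.debug("#{topic.name}/#{partition} handled by #{executor.group_id}")
end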
data/lib/karafka/processing/jobs/base.rb
CHANGED
@@ -15,6 +15,13 @@ module Karafka

       attr_reader :executor

+      # Creates a new job instance
+      def initialize
+        # All jobs are blocking by default and they can release the lock when blocking operations
+        #   are done (if needed)
+        @non_blocking = false
+      end
+
       # When redefined can run any code that should run before executing the proper code
       def prepare; end

@@ -22,11 +29,16 @@ module Karafka
       def teardown; end

       # @return [Boolean] is this a non-blocking job
+      #
       # @note Blocking job is a job, that will cause the job queue to wait until it is finished
       #   before removing the lock on new jobs being added
+      #
       # @note All the jobs are blocking by default
+      #
+      # @note Job **needs** to mark itself as non-blocking only **after** it is done with all
+      #   the blocking things (pausing partition, etc).
       def non_blocking?
-
+        @non_blocking
       end
     end
   end
data/lib/karafka/processing/jobs/consume.rb
CHANGED
@@ -6,10 +6,12 @@ module Karafka
     # The main job type. It runs the executor that triggers given topic partition messages
     # processing in an underlying consumer instance.
     class Consume < Base
+      # @return [Array<Rdkafka::Consumer::Message>] array with messages
+      attr_reader :messages
+
       # @param executor [Karafka::Processing::Executor] executor that is supposed to run a given
       #   job
-      # @param messages [
-      #   which we are suppose to work
+      # @param messages [Karafka::Messages::Messages] karafka messages batch
       # @return [Consume]
       def initialize(executor, messages)
         @executor = executor
data/lib/karafka/processing/jobs_queue.rb
CHANGED
@@ -12,7 +12,7 @@ module Karafka
     class JobsQueue
       # @return [Karafka::Processing::JobsQueue]
       def initialize
-        @queue =
+        @queue = Queue.new
         # Those queues will act as semaphores internally. Since we need an indicator for waiting
         # we could use Thread.pass but this is expensive. Instead we can just lock until any
         # of the workers finishes their work and we can re-check. This means that in the worse
@@ -100,8 +100,17 @@ module Karafka
         end
       end

-      #
-      #
+      # @param group_id [String]
+      #
+      # @return [Boolean] tell us if we have anything in the processing (or for processing) from
+      #   a given group.
+      def empty?(group_id)
+        @in_processing[group_id].empty?
+      end
+
+      # Blocks when there are things in the queue in a given group and waits until all the blocking
+      #   jobs from a given group are completed
+      #
       # @param group_id [String] id of the group in which jobs we're interested.
       # @note This method is blocking.
       def wait(group_id)
@@ -114,16 +123,10 @@ module Karafka

       # @param group_id [String] id of the group in which jobs we're interested.
       # @return [Boolean] should we keep waiting or not
+      # @note We do not wait for non-blocking jobs. Their flow should allow for `poll` running
+      #   as they may exceed `max.poll.interval`
       def wait?(group_id)
-
-
-        # If it is stopping, all the previous messages that are processed at the moment need to
-        #   finish. Otherwise we may risk closing the client and committing offsets afterwards
-        return false if Karafka::App.stopping? && group.empty?
-        return false if @queue.closed?
-        return false if group.empty?
-
-        !group.all?(&:non_blocking?)
+        !@in_processing[group_id].all?(&:non_blocking?)
       end
     end
   end
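Taken together, `#empty?`, `#wait` and the reworked `#wait?` give listeners per-group back-pressure: a listener blocks until every blocking job of its subscription group finishes, while non-blocking jobs are ignored so polling can continue within `max.poll.interval`. A sketch of the calling side, with the receiver objects assumed to exist:

jobs_queue.wait(subscription_group.id)   # blocks for as long as wait?(group_id) is true
jobs_queue.empty?(subscription_group.id) # => true once nothing is in processing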
data/lib/karafka/processing/worker.rb
CHANGED
@@ -17,24 +17,22 @@ module Karafka
     # code. This can be used to unlock certain resources or do other things that are
     # not user code but need to run after user code base is executed.
     class Worker
-
-
-      def_delegators :@thread, :join, :terminate, :alive?
+      include Helpers::Async

       # @param jobs_queue [JobsQueue]
       # @return [Worker]
       def initialize(jobs_queue)
         @jobs_queue = jobs_queue
-        @thread = Thread.new do
-          # If anything goes wrong in this worker thread, it means something went really wrong and
-          # we should terminate.
-          Thread.current.abort_on_exception = true
-          loop { break unless process }
-        end
       end

       private

+      # Runs processing of jobs in a loop
+      # Stops when queue is closed.
+      def call
+        loop { break unless process }
+      end
+
       # Fetches a single job, processes it and marks as completed.
       #
       # @note We do not have error handling here, as no errors should propagate this far. If they
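The body of data/lib/karafka/helpers/async.rb (+33) is not included in this diff. Judging from how Worker uses it here (`async_call` plus the previously delegated `join` and `alive?`), a plausible minimal shape would be the following sketch; this is an assumption, not the released code:

module Karafka
  module Helpers
    # Assumed: runs #call of the including class in a separate thread
    module Async
      def async_call
        @thread = Thread.new do
          # Mirrors the inline thread code removed from Worker above
          Thread.current.abort_on_exception = true
          call
        end
      end

      def join
        @thread&.join
      end

      def alive?
        @thread ? @thread.alive? : false
      end
    end
  end
end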
data/lib/karafka/routing/subscription_group.rb
CHANGED
@@ -10,7 +10,7 @@ module Karafka
     class SubscriptionGroup
       attr_reader :id, :topics

-      # @param topics [
+      # @param topics [Karafka::Routing::Topics] all the topics that share the same key settings
       # @return [SubscriptionGroup] built subscription group
       def initialize(topics)
         @id = SecureRandom.uuid
data/lib/karafka/routing/subscription_groups_builder.rb
CHANGED
@@ -23,8 +23,8 @@ module Karafka

       private_constant :DISTRIBUTION_KEYS

-      # @param topics [
-      #   groups
+      # @param topics [Karafka::Routing::Topics] all the topics based on which we want to build
+      #   subscription groups
       # @return [Array<SubscriptionGroup>] all subscription groups we need in separate threads
       def call(topics)
         topics
@@ -32,6 +32,7 @@ module Karafka
           .group_by(&:first)
           .values
           .map { |value| value.map(&:last) }
+          .map { |topics_array| Routing::Topics.new(topics_array) }
           .map { |grouped_topics| SubscriptionGroup.new(grouped_topics) }
       end

data/lib/karafka/routing/topics.rb
ADDED
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+# frozen_string_literal: true
+
+module Karafka
+  module Routing
+    # Abstraction layer on top of groups of topics
+    class Topics
+      include Enumerable
+      extend Forwardable
+
+      def_delegators :@accumulator, :[], :size, :empty?, :last, :<<
+
+      # @param topics_array [Array<Karafka::Routing::Topic>] array with topics
+      def initialize(topics_array)
+        @accumulator = topics_array.dup
+      end
+
+      # Yields each topic
+      #
+      # @param [Proc] block we want to yield with on each topic
+      def each(&block)
+        @accumulator.each(&block)
+      end
+
+      # Finds topic by its name
+      #
+      # @param topic_name [String] topic name
+      # @return [Karafka::Routing::Topic]
+      # @raise [Karafka::Errors::TopicNotFoundError] this should never happen. If you see it,
+      #   please create an issue.
+      def find(topic_name)
+        @accumulator.find { |topic| topic.name == topic_name } ||
+          raise(Karafka::Errors::TopicNotFoundError, topic_name)
+      end
+    end
+  end
+end
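Usage of the new wrapper is straightforward; `find` either returns a routing topic or raises, which is exactly what ExecutorsBuffer above relies on. A short sketch (the `topics_array` input is whatever your routing produced):

topics = Karafka::Routing::Topics.new(topics_array)
topics.find('orders')  # => Karafka::Routing::Topic for 'orders'
topics.find('missing') # raises Karafka::Errors::TopicNotFoundError
topics.size            # delegated to the underlying array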
data/lib/karafka/runner.rb
CHANGED
@@ -3,32 +3,37 @@
 module Karafka
   # Class used to run the Karafka listeners in separate threads
   class Runner
-    # Starts listening on all the listeners asynchronously
-    #
+    # Starts listening on all the listeners asynchronously and handles the jobs queue closing
+    # after listeners are done with their work.
     def call
       # Despite possibility of having several independent listeners, we aim to have one queue for
       # jobs across and one workers poll for that
       jobs_queue = Processing::JobsQueue.new

       workers = Processing::WorkersBatch.new(jobs_queue)
-
+      listeners = Connection::ListenersBatch.new(jobs_queue)

-
-
-      # each listener running in separate threads, so the exceptions should never leak
-      # and if that happens, it means that something really bad happened and we should stop
-      # the whole process
-      Thread
-        .new { listener.call }
-        .tap { |thread| thread.abort_on_exception = true }
-      end
+      workers.each(&:async_call)
+      listeners.each(&:async_call)

       # We aggregate threads here for a supervised shutdown process
-      Karafka::Server.
+      Karafka::Server.workers = workers
+      Karafka::Server.listeners = listeners

       # All the listener threads need to finish
-
+      listeners.each(&:join)
+
+      # We close the jobs queue only when no listener threads are working.
+      # This ensures, that everything was closed prior to us not accepting anymore jobs and that
+      # no more jobs will be enqueued. Since each listener waits for jobs to finish, once those
+      # are done, we can close.
+      jobs_queue.close
+
       # All the workers need to stop processing anything before we can stop the runner completely
+      # This ensures that even async long-running jobs have time to finish before we are done
+      # with everything. One thing worth keeping in mind though: It is the end user responsibility
+      # to handle the shutdown detection in their long-running processes. Otherwise if timeout
+      # is exceeded, there will be a forced shutdown.
       workers.each(&:join)
       # If anything crashes here, we need to raise the error and crash the runner because it means
       # that something terrible happened
@@ -42,18 +47,5 @@ module Karafka
       Karafka::App.stop!
       raise e
     end
-
-    private
-
-    # @param jobs_queue [Processing::JobsQueue] the main processing queue
-    # @return [Array<Karafka::Connection::Listener>] listeners that will consume messages for each
-    #   of the subscription groups
-    def listeners(jobs_queue)
-      App
-        .subscription_groups
-        .map do |subscription_group|
-          Karafka::Connection::Listener.new(subscription_group, jobs_queue)
-        end
-    end
   end
 end
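The new data/lib/karafka/connection/listeners_batch.rb (+24) is not shown in this diff, but the removed `Runner#listeners` method tells us what it replaces. A plausible minimal shape, assumed rather than copied from the release:

module Karafka
  module Connection
    # Assumed: Enumerable wrapper building one listener per subscription group
    class ListenersBatch
      include Enumerable

      # @param jobs_queue [Processing::JobsQueue]
      def initialize(jobs_queue)
        @batch = App.subscription_groups.map do |subscription_group|
          Connection::Listener.new(subscription_group, jobs_queue)
        end
      end

      def each(&block)
        @batch.each(&block)
      end
    end
  end
end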
data/lib/karafka/scheduler.rb
CHANGED
@@ -3,19 +3,18 @@
 module Karafka
   # FIFO scheduler for messages coming from various topics and partitions
   class Scheduler
-    #
+    # Schedules jobs in the fifo order
     #
-    # @param
-    #
-
-
-
-    def call(messages_buffer)
-      messages_buffer.each do |topic, partitions|
-        partitions.each do |partition, messages|
-          yield(topic, partition, messages)
-        end
+    # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
+    # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
+    def schedule_consumption(queue, jobs_array)
+      jobs_array.each do |job|
+        queue << job
       end
     end
+
+    # Both revocation and shutdown jobs can also run in fifo by default
+    alias schedule_revocation schedule_consumption
+    alias schedule_shutdown schedule_consumption
   end
 end
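Since the Pro loader above swaps the scheduler via `config.internal.scheduler`, a custom ordering can be plugged in the same way. A sketch under the assumption that the config hook is reachable as shown below:

class AlphabeticalScheduler < Karafka::Scheduler
  # Enqueues consumption jobs ordered by topic name instead of plain FIFO
  def schedule_consumption(queue, jobs_array)
    jobs_array
      .sort_by { |job| job.messages.first.topic }
      .each { |job| queue << job }
  end
end

# Assumed wiring point, mirroring what the Pro loader does in #setup
Karafka::App.config.internal.scheduler = AlphabeticalScheduler.new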