karafka 2.0.0.beta1 → 2.0.0.beta2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +13 -0
  4. data/Gemfile.lock +1 -1
  5. data/config/errors.yml +1 -0
  6. data/lib/active_job/karafka.rb +2 -2
  7. data/lib/karafka/active_job/routing/extensions.rb +21 -0
  8. data/lib/karafka/base_consumer.rb +1 -1
  9. data/lib/karafka/connection/client.rb +1 -1
  10. data/lib/karafka/connection/listener.rb +88 -27
  11. data/lib/karafka/connection/listeners_batch.rb +24 -0
  12. data/lib/karafka/connection/messages_buffer.rb +50 -54
  13. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  14. data/lib/karafka/contracts/config.rb +7 -0
  15. data/lib/karafka/helpers/async.rb +33 -0
  16. data/lib/karafka/messages/batch_metadata.rb +26 -3
  17. data/lib/karafka/messages/builders/batch_metadata.rb +17 -29
  18. data/lib/karafka/messages/builders/message.rb +1 -0
  19. data/lib/karafka/messages/builders/messages.rb +4 -12
  20. data/lib/karafka/pro/active_job/consumer.rb +21 -0
  21. data/lib/karafka/pro/active_job/dispatcher.rb +1 -1
  22. data/lib/karafka/pro/loader.rb +5 -1
  23. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +38 -0
  24. data/lib/karafka/pro/scheduler.rb +54 -0
  25. data/lib/karafka/processing/executor.rb +5 -2
  26. data/lib/karafka/processing/executors_buffer.rb +15 -7
  27. data/lib/karafka/processing/jobs/base.rb +13 -1
  28. data/lib/karafka/processing/jobs/consume.rb +4 -2
  29. data/lib/karafka/processing/jobs_queue.rb +15 -12
  30. data/lib/karafka/processing/worker.rb +7 -9
  31. data/lib/karafka/processing/workers_batch.rb +5 -0
  32. data/lib/karafka/routing/consumer_group.rb +1 -1
  33. data/lib/karafka/routing/subscription_group.rb +1 -1
  34. data/lib/karafka/routing/subscription_groups_builder.rb +3 -2
  35. data/lib/karafka/routing/topics.rb +38 -0
  36. data/lib/karafka/runner.rb +19 -27
  37. data/lib/karafka/scheduler.rb +10 -11
  38. data/lib/karafka/server.rb +24 -23
  39. data/lib/karafka/setup/config.rb +1 -0
  40. data/lib/karafka/version.rb +1 -1
  41. data.tar.gz.sig +1 -3
  42. metadata +10 -3
  43. metadata.gz.sig +0 -0
  44. data/lib/karafka/active_job/routing_extensions.rb +0 -18
@@ -8,42 +8,30 @@ module Karafka
  class << self
  # Creates metadata based on the kafka batch data.
  #
- # @param kafka_batch [Array<Rdkafka::Consumer::Message>] raw fetched messages
+ # @param messages [Array<Karafka::Messages::Message>] messages array
  # @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
  # @param scheduled_at [Time] moment when the batch was scheduled for processing
  # @return [Karafka::Messages::BatchMetadata] batch metadata object
- # @note Regarding the time lags: we can use the current time here, as batch metadata is
- # created in the worker. So whenever this is being built, it means that the processing
- # of this batch has already started.
- def call(kafka_batch, topic, scheduled_at)
- now = Time.now
-
+ #
+ # @note We do not set `processed_at` as this needs to be assigned when the batch is
+ # picked up for processing.
+ def call(messages, topic, scheduled_at)
  Karafka::Messages::BatchMetadata.new(
- size: kafka_batch.count,
- first_offset: kafka_batch.first.offset,
- last_offset: kafka_batch.last.offset,
+ size: messages.count,
+ first_offset: messages.first.offset,
+ last_offset: messages.last.offset,
  deserializer: topic.deserializer,
- partition: kafka_batch[0].partition,
+ partition: messages.first.partition,
  topic: topic.name,
+ # We go with the assumption that the creation of the whole batch is the last message
+ # creation time
+ created_at: messages.last.timestamp,
+ # When this batch was built and scheduled for execution
  scheduled_at: scheduled_at,
- # This lag describes how long did it take for a message to be consumed from the
- # moment it was created
- consumption_lag: time_distance_in_ms(now, kafka_batch.last.timestamp),
- # This lag describes how long did a batch have to wait before it was picked up by
- # one of the workers
- processing_lag: time_distance_in_ms(now, scheduled_at)
- ).freeze
- end
-
- private
-
- # Computes time distance in between two times in ms
- #
- # @param time1 [Time]
- # @param time2 [Time]
- # @return [Integer] distance in between two times in ms
- def time_distance_in_ms(time1, time2)
- ((time1 - time2) * 1_000).round
+ # We build the batch metadata when we pick up the job in the worker, thus we can use
+ # current time here
+ processed_at: Time.now
+ )
  end
  end
  end
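With this change the lags are no longer precomputed in the builder. Given the `data/lib/karafka/messages/batch_metadata.rb +26 -3` entry in the file list, they most likely become lazy readers on the metadata struct itself; a minimal Ruby sketch of that idea (an assumption, not the actual implementation):

BatchMetadata = Struct.new(
  :size, :first_offset, :last_offset, :deserializer, :partition,
  :topic, :created_at, :scheduled_at, :processed_at,
  keyword_init: true
) do
  # How long it took from the last message creation until processing started (ms)
  def consumption_lag
    ((processed_at - created_at) * 1_000).round
  end

  # How long the batch waited before a worker picked it up (ms)
  def processing_lag
    ((processed_at - scheduled_at) * 1_000).round
  end
end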
@@ -26,6 +26,7 @@ module Karafka
  received_at: received_at
  ).freeze

+ # Karafka messages cannot be frozen because of the lazy deserialization feature
  Karafka::Messages::Message.new(
  kafka_message.payload,
  metadata
@@ -9,27 +9,19 @@ module Karafka
  # Creates messages batch with messages inside based on the incoming messages and the
  # topic from which it comes.
  #
- # @param kafka_messages [Array<Rdkafka::Consumer::Message>] raw fetched messages
+ # @param messages [Array<Karafka::Messages::Message>] karafka messages array
  # @param topic [Karafka::Routing::Topic] topic for which we're received messages
  # @param received_at [Time] moment in time when the messages were received
  # @return [Karafka::Messages::Messages] messages batch object
- def call(kafka_messages, topic, received_at)
- messages_array = kafka_messages.map do |message|
- Karafka::Messages::Builders::Message.call(
- message,
- topic,
- received_at
- )
- end
-
+ def call(messages, topic, received_at)
  metadata = BatchMetadata.call(
- kafka_messages,
+ messages,
  topic,
  received_at
  ).freeze

  Karafka::Messages::Messages.new(
- messages_array,
+ messages,
  metadata
  ).freeze
  end
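Since the batch builder now receives already-built Karafka messages, mapping raw rdkafka messages is the caller's job (the listener/buffer side). A hedged usage sketch, where `raw_kafka_messages`, `topic` and `received_at` stand for values the caller already has:

karafka_messages = raw_kafka_messages.map do |raw_message|
  Karafka::Messages::Builders::Message.call(raw_message, topic, received_at)
end

batch = Karafka::Messages::Builders::Messages.call(karafka_messages, topic, received_at)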
@@ -0,0 +1,21 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module Pro
+ module ActiveJob
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+ # of your code to Maciej Mensfeld.
+
+ # Pro ActiveJob consumer that is suppose to handle long-running jobs as well as short
+ # running jobs
+ class Consumer < Karafka::ActiveJob::Consumer
+ end
+ end
+ end
+ end
@@ -18,7 +18,7 @@ module Karafka
  # much better and more granular control over the dispatch and consumption process.
  class Dispatcher < ::Karafka::ActiveJob::Dispatcher
  # Defaults for dispatching
- # The can be updated by using `#karafka_options` on the job
+ # They can be updated by using `#karafka_options` on the job
  DEFAULTS = {
  dispatch_method: :produce_async,
  # We don't create a dummy proc based partitioner as we would have to evaluate it with
@@ -19,13 +19,17 @@ module Karafka
  # components
  def setup(config)
  require_relative 'performance_tracker'
+ require_relative 'scheduler'
+ require_relative 'processing/jobs/consume_non_blocking'
+ require_relative 'active_job/consumer'
  require_relative 'active_job/dispatcher'
  require_relative 'active_job/job_options_contract'

+ config.internal.scheduler = Scheduler.new
+ config.internal.active_job.consumer = ActiveJob::Consumer
  config.internal.active_job.dispatcher = ActiveJob::Dispatcher.new
  config.internal.active_job.job_options_contract = ActiveJob::JobOptionsContract.new

- # Monitor time needed to process each message from a single partition
  config.monitor.subscribe(PerformanceTracker.instance)
  end
  end
@@ -0,0 +1,38 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module Pro
+ # Pro components related to processing part of Karafka
+ module Processing
+ # Pro jobs
+ module Jobs
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial
+ # components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the
+ # copyright of your code to Maciej Mensfeld.
+
+ # The main job type in a non-blocking variant.
+ # This variant works "like" the regular consumption but pauses the partition for as long
+ # as it is needed until a job is done.
+ #
+ # It can be useful when having long lasting jobs that would exceed `max.poll.interval`
+ # if would block.
+ #
+ # @note It needs to be working with a proper consumer that will handle the partition
+ # management. This layer of the framework knows nothing about Kafka messages consumption.
+ class ConsumeNonBlocking < ::Karafka::Processing::Jobs::Consume
+ # Releases the blocking lock after it is done with the preparation phase for this job
+ def prepare
+ super
+ @non_blocking = true
+ end
+ end
+ end
+ end
+ end
+ end
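An illustration of how the flag cooperates with the jobs queue, assuming `executor` and `messages` are already-built objects:

job = Karafka::Pro::Processing::Jobs::ConsumeNonBlocking.new(executor, messages)
job.non_blocking? # falsey - the job still blocks the queue until prepared
job.prepare       # regular preparation runs first (super), then the lock is released
job.non_blocking? # => true, so JobsQueue#wait? no longer holds polling for this group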
@@ -0,0 +1,54 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module Pro
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+ # of your code to Maciej Mensfeld.
+
+ # Optimizes scheduler that takes into consideration of execution time needed to process
+ # messages from given topics partitions. It uses the non-preemptive LJF algorithm
+ #
+ # This scheduler is designed to optimize execution times on jobs that perform IO operations as
+ # when taking IO into consideration, the can achieve optimized parallel processing.
+ #
+ # This scheduler can also work with virtual partitions.
+ #
+ # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
+ # default FIFO scheduler from the default Karafka scheduler
+ class Scheduler < ::Karafka::Scheduler
+ # Schedules jobs in the LJF order for consumption
+ #
+ # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
+ #
+ def schedule_consumption(queue, jobs_array)
+ pt = PerformanceTracker.instance
+
+ ordered = []
+
+ jobs_array.each do |job|
+ messages = job.messages
+ message = messages.first
+
+ cost = pt.processing_time_p95(message.topic, message.partition) * messages.size
+
+ ordered << [job, cost]
+ end
+
+ ordered.sort_by!(&:last)
+ ordered.reverse!
+ ordered.map!(&:first)
+
+ ordered.each do |job|
+ queue << job
+ end
+ end
+ end
+ end
+ end
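A worked example of the LJF ordering above: with p95 per-message processing times of 5 ms, 20 ms and 1 ms and batches of 100, 10 and 50 messages, the estimated costs are 500, 200 and 50, so jobs are enqueued from the most to the least expensive (illustrative numbers only):

costs = { job_a: 5 * 100, job_b: 20 * 10, job_c: 1 * 50 }
costs.sort_by { |_job, cost| -cost }.map(&:first) # => [:job_a, :job_b, :job_c]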
@@ -18,6 +18,8 @@ module Karafka
  # @return [String] subscription group id to which a given executor belongs
  attr_reader :group_id

+ attr_reader :messages
+
  # @param group_id [String] id of the subscription group to which the executor belongs
  # @param client [Karafka::Connection::Client] kafka client
  # @param topic [Karafka::Routing::Topic] topic for which this executor will run
@@ -30,9 +32,10 @@ module Karafka
  @pause_tracker = pause_tracker
  end

- # Builds the consumer instance and sets all that is needed to run the user consumption logic
+ # Builds the consumer instance, builds messages batch and sets all that is needed to run the
+ # user consumption logic
  #
- # @param messages [Array<Rdkafka::Consumer::Message>] raw rdkafka messages
+ # @param messages [Array<Karafka::Messages::Message>]
  # @param received_at [Time] the moment we've received the batch (actually the moment we've)
  # enqueued it, but good enough
  def prepare(messages, received_at)
@@ -23,21 +23,29 @@ module Karafka
  partition,
  pause
  )
- topic = @subscription_group.topics.find { |ktopic| ktopic.name == topic }
+ ktopic = @subscription_group.topics.find(topic)

- topic || raise(Errors::TopicNotFoundError, topic)
+ ktopic || raise(Errors::TopicNotFoundError, topic)

- @buffer[topic][partition] ||= Executor.new(
+ @buffer[ktopic][partition] ||= Executor.new(
  @subscription_group.id,
  @client,
- topic,
+ ktopic,
  pause
  )
  end

- # Runs the shutdown on all active executors.
- def shutdown
- @buffer.values.map(&:values).flatten.each(&:shutdown)
+ # Iterates over all available executors and yields them together with topic and partition
+ # info
+ # @yieldparam [Routing::Topic] karafka routing topic object
+ # @yieldparam [Integer] partition number
+ # @yieldparam [Executor] given executor
+ def each
+ @buffer.each do |ktopic, partitions|
+ partitions.each do |partition, executor|
+ yield(ktopic, partition, executor)
+ end
+ end
  end

  # Clears the executors buffer. Useful for critical errors recovery.
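With `#each` exposed, the caller decides what happens to every executor instead of the buffer doing it, for example during shutdown (a sketch, `executors_buffer` stands for an `ExecutorsBuffer` instance):

executors_buffer.each do |_topic, _partition, executor|
  executor.shutdown
end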
@@ -15,6 +15,13 @@ module Karafka

  attr_reader :executor

+ # Creates a new job instance
+ def initialize
+ # All jobs are blocking by default and they can release the lock when blocking operations
+ # are done (if needed)
+ @non_blocking = false
+ end
+
  # When redefined can run any code that should run before executing the proper code
  def prepare; end

@@ -22,11 +29,16 @@ module Karafka
  def teardown; end

  # @return [Boolean] is this a non-blocking job
+ #
  # @note Blocking job is a job, that will cause the job queue to wait until it is finished
  # before removing the lock on new jobs being added
+ #
  # @note All the jobs are blocking by default
+ #
+ # @note Job **needs** to mark itself as non-blocking only **after** it is done with all
+ # the blocking things (pausing partition, etc).
  def non_blocking?
- false
+ @non_blocking
  end
  end
  end
@@ -6,10 +6,12 @@ module Karafka
  # The main job type. It runs the executor that triggers given topic partition messages
  # processing in an underlying consumer instance.
  class Consume < Base
+ # @return [Array<Rdkafka::Consumer::Message>] array with messages
+ attr_reader :messages
+
  # @param executor [Karafka::Processing::Executor] executor that is suppose to run a given
  # job
- # @param messages [Array<dkafka::Consumer::Message>] array with raw rdkafka messages with
- # which we are suppose to work
+ # @param messages [Karafka::Messages::Messages] karafka messages batch
  # @return [Consume]
  def initialize(executor, messages)
  @executor = executor
@@ -12,7 +12,7 @@ module Karafka
  class JobsQueue
  # @return [Karafka::Processing::JobsQueue]
  def initialize
- @queue = ::Queue.new
+ @queue = Queue.new
  # Those queues will act as a semaphores internally. Since we need an indicator for waiting
  # we could use Thread.pass but this is expensive. Instead we can just lock until any
  # of the workers finishes their work and we can re-check. This means that in the worse
@@ -100,8 +100,17 @@ module Karafka
  end
  end

- # Blocks when there are things in the queue in a given group and waits until all the jobs
- # from a given group are completed
+ # @param group_id [String]
+ #
+ # @return [Boolean] tell us if we have anything in the processing (or for processing) from
+ # a given group.
+ def empty?(group_id)
+ @in_processing[group_id].empty?
+ end
+
+ # Blocks when there are things in the queue in a given group and waits until all the blocking
+ # jobs from a given group are completed
+ #
  # @param group_id [String] id of the group in which jobs we're interested.
  # @note This method is blocking.
  def wait(group_id)
@@ -114,16 +123,10 @@ module Karafka

  # @param group_id [String] id of the group in which jobs we're interested.
  # @return [Boolean] should we keep waiting or not
+ # @note We do not wait for non-blocking jobs. Their flow should allow for `poll` running
+ # as they may exceed `max.poll.interval`
  def wait?(group_id)
- group = @in_processing[group_id]
-
- # If it is stopping, all the previous messages that are processed at the moment need to
- # finish. Otherwise we may risk closing the client and committing offsets afterwards
- return false if Karafka::App.stopping? && group.empty?
- return false if @queue.closed?
- return false if group.empty?
-
- !group.all?(&:non_blocking?)
+ !@in_processing[group_id].all?(&:non_blocking?)
  end
  end
  end
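A simplified model of the new waiting semantics (the real implementation parks on per-group semaphore queues rather than polling): `wait` keeps blocking only while some blocking job of the group is still in processing, so groups that contain only non-blocking jobs never hold up polling:

def wait(group_id)
  # simplified sketch: keep waiting while any still-queued job of the group is blocking
  sleep(0.01) while wait?(group_id)
end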
@@ -17,24 +17,22 @@ module Karafka
  # code. This can be used to unlock certain resources or do other things that are
  # not user code but need to run after user code base is executed.
  class Worker
- extend Forwardable
-
- def_delegators :@thread, :join, :terminate, :alive?
+ include Helpers::Async

  # @param jobs_queue [JobsQueue]
  # @return [Worker]
  def initialize(jobs_queue)
  @jobs_queue = jobs_queue
- @thread = Thread.new do
- # If anything goes wrong in this worker thread, it means something went really wrong and
- # we should terminate.
- Thread.current.abort_on_exception = true
- loop { break unless process }
- end
  end

  private

+ # Runs processing of jobs in a loop
+ # Stops when queue is closed.
+ def call
+ loop { break unless process }
+ end
+

  # Fetches a single job, processes it and marks as completed.
  #
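The `Helpers::Async` mixin is a new file (`data/lib/karafka/helpers/async.rb +33 -0`) whose body is not part of this diff. A hedged sketch of what such a mixin could look like, based on how Worker and Runner use `async_call` and `join` (an assumption, not the released code):

module Karafka
  module Helpers
    # Adds the ability to run the #call method in a background thread
    module Async
      # Starts #call in a separate thread
      def async_call
        @thread = Thread.new do
          # Errors should never leak this far; if they do, crash the process
          Thread.current.abort_on_exception = true
          call
        end
      end

      # Waits for the async thread to finish
      def join
        @thread&.join
      end

      # @return [Boolean] true if the async thread is still running
      def alive?
        @thread&.alive? || false
      end
    end
  end
end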
@@ -17,6 +17,11 @@ module Karafka
  def each(&block)
  @batch.each(&block)
  end
+
+ # @return [Integer] number of workers in the batch
+ def size
+ @batch.size
+ end
  end
  end
  end
@@ -17,7 +17,7 @@ module Karafka
  def initialize(name)
  @name = name
  @id = Karafka::App.config.consumer_mapper.call(name)
- @topics = []
+ @topics = Topics.new([])
  end

  # @return [Boolean] true if this consumer group should be active in our current process
@@ -10,7 +10,7 @@ module Karafka
  class SubscriptionGroup
  attr_reader :id, :topics

- # @param topics [Array<Topic>] all the topics that share the same key settings
+ # @param topics [Karafka::Routing::Topics] all the topics that share the same key settings
  # @return [SubscriptionGroup] built subscription group
  def initialize(topics)
  @id = SecureRandom.uuid
@@ -23,8 +23,8 @@ module Karafka

  private_constant :DISTRIBUTION_KEYS

- # @param topics [Array<Topic>] array with topics based on which we want to build subscription
- # groups
+ # @param topics [Karafka::Routing::Topics] all the topics based on which we want to build
+ # subscription groups
  # @return [Array<SubscriptionGroup>] all subscription groups we need in separate threads
  def call(topics)
  topics
@@ -32,6 +32,7 @@ module Karafka
  .group_by(&:first)
  .values
  .map { |value| value.map(&:last) }
+ .map { |topics_array| Routing::Topics.new(topics_array) }
  .map { |grouped_topics| SubscriptionGroup.new(grouped_topics) }
  end

@@ -0,0 +1,38 @@
+ # frozen_string_literal: true
+
+ # frozen_string_literal: true
+
+ module Karafka
+ module Routing
+ # Abstraction layer on top of groups of topics
+ class Topics
+ include Enumerable
+ extend Forwardable
+
+ def_delegators :@accumulator, :[], :size, :empty?, :last, :<<
+
+ # @param topics_array [Array<Karafka::Routing::Topic>] array with topics
+ def initialize(topics_array)
+ @accumulator = topics_array.dup
+ end
+
+ # Yields each topic
+ #
+ # @param [Proc] block we want to yield with on each topic
+ def each(&block)
+ @accumulator.each(&block)
+ end
+
+ # Finds topic by its name
+ #
+ # @param topic_name [String] topic name
+ # @return [Karafka::Routing::Topic]
+ # @raise [Karafka::Errors::TopicNotFoundError] this should never happen. If you see it,
+ # please create an issue.
+ def find(topic_name)
+ @accumulator.find { |topic| topic.name == topic_name } ||
+ raise(Karafka::Errors::TopicNotFoundError, topic_name)
+ end
+ end
+ end
+ end
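Usage sketch for the new collection, where `users_topic` and `orders_topic` are hypothetical routing topics:

topics = Karafka::Routing::Topics.new([users_topic, orders_topic])
topics.find('users')   # => users_topic
topics.find('missing') # raises Karafka::Errors::TopicNotFoundError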
@@ -3,32 +3,37 @@
  module Karafka
  # Class used to run the Karafka listeners in separate threads
  class Runner
- # Starts listening on all the listeners asynchronously
- # Fetch loop should never end. If they do, it is a critical error
+ # Starts listening on all the listeners asynchronously and handles the jobs queue closing
+ # after listeners are done with their work.
  def call
  # Despite possibility of having several independent listeners, we aim to have one queue for
  # jobs across and one workers poll for that
  jobs_queue = Processing::JobsQueue.new

  workers = Processing::WorkersBatch.new(jobs_queue)
- Karafka::Server.workers = workers
+ listeners = Connection::ListenersBatch.new(jobs_queue)

- threads = listeners(jobs_queue).map do |listener|
- # We abort on exception because there should be an exception handling developed for
- # each listener running in separate threads, so the exceptions should never leak
- # and if that happens, it means that something really bad happened and we should stop
- # the whole process
- Thread
- .new { listener.call }
- .tap { |thread| thread.abort_on_exception = true }
- end
+ workers.each(&:async_call)
+ listeners.each(&:async_call)

  # We aggregate threads here for a supervised shutdown process
- Karafka::Server.consumer_threads = threads
+ Karafka::Server.workers = workers
+ Karafka::Server.listeners = listeners

  # All the listener threads need to finish
- threads.each(&:join)
+ listeners.each(&:join)
+
+ # We close the jobs queue only when no listener threads are working.
+ # This ensures, that everything was closed prior to us not accepting anymore jobs and that
+ # no more jobs will be enqueued. Since each listener waits for jobs to finish, once those
+ # are done, we can close.
+ jobs_queue.close
+

  # All the workers need to stop processing anything before we can stop the runner completely
+ # This ensures that even async long-running jobs have time to finish before we are done
+ # with everything. One thing worth keeping in mind though: It is the end user responsibility
+ # to handle the shutdown detection in their long-running processes. Otherwise if timeout
+ # is exceeded, there will be a forced shutdown.
  workers.each(&:join)
  # If anything crashes here, we need to raise the error and crush the runner because it means
@@ -42,18 +47,5 @@ module Karafka
  Karafka::App.stop!
  raise e
  end
-
- private
-
- # @param jobs_queue [Processing::JobsQueue] the main processing queue
- # @return [Array<Karafka::Connection::Listener>] listeners that will consume messages for each
- # of the subscription groups
- def listeners(jobs_queue)
- App
- .subscription_groups
- .map do |subscription_group|
- Karafka::Connection::Listener.new(subscription_group, jobs_queue)
- end
- end
  end
  end
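`Connection::ListenersBatch` (`+24 -0` in the file list) is also not shown here. A hedged sketch of what it could look like, mirroring `WorkersBatch` and the per-subscription-group listener construction removed from the Runner above (an assumption, not the released code):

module Karafka
  module Connection
    # Collection of listeners, one per subscription group
    class ListenersBatch
      include Enumerable

      # @param jobs_queue [Processing::JobsQueue] queue shared by all listeners
      def initialize(jobs_queue)
        @batch = App.subscription_groups.map do |subscription_group|
          Connection::Listener.new(subscription_group, jobs_queue)
        end
      end

      # @param block [Proc] block to yield with each listener
      def each(&block)
        @batch.each(&block)
      end
    end
  end
end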
@@ -3,19 +3,18 @@
  module Karafka
  # FIFO scheduler for messages coming from various topics and partitions
  class Scheduler
- # Yields messages from partitions in the fifo order
+ # Schedules jobs in the fifo order
  #
- # @param messages_buffer [Karafka::Connection::MessagesBuffer] messages buffer with data from
- # multiple topics and partitions
- # @yieldparam [String] topic name
- # @yieldparam [Integer] partition number
- # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
- def call(messages_buffer)
- messages_buffer.each do |topic, partitions|
- partitions.each do |partition, messages|
- yield(topic, partition, messages)
- end
+ # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
+ def schedule_consumption(queue, jobs_array)
+ jobs_array.each do |job|
+ queue << job
  end
  end
+
+ # Both revocation and shutdown jobs can also run in fifo by default
+ alias schedule_revocation schedule_consumption
+ alias schedule_shutdown schedule_consumption
  end
  end
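Since the Pro loader above swaps the scheduler through `config.internal.scheduler`, a replacement only has to respond to the same scheduling API. A hypothetical variant, purely for illustration:

# Enqueues consumption jobs in reverse order; revocation and shutdown keep the
# FIFO behaviour via the aliases inherited from Karafka::Scheduler
class ReversedScheduler < Karafka::Scheduler
  def schedule_consumption(queue, jobs_array)
    jobs_array.reverse_each { |job| queue << job }
  end
end

Karafka::App.config.internal.scheduler = ReversedScheduler.new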