karafka 2.0.0.beta1 → 2.0.0.beta2

Files changed (44)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +13 -0
  4. data/Gemfile.lock +1 -1
  5. data/config/errors.yml +1 -0
  6. data/lib/active_job/karafka.rb +2 -2
  7. data/lib/karafka/active_job/routing/extensions.rb +21 -0
  8. data/lib/karafka/base_consumer.rb +1 -1
  9. data/lib/karafka/connection/client.rb +1 -1
  10. data/lib/karafka/connection/listener.rb +88 -27
  11. data/lib/karafka/connection/listeners_batch.rb +24 -0
  12. data/lib/karafka/connection/messages_buffer.rb +50 -54
  13. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  14. data/lib/karafka/contracts/config.rb +7 -0
  15. data/lib/karafka/helpers/async.rb +33 -0
  16. data/lib/karafka/messages/batch_metadata.rb +26 -3
  17. data/lib/karafka/messages/builders/batch_metadata.rb +17 -29
  18. data/lib/karafka/messages/builders/message.rb +1 -0
  19. data/lib/karafka/messages/builders/messages.rb +4 -12
  20. data/lib/karafka/pro/active_job/consumer.rb +21 -0
  21. data/lib/karafka/pro/active_job/dispatcher.rb +1 -1
  22. data/lib/karafka/pro/loader.rb +5 -1
  23. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +38 -0
  24. data/lib/karafka/pro/scheduler.rb +54 -0
  25. data/lib/karafka/processing/executor.rb +5 -2
  26. data/lib/karafka/processing/executors_buffer.rb +15 -7
  27. data/lib/karafka/processing/jobs/base.rb +13 -1
  28. data/lib/karafka/processing/jobs/consume.rb +4 -2
  29. data/lib/karafka/processing/jobs_queue.rb +15 -12
  30. data/lib/karafka/processing/worker.rb +7 -9
  31. data/lib/karafka/processing/workers_batch.rb +5 -0
  32. data/lib/karafka/routing/consumer_group.rb +1 -1
  33. data/lib/karafka/routing/subscription_group.rb +1 -1
  34. data/lib/karafka/routing/subscription_groups_builder.rb +3 -2
  35. data/lib/karafka/routing/topics.rb +38 -0
  36. data/lib/karafka/runner.rb +19 -27
  37. data/lib/karafka/scheduler.rb +10 -11
  38. data/lib/karafka/server.rb +24 -23
  39. data/lib/karafka/setup/config.rb +1 -0
  40. data/lib/karafka/version.rb +1 -1
  41. data.tar.gz.sig +1 -3
  42. metadata +10 -3
  43. metadata.gz.sig +0 -0
  44. data/lib/karafka/active_job/routing_extensions.rb +0 -18

data/lib/karafka/messages/builders/batch_metadata.rb
@@ -8,42 +8,30 @@ module Karafka
         class << self
           # Creates metadata based on the kafka batch data.
           #
-          # @param kafka_batch [Array<Rdkafka::Consumer::Message>] raw fetched messages
+          # @param messages [Array<Karafka::Messages::Message>] messages array
           # @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
           # @param scheduled_at [Time] moment when the batch was scheduled for processing
           # @return [Karafka::Messages::BatchMetadata] batch metadata object
-          # @note Regarding the time lags: we can use the current time here, as batch metadata is
-          #   created in the worker. So whenever this is being built, it means that the processing
-          #   of this batch has already started.
-          def call(kafka_batch, topic, scheduled_at)
-            now = Time.now
-
+          #
+          # @note We do not set `processed_at` as this needs to be assigned when the batch is
+          #   picked up for processing.
+          def call(messages, topic, scheduled_at)
            Karafka::Messages::BatchMetadata.new(
-              size: kafka_batch.count,
-              first_offset: kafka_batch.first.offset,
-              last_offset: kafka_batch.last.offset,
+              size: messages.count,
+              first_offset: messages.first.offset,
+              last_offset: messages.last.offset,
              deserializer: topic.deserializer,
-              partition: kafka_batch[0].partition,
+              partition: messages.first.partition,
              topic: topic.name,
+              # We go with the assumption that the creation of the whole batch is the last message
+              # creation time
+              created_at: messages.last.timestamp,
+              # When this batch was built and scheduled for execution
              scheduled_at: scheduled_at,
-              # This lag describes how long did it take for a message to be consumed from the
-              # moment it was created
-              consumption_lag: time_distance_in_ms(now, kafka_batch.last.timestamp),
-              # This lag describes how long did a batch have to wait before it was picked up by
-              # one of the workers
-              processing_lag: time_distance_in_ms(now, scheduled_at)
-            ).freeze
-          end
-
-          private
-
-          # Computes time distance in between two times in ms
-          #
-          # @param time1 [Time]
-          # @param time2 [Time]
-          # @return [Integer] distance in between two times in ms
-          def time_distance_in_ms(time1, time2)
-            ((time1 - time2) * 1_000).round
+              # We build the batch metadata when we pick up the job in the worker, thus we can use
+              # current time here
+              processed_at: Time.now
+            )
          end
        end
      end
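
With created_at, scheduled_at and processed_at all carried on the metadata, the lags that beta1 precomputed eagerly can now be derived lazily instead. A minimal sketch of that derivation, assuming the beta1 semantics for both lags (the LagMath module name is illustrative, not part of the gem):

    # Illustrative helper: derives the beta1-style lags from the three
    # timestamps that beta2 stores on the batch metadata.
    module LagMath
      # @param created_at [Time] creation time of the last message in the batch
      # @param scheduled_at [Time] moment the batch was scheduled for processing
      # @param processed_at [Time] moment a worker picked the batch up
      # @return [Hash] both lags in milliseconds
      def self.call(created_at, scheduled_at, processed_at)
        {
          # How long it took from message creation until processing started
          consumption_lag: ((processed_at - created_at) * 1_000).round,
          # How long the batch waited before a worker picked it up
          processing_lag: ((processed_at - scheduled_at) * 1_000).round
        }
      end
    end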

data/lib/karafka/messages/builders/message.rb
@@ -26,6 +26,7 @@ module Karafka
              received_at: received_at
            ).freeze

+            # Karafka messages cannot be frozen because of the lazy deserialization feature
            Karafka::Messages::Message.new(
              kafka_message.payload,
              metadata

data/lib/karafka/messages/builders/messages.rb
@@ -9,27 +9,19 @@ module Karafka
          # Creates messages batch with messages inside based on the incoming messages and the
          # topic from which it comes.
          #
-          # @param kafka_messages [Array<Rdkafka::Consumer::Message>] raw fetched messages
+          # @param messages [Array<Karafka::Messages::Message>] karafka messages array
          # @param topic [Karafka::Routing::Topic] topic for which we're received messages
          # @param received_at [Time] moment in time when the messages were received
          # @return [Karafka::Messages::Messages] messages batch object
-          def call(kafka_messages, topic, received_at)
-            messages_array = kafka_messages.map do |message|
-              Karafka::Messages::Builders::Message.call(
-                message,
-                topic,
-                received_at
-              )
-            end
-
+          def call(messages, topic, received_at)
            metadata = BatchMetadata.call(
-              kafka_messages,
+              messages,
              topic,
              received_at
            ).freeze

            Karafka::Messages::Messages.new(
-              messages_array,
+              messages,
              metadata
            ).freeze
          end
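
Together with the message builder change above, the build pipeline is now split across threads: individual Karafka::Messages::Message objects are built when messages are polled, while the batch wrapper and its metadata are assembled only when a worker picks the job up. A hedged sketch of the resulting flow (raw_kafka_messages stands in for whatever the client polled; it is not a variable from the codebase):

    # Polling side: wrap each raw rdkafka message early.
    messages = raw_kafka_messages.map do |raw_message|
      Karafka::Messages::Builders::Message.call(raw_message, topic, Time.now)
    end

    # Worker side: build the batch and its metadata at pick-up time.
    batch = Karafka::Messages::Builders::Messages.call(messages, topic, received_at)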

data/lib/karafka/pro/active_job/consumer.rb
@@ -0,0 +1,21 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Pro
+    module ActiveJob
+      # This Karafka component is a Pro component.
+      # All of the commercial components are present in the lib/karafka/pro directory of this
+      # repository and their usage requires commercial license agreement.
+      #
+      # Karafka has also commercial-friendly license, commercial support and commercial components.
+      #
+      # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+      # of your code to Maciej Mensfeld.
+
+      # Pro ActiveJob consumer that is suppose to handle long-running jobs as well as short
+      # running jobs
+      class Consumer < Karafka::ActiveJob::Consumer
+      end
+    end
+  end
+end

data/lib/karafka/pro/active_job/dispatcher.rb
@@ -18,7 +18,7 @@ module Karafka
      # much better and more granular control over the dispatch and consumption process.
      class Dispatcher < ::Karafka::ActiveJob::Dispatcher
        # Defaults for dispatching
-        # The can be updated by using `#karafka_options` on the job
+        # They can be updated by using `#karafka_options` on the job
        DEFAULTS = {
          dispatch_method: :produce_async,
          # We don't create a dummy proc based partitioner as we would have to evaluate it with
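
For context, #karafka_options mentioned in the fixed comment is the per-job override hook for these defaults. A short sketch of overriding dispatch_method on a single job (ImportJob is a made-up class; the option key comes straight from DEFAULTS above):

    class ImportJob < ActiveJob::Base
      queue_as :imports

      # Use synchronous dispatch for this job only, instead of the
      # :produce_async default from DEFAULTS
      karafka_options(dispatch_method: :produce_sync)

      def perform(file_id)
        # ... import work ...
      end
    end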

data/lib/karafka/pro/loader.rb
@@ -19,13 +19,17 @@ module Karafka
      # components
      def setup(config)
        require_relative 'performance_tracker'
+        require_relative 'scheduler'
+        require_relative 'processing/jobs/consume_non_blocking'
+        require_relative 'active_job/consumer'
        require_relative 'active_job/dispatcher'
        require_relative 'active_job/job_options_contract'

+        config.internal.scheduler = Scheduler.new
+        config.internal.active_job.consumer = ActiveJob::Consumer
        config.internal.active_job.dispatcher = ActiveJob::Dispatcher.new
        config.internal.active_job.job_options_contract = ActiveJob::JobOptionsContract.new

-        # Monitor time needed to process each message from a single partition
        config.monitor.subscribe(PerformanceTracker.instance)
      end
    end

data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Pro
+    # Pro components related to processing part of Karafka
+    module Processing
+      # Pro jobs
+      module Jobs
+        # This Karafka component is a Pro component.
+        # All of the commercial components are present in the lib/karafka/pro directory of this
+        # repository and their usage requires commercial license agreement.
+        #
+        # Karafka has also commercial-friendly license, commercial support and commercial
+        # components.
+        #
+        # By sending a pull request to the pro components, you are agreeing to transfer the
+        # copyright of your code to Maciej Mensfeld.
+
+        # The main job type in a non-blocking variant.
+        # This variant works "like" the regular consumption but pauses the partition for as long
+        # as it is needed until a job is done.
+        #
+        # It can be useful when having long lasting jobs that would exceed `max.poll.interval`
+        # if would block.
+        #
+        # @note It needs to be working with a proper consumer that will handle the partition
+        #   management. This layer of the framework knows nothing about Kafka messages consumption.
+        class ConsumeNonBlocking < ::Karafka::Processing::Jobs::Consume
+          # Releases the blocking lock after it is done with the preparation phase for this job
+          def prepare
+            super
+            @non_blocking = true
+          end
+        end
+      end
+    end
+  end
+end
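
The ordering in #prepare is the whole trick: super performs the blocking part of the preparation, and only afterwards does the job flip its flag, so the jobs queue stops waiting on it. A simplified sketch of the lifecycle as a worker would see it (the real flow goes through JobsQueue and a paired consumer that handles the partition pausing):

    job = ConsumeNonBlocking.new(executor, messages)

    job.non_blocking? # => false; nothing blocking has been released yet
    job.prepare       # blocking phase: batch building, partition pause, etc.
    job.non_blocking? # => true; polling may continue while the job runs
    job.call          # long consumption, free to exceed max.poll.interval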

data/lib/karafka/pro/scheduler.rb
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Pro
+    # This Karafka component is a Pro component.
+    # All of the commercial components are present in the lib/karafka/pro directory of this
+    # repository and their usage requires commercial license agreement.
+    #
+    # Karafka has also commercial-friendly license, commercial support and commercial components.
+    #
+    # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+    # of your code to Maciej Mensfeld.
+
+    # Optimizes scheduler that takes into consideration of execution time needed to process
+    # messages from given topics partitions. It uses the non-preemptive LJF algorithm
+    #
+    # This scheduler is designed to optimize execution times on jobs that perform IO operations as
+    # when taking IO into consideration, the can achieve optimized parallel processing.
+    #
+    # This scheduler can also work with virtual partitions.
+    #
+    # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
+    # default FIFO scheduler from the default Karafka scheduler
+    class Scheduler < ::Karafka::Scheduler
+      # Schedules jobs in the LJF order for consumption
+      #
+      # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
+      # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
+      #
+      def schedule_consumption(queue, jobs_array)
+        pt = PerformanceTracker.instance
+
+        ordered = []
+
+        jobs_array.each do |job|
+          messages = job.messages
+          message = messages.first
+
+          cost = pt.processing_time_p95(message.topic, message.partition) * messages.size
+
+          ordered << [job, cost]
+        end
+
+        ordered.sort_by!(&:last)
+        ordered.reverse!
+        ordered.map!(&:first)
+
+        ordered.each do |job|
+          queue << job
+        end
+      end
+    end
+  end
+end
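
To make the LJF ordering concrete: each job's cost is the p95 processing time tracked for its topic partition multiplied by the batch size, and the costliest jobs are enqueued first so the longest one does not end up as the tail that keeps workers idle. A toy example with invented numbers:

    # Hypothetical costs: p95 ms per message * batch size
    costs = {
      'topicA/0' => 2.0 * 100, # 200
      'topicA/1' => 9.0 * 50,  # 450
      'topicB/0' => 1.0 * 300  # 300
    }

    costs.sort_by { |_, cost| -cost }.map(&:first)
    # => ["topicA/1", "topicB/0", "topicA/0"]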

data/lib/karafka/processing/executor.rb
@@ -18,6 +18,8 @@ module Karafka
      # @return [String] subscription group id to which a given executor belongs
      attr_reader :group_id

+      attr_reader :messages
+
      # @param group_id [String] id of the subscription group to which the executor belongs
      # @param client [Karafka::Connection::Client] kafka client
      # @param topic [Karafka::Routing::Topic] topic for which this executor will run
@@ -30,9 +32,10 @@ module Karafka
        @pause_tracker = pause_tracker
      end

-      # Builds the consumer instance and sets all that is needed to run the user consumption logic
+      # Builds the consumer instance, builds messages batch and sets all that is needed to run the
+      # user consumption logic
      #
-      # @param messages [Array<Rdkafka::Consumer::Message>] raw rdkafka messages
+      # @param messages [Array<Karafka::Messages::Message>]
      # @param received_at [Time] the moment we've received the batch (actually the moment we've)
      #   enqueued it, but good enough
      def prepare(messages, received_at)

data/lib/karafka/processing/executors_buffer.rb
@@ -23,21 +23,29 @@ module Karafka
        partition,
        pause
      )
-        ktopic = @subscription_group.topics.find { |ktopic| ktopic.name == topic }
+        ktopic = @subscription_group.topics.find(topic)

-        topic || raise(Errors::TopicNotFoundError, topic)
+        ktopic || raise(Errors::TopicNotFoundError, topic)

-        @buffer[topic][partition] ||= Executor.new(
+        @buffer[ktopic][partition] ||= Executor.new(
          @subscription_group.id,
          @client,
-          topic,
+          ktopic,
          pause
        )
      end

-      # Runs the shutdown on all active executors.
-      def shutdown
-        @buffer.values.map(&:values).flatten.each(&:shutdown)
+      # Iterates over all available executors and yields them together with topic and partition
+      # info
+      # @yieldparam [Routing::Topic] karafka routing topic object
+      # @yieldparam [Integer] partition number
+      # @yieldparam [Executor] given executor
+      def each
+        @buffer.each do |ktopic, partitions|
+          partitions.each do |partition, executor|
+            yield(ktopic, partition, executor)
+          end
+        end
      end

      # Clears the executors buffer. Useful for critical errors recovery.
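
The removed #shutdown means executor shutdown becomes the caller's responsibility: with #each exposing topic, partition and executor, the listener can wrap each executor in its own shutdown job and hand those to the scheduler. A hedged sketch of such caller-side usage (it assumes a Jobs::Shutdown job type, which the new schedule_shutdown alias in the scheduler hunk below suggests):

    jobs = []

    # One job per active executor instead of a single in-place shutdown loop
    executors_buffer.each do |_topic, _partition, executor|
      jobs << Karafka::Processing::Jobs::Shutdown.new(executor)
    end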

data/lib/karafka/processing/jobs/base.rb
@@ -15,6 +15,13 @@ module Karafka

        attr_reader :executor

+        # Creates a new job instance
+        def initialize
+          # All jobs are blocking by default and they can release the lock when blocking operations
+          # are done (if needed)
+          @non_blocking = false
+        end
+
        # When redefined can run any code that should run before executing the proper code
        def prepare; end
 
@@ -22,11 +29,16 @@ module Karafka
        def teardown; end

        # @return [Boolean] is this a non-blocking job
+        #
        # @note Blocking job is a job, that will cause the job queue to wait until it is finished
        #   before removing the lock on new jobs being added
+        #
        # @note All the jobs are blocking by default
+        #
+        # @note Job **needs** to mark itself as non-blocking only **after** it is done with all
+        #   the blocking things (pausing partition, etc).
        def non_blocking?
-          false
+          @non_blocking
        end
      end
    end

data/lib/karafka/processing/jobs/consume.rb
@@ -6,10 +6,12 @@ module Karafka
      # The main job type. It runs the executor that triggers given topic partition messages
      # processing in an underlying consumer instance.
      class Consume < Base
+        # @return [Array<Rdkafka::Consumer::Message>] array with messages
+        attr_reader :messages
+
        # @param executor [Karafka::Processing::Executor] executor that is suppose to run a given
        #   job
-        # @param messages [Array<dkafka::Consumer::Message>] array with raw rdkafka messages with
-        #   which we are suppose to work
+        # @param messages [Karafka::Messages::Messages] karafka messages batch
        # @return [Consume]
        def initialize(executor, messages)
          @executor = executor

data/lib/karafka/processing/jobs_queue.rb
@@ -12,7 +12,7 @@ module Karafka
    class JobsQueue
      # @return [Karafka::Processing::JobsQueue]
      def initialize
-        @queue = ::Queue.new
+        @queue = Queue.new
        # Those queues will act as a semaphores internally. Since we need an indicator for waiting
        # we could use Thread.pass but this is expensive. Instead we can just lock until any
        # of the workers finishes their work and we can re-check. This means that in the worse
@@ -100,8 +100,17 @@ module Karafka
        end
      end

-      # Blocks when there are things in the queue in a given group and waits until all the jobs
-      # from a given group are completed
+      # @param group_id [String]
+      #
+      # @return [Boolean] tell us if we have anything in the processing (or for processing) from
+      #   a given group.
+      def empty?(group_id)
+        @in_processing[group_id].empty?
+      end
+
+      # Blocks when there are things in the queue in a given group and waits until all the blocking
+      # jobs from a given group are completed
+      #
      # @param group_id [String] id of the group in which jobs we're interested.
      # @note This method is blocking.
      def wait(group_id)
@@ -114,16 +123,10 @@

      # @param group_id [String] id of the group in which jobs we're interested.
      # @return [Boolean] should we keep waiting or not
+      # @note We do not wait for non-blocking jobs. Their flow should allow for `poll` running
+      #   as they may exceed `max.poll.interval`
      def wait?(group_id)
-        group = @in_processing[group_id]
-
-        # If it is stopping, all the previous messages that are processed at the moment need to
-        # finish. Otherwise we may risk closing the client and committing offsets afterwards
-        return false if Karafka::App.stopping? && group.empty?
-        return false if @queue.closed?
-        return false if group.empty?
-
-        !group.all?(&:non_blocking?)
+        !@in_processing[group_id].all?(&:non_blocking?)
      end
    end
  end
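
The slimmed-down #wait? changes what #wait blocks on: the shutdown and closed-queue special cases are gone, and the loop spins only while any job from the group is still blocking. Expressed as a simplified equivalent (the real implementation parks on internal semaphore queues rather than sleeping):

    def wait(group_id)
      # Re-checked whenever a worker reports completion. Non-blocking jobs
      # are skipped, so a running ConsumeNonBlocking job no longer stalls
      # the listener's polling loop.
      sleep(0.01) while wait?(group_id)
    end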

data/lib/karafka/processing/worker.rb
@@ -17,24 +17,22 @@ module Karafka
    # code. This can be used to unlock certain resources or do other things that are
    # not user code but need to run after user code base is executed.
    class Worker
-      extend Forwardable
-
-      def_delegators :@thread, :join, :terminate, :alive?
+      include Helpers::Async

      # @param jobs_queue [JobsQueue]
      # @return [Worker]
      def initialize(jobs_queue)
        @jobs_queue = jobs_queue
-        @thread = Thread.new do
-          # If anything goes wrong in this worker thread, it means something went really wrong and
-          # we should terminate.
-          Thread.current.abort_on_exception = true
-          loop { break unless process }
-        end
      end

      private

+      # Runs processing of jobs in a loop
+      # Stops when queue is closed.
+      def call
+        loop { break unless process }
+      end
+
      # Fetches a single job, processes it and marks as completed.
      #
      # @note We do not have error handling here, as no errors should propagate this far. If they
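
Helpers::Async (data/lib/karafka/helpers/async.rb in the files list above) absorbs the thread management that used to live here. A minimal sketch of what such a module can look like, assuming it mirrors the removed behavior (the real module is expected to also expose join, terminate and alive? delegated to the thread):

    module Karafka
      module Helpers
        # Runs the including object's #call in a dedicated background thread
        module Async
          def async_call
            @thread ||= Thread.new do
              # A leak here means something went really wrong, so we crash
              Thread.current.abort_on_exception = true
              call
            end
          end

          # Waits for the background thread to finish
          def join
            @thread&.join
          end
        end
      end
    end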

data/lib/karafka/processing/workers_batch.rb
@@ -17,6 +17,11 @@ module Karafka
      def each(&block)
        @batch.each(&block)
      end
+
+      # @return [Integer] number of workers in the batch
+      def size
+        @batch.size
+      end
    end
  end
end

data/lib/karafka/routing/consumer_group.rb
@@ -17,7 +17,7 @@ module Karafka
      def initialize(name)
        @name = name
        @id = Karafka::App.config.consumer_mapper.call(name)
-        @topics = []
+        @topics = Topics.new([])
      end

      # @return [Boolean] true if this consumer group should be active in our current process

data/lib/karafka/routing/subscription_group.rb
@@ -10,7 +10,7 @@ module Karafka
    class SubscriptionGroup
      attr_reader :id, :topics

-      # @param topics [Array<Topic>] all the topics that share the same key settings
+      # @param topics [Karafka::Routing::Topics] all the topics that share the same key settings
      # @return [SubscriptionGroup] built subscription group
      def initialize(topics)
        @id = SecureRandom.uuid

data/lib/karafka/routing/subscription_groups_builder.rb
@@ -23,8 +23,8 @@ module Karafka

      private_constant :DISTRIBUTION_KEYS

-      # @param topics [Array<Topic>] array with topics based on which we want to build subscription
-      #   groups
+      # @param topics [Karafka::Routing::Topics] all the topics based on which we want to build
+      #   subscription groups
      # @return [Array<SubscriptionGroup>] all subscription groups we need in separate threads
      def call(topics)
        topics
@@ -32,6 +32,7 @@ module Karafka
          .group_by(&:first)
          .values
          .map { |value| value.map(&:last) }
+          .map { |topics_array| Routing::Topics.new(topics_array) }
          .map { |grouped_topics| SubscriptionGroup.new(grouped_topics) }
      end

data/lib/karafka/routing/topics.rb
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+# frozen_string_literal: true
+
+module Karafka
+  module Routing
+    # Abstraction layer on top of groups of topics
+    class Topics
+      include Enumerable
+      extend Forwardable
+
+      def_delegators :@accumulator, :[], :size, :empty?, :last, :<<
+
+      # @param topics_array [Array<Karafka::Routing::Topic>] array with topics
+      def initialize(topics_array)
+        @accumulator = topics_array.dup
+      end
+
+      # Yields each topic
+      #
+      # @param [Proc] block we want to yield with on each topic
+      def each(&block)
+        @accumulator.each(&block)
+      end
+
+      # Finds topic by its name
+      #
+      # @param topic_name [String] topic name
+      # @return [Karafka::Routing::Topic]
+      # @raise [Karafka::Errors::TopicNotFoundError] this should never happen. If you see it,
+      #   please create an issue.
+      def find(topic_name)
+        @accumulator.find { |topic| topic.name == topic_name } ||
+          raise(Karafka::Errors::TopicNotFoundError, topic_name)
+      end
+    end
+  end
+end
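
Usage stays close to a plain array thanks to Enumerable and the delegators, while #find switches from block-based lookup to name-based lookup that raises on a miss. For example (users_topic and orders_topic stand for routing topic objects named "users" and "orders"):

    topics = Karafka::Routing::Topics.new([users_topic, orders_topic])

    topics.size            # => 2
    topics.map(&:name)     # => ["users", "orders"]
    topics.find('users')   # => users_topic
    topics.find('missing') # raises Karafka::Errors::TopicNotFoundError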

data/lib/karafka/runner.rb
@@ -3,32 +3,37 @@
 module Karafka
   # Class used to run the Karafka listeners in separate threads
   class Runner
-    # Starts listening on all the listeners asynchronously
-    # Fetch loop should never end. If they do, it is a critical error
+    # Starts listening on all the listeners asynchronously and handles the jobs queue closing
+    # after listeners are done with their work.
    def call
      # Despite possibility of having several independent listeners, we aim to have one queue for
      # jobs across and one workers poll for that
      jobs_queue = Processing::JobsQueue.new

      workers = Processing::WorkersBatch.new(jobs_queue)
-      Karafka::Server.workers = workers
+      listeners = Connection::ListenersBatch.new(jobs_queue)

-      threads = listeners(jobs_queue).map do |listener|
-        # We abort on exception because there should be an exception handling developed for
-        # each listener running in separate threads, so the exceptions should never leak
-        # and if that happens, it means that something really bad happened and we should stop
-        # the whole process
-        Thread
-          .new { listener.call }
-          .tap { |thread| thread.abort_on_exception = true }
-      end
+      workers.each(&:async_call)
+      listeners.each(&:async_call)

      # We aggregate threads here for a supervised shutdown process
-      Karafka::Server.consumer_threads = threads
+      Karafka::Server.workers = workers
+      Karafka::Server.listeners = listeners

      # All the listener threads need to finish
-      threads.each(&:join)
+      listeners.each(&:join)
+
+      # We close the jobs queue only when no listener threads are working.
+      # This ensures, that everything was closed prior to us not accepting anymore jobs and that
+      # no more jobs will be enqueued. Since each listener waits for jobs to finish, once those
+      # are done, we can close.
+      jobs_queue.close
+
      # All the workers need to stop processing anything before we can stop the runner completely
+      # This ensures that even async long-running jobs have time to finish before we are done
+      # with everything. One thing worth keeping in mind though: It is the end user responsibility
+      # to handle the shutdown detection in their long-running processes. Otherwise if timeout
+      # is exceeded, there will be a forced shutdown.
      workers.each(&:join)
      # If anything crashes here, we need to raise the error and crush the runner because it means
      # that something terrible happened
@@ -42,18 +47,5 @@ module Karafka
      Karafka::App.stop!
      raise e
    end
-
-    private
-
-    # @param jobs_queue [Processing::JobsQueue] the main processing queue
-    # @return [Array<Karafka::Connection::Listener>] listeners that will consume messages for each
-    #   of the subscription groups
-    def listeners(jobs_queue)
-      App
-        .subscription_groups
-        .map do |subscription_group|
-          Karafka::Connection::Listener.new(subscription_group, jobs_queue)
-        end
-    end
  end
end

data/lib/karafka/scheduler.rb
@@ -3,19 +3,18 @@
 module Karafka
   # FIFO scheduler for messages coming from various topics and partitions
   class Scheduler
-    # Yields messages from partitions in the fifo order
+    # Schedules jobs in the fifo order
    #
-    # @param messages_buffer [Karafka::Connection::MessagesBuffer] messages buffer with data from
-    #   multiple topics and partitions
-    # @yieldparam [String] topic name
-    # @yieldparam [Integer] partition number
-    # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
-    def call(messages_buffer)
-      messages_buffer.each do |topic, partitions|
-        partitions.each do |partition, messages|
-          yield(topic, partition, messages)
-        end
+    # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
+    # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
+    def schedule_consumption(queue, jobs_array)
+      jobs_array.each do |job|
+        queue << job
      end
    end
+
+    # Both revocation and shutdown jobs can also run in fifo by default
+    alias schedule_revocation schedule_consumption
+    alias schedule_shutdown schedule_consumption
  end
end
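
Since the Pro loader wires its LJF variant in through config.internal.scheduler (see the loader hunk above), the same seam can in principle take any scheduler responding to the three schedule_* methods. A hedged sketch (ReversedScheduler is a made-up example, and internal settings are not a public API):

    # Illustrative only: enqueues consumption jobs in reverse arrival order
    class ReversedScheduler < ::Karafka::Scheduler
      def schedule_consumption(queue, jobs_array)
        jobs_array.reverse_each { |job| queue << job }
      end
    end

    Karafka::App.setup do |config|
      config.internal.scheduler = ReversedScheduler.new
    end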