karafka 2.0.0.beta4 → 2.0.0.rc2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +18 -1
  4. data/CHANGELOG.md +30 -0
  5. data/CONTRIBUTING.md +0 -5
  6. data/Gemfile.lock +12 -42
  7. data/README.md +2 -12
  8. data/bin/benchmarks +2 -2
  9. data/bin/integrations +10 -3
  10. data/bin/{stress → stress_many} +1 -1
  11. data/bin/stress_one +13 -0
  12. data/config/errors.yml +48 -5
  13. data/docker-compose.yml +27 -18
  14. data/karafka.gemspec +2 -4
  15. data/lib/karafka/active_job/job_options_contract.rb +8 -2
  16. data/lib/karafka/active_job/routing/extensions.rb +1 -1
  17. data/lib/karafka/app.rb +2 -1
  18. data/lib/karafka/base_consumer.rb +24 -19
  19. data/lib/karafka/cli/install.rb +15 -2
  20. data/lib/karafka/cli/server.rb +4 -2
  21. data/lib/karafka/connection/client.rb +40 -17
  22. data/lib/karafka/connection/listener.rb +37 -11
  23. data/lib/karafka/connection/rebalance_manager.rb +20 -19
  24. data/lib/karafka/contracts/base.rb +2 -8
  25. data/lib/karafka/contracts/config.rb +71 -38
  26. data/lib/karafka/contracts/consumer_group.rb +25 -18
  27. data/lib/karafka/contracts/consumer_group_topic.rb +30 -16
  28. data/lib/karafka/contracts/server_cli_options.rb +18 -7
  29. data/lib/karafka/errors.rb +3 -0
  30. data/lib/karafka/helpers/colorize.rb +20 -0
  31. data/lib/karafka/pro/active_job/consumer.rb +1 -8
  32. data/lib/karafka/pro/active_job/job_options_contract.rb +10 -6
  33. data/lib/karafka/pro/base_consumer.rb +27 -21
  34. data/lib/karafka/pro/loader.rb +13 -6
  35. data/lib/karafka/pro/processing/coordinator.rb +63 -0
  36. data/lib/karafka/pro/processing/jobs_builder.rb +3 -2
  37. data/lib/karafka/pro/processing/partitioner.rb +41 -0
  38. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  39. data/lib/karafka/pro/routing/extensions.rb +6 -0
  40. data/lib/karafka/processing/coordinator.rb +88 -0
  41. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  42. data/lib/karafka/processing/executor.rb +7 -17
  43. data/lib/karafka/processing/executors_buffer.rb +46 -15
  44. data/lib/karafka/processing/jobs/consume.rb +4 -2
  45. data/lib/karafka/processing/jobs_builder.rb +3 -2
  46. data/lib/karafka/processing/partitioner.rb +22 -0
  47. data/lib/karafka/processing/result.rb +0 -5
  48. data/lib/karafka/processing/scheduler.rb +22 -0
  49. data/lib/karafka/routing/consumer_group.rb +1 -1
  50. data/lib/karafka/routing/topic.rb +9 -0
  51. data/lib/karafka/setup/config.rb +26 -12
  52. data/lib/karafka/templates/example_consumer.rb.erb +2 -2
  53. data/lib/karafka/version.rb +1 -1
  54. data/lib/karafka.rb +0 -2
  55. data.tar.gz.sig +0 -0
  56. metadata +15 -36
  57. metadata.gz.sig +0 -0
  58. data/lib/karafka/pro/scheduler.rb +0 -54
  59. data/lib/karafka/scheduler.rb +0 -20
@@ -26,7 +26,7 @@ module Karafka
26
26
  messages.each do |message|
27
27
  # If for any reason we've lost this partition, not worth iterating over new messages
28
28
  # as they are no longer ours
29
- return if revoked?
29
+ break if revoked?
30
30
  break if Karafka::App.stopping?
31
31
 
32
32
  ::ActiveJob::Base.execute(
@@ -34,13 +34,6 @@ module Karafka
34
34
  )
35
35
 
36
36
  mark_as_consumed(message)
37
-
38
- # We check it twice as the job may be long running
39
- # If marking fails, it also means it got revoked and we can stop consuming
40
- return if revoked?
41
-
42
- # Do not process more if we are shutting down
43
- break if Karafka::App.stopping?
44
37
  end
45
38
  end
46
39
  end
@@ -14,13 +14,17 @@ module Karafka
14
14
  module ActiveJob
15
15
  # Contract for validating the options that can be altered with `#karafka_options` per job
16
16
  # class that works with Pro features.
17
- class JobOptionsContract < ::Karafka::ActiveJob::JobOptionsContract
18
- # Dry types
19
- Types = include Dry.Types()
20
-
21
- params do
22
- optional(:partitioner).value(Types.Interface(:call))
17
+ class JobOptionsContract < Contracts::Base
18
+ configure do |config|
19
+ config.error_messages = YAML.safe_load(
20
+ File.read(
21
+ File.join(Karafka.gem_root, 'config', 'errors.yml')
22
+ )
23
+ ).fetch('en').fetch('validations').fetch('job_options')
23
24
  end
25
+
26
+ optional(:dispatch_method) { |val| %i[produce_async produce_sync].include?(val) }
27
+ optional(:partitioner) { |val| val.respond_to?(:call) }
24
28
  end
25
29
  end
26
30
  end
@@ -26,26 +26,38 @@ module Karafka
26
26
  # Pauses processing of a given partition until we're done with the processing
27
27
  # This ensures, that we can easily poll not reaching the `max.poll.interval`
28
28
  def on_before_consume
29
- # Pause at the first message in a batch. That way in case of a crash, we will not loose
30
- # any messages
31
29
  return unless topic.long_running_job?
32
30
 
33
- pause(messages.first.offset, MAX_PAUSE_TIME)
31
+ # This ensures that when running long-running jobs (LRJ) with virtual partitions (VP), things operate as expected
32
+ coordinator.on_started do |first_group_message|
33
+ # Pause at the first message in a batch. That way in case of a crash, we will not lose
34
+ # any messages
35
+ pause(first_group_message.offset, MAX_PAUSE_TIME)
36
+ end
34
37
  end
35
38
 
36
39
  # Runs extra logic after consumption that is related to handling long running jobs
37
40
  # @note This overwrites the '#on_after_consume' from the base consumer
38
41
  def on_after_consume
39
- # Nothing to do if we lost the partition
40
- return if revoked?
42
+ coordinator.on_finished do |first_group_message, last_group_message|
43
+ on_after_consume_regular(first_group_message, last_group_message)
44
+ end
45
+ end
46
+
47
+ private
41
48
 
42
- if @consumption.success?
43
- pause_tracker.reset
49
+ # Handles the post-consumption flow depending on topic settings
50
+ #
51
+ # @param first_message [Karafka::Messages::Message]
52
+ # @param last_message [Karafka::Messages::Message]
53
+ def on_after_consume_regular(first_message, last_message)
54
+ if coordinator.success?
55
+ coordinator.pause_tracker.reset
44
56
 
45
57
  # We use the non-blocking one here. If someone needs the blocking one, can implement it
46
58
  # with manual offset management
47
59
  # Mark as consumed only if manual offset management is not on
48
- mark_as_consumed(messages.last) unless topic.manual_offset_management?
60
+ mark_as_consumed(last_message) unless topic.manual_offset_management? || revoked?
49
61
 
50
62
  # If this is not a long running job there is nothing for us to do here
51
63
  return unless topic.long_running_job?
@@ -53,24 +65,18 @@ module Karafka
53
65
  # Once processing is done, we move to the new offset based on commits
54
66
  # Here, in case manual offset management is off, we have the new proper offset of a
55
67
  # first message from another batch from `@seek_offset`. If manual offset management
56
- # is on, we move to place where the user indicated it was finished.
57
- seek(@seek_offset || messages.first.offset)
68
+ # is on, we move to place where the user indicated it was finished. This can create an
69
+ # interesting (yet valid) corner case, where with manual offset management on and no
70
+ # marking as consumed, we end up with an infinite loop processing same messages over and
71
+ # over again
72
+ seek(@seek_offset || first_message.offset)
73
+
58
74
  resume
59
75
  else
60
76
  # If processing failed, we need to pause
61
- pause(@seek_offset || messages.first.offset)
77
+ pause(@seek_offset || first_message.offset)
62
78
  end
63
79
  end
64
-
65
- # Marks this consumer revoked state as true
66
- # This allows us for things like lrj to finish early as this state may change during lrj
67
- # execution
68
- def on_revoked
69
- # @note This may already be set to true if we tried to commit offsets and failed. In case
70
- # like this it will automatically be marked as revoked.
71
- @revoked = true
72
- super
73
- end
74
80
  end
75
81
  end
76
82
  end
@@ -17,9 +17,11 @@ module Karafka
17
17
  COMPONENTS = %w[
18
18
  base_consumer
19
19
  performance_tracker
20
- scheduler
20
+ processing/scheduler
21
21
  processing/jobs/consume_non_blocking
22
22
  processing/jobs_builder
23
+ processing/coordinator
24
+ processing/partitioner
23
25
  routing/extensions
24
26
  active_job/consumer
25
27
  active_job/dispatcher
@@ -35,11 +37,16 @@ module Karafka
35
37
  def setup(config)
36
38
  COMPONENTS.each { |component| require_relative(component) }
37
39
 
38
- config.internal.scheduler = Scheduler.new
39
- config.internal.jobs_builder = Processing::JobsBuilder.new
40
- config.internal.active_job.consumer = ActiveJob::Consumer
41
- config.internal.active_job.dispatcher = ActiveJob::Dispatcher.new
42
- config.internal.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
40
+ icfg = config.internal
41
+
42
+ icfg.processing.coordinator_class = Processing::Coordinator
43
+ icfg.processing.partitioner_class = Processing::Partitioner
44
+ icfg.processing.scheduler = Processing::Scheduler.new
45
+ icfg.processing.jobs_builder = Processing::JobsBuilder.new
46
+
47
+ icfg.active_job.consumer_class = ActiveJob::Consumer
48
+ icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
49
+ icfg.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
43
50
 
44
51
  ::Karafka::Routing::Topic.include(Routing::Extensions)
45
52
 
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Pro
5
+ module Processing
6
+ # Pro coordinator that provides extra orchestration methods useful for parallel processing
7
+ # within the same partition
8
+ class Coordinator < ::Karafka::Processing::Coordinator
9
+ # @param args [Object] anything the base coordinator accepts
10
+ def initialize(*args)
11
+ super
12
+ @on_started_invoked = false
13
+ @on_finished_invoked = false
14
+ @flow_lock = Mutex.new
15
+ end
16
+
17
+ # Starts the coordination process
18
+ # @param messages [Array<Karafka::Messages::Message>] messages for which processing we are
19
+ # going to coordinate.
20
+ def start(messages)
21
+ super
22
+
23
+ @mutex.synchronize do
24
+ @on_started_invoked = false
25
+ @on_finished_invoked = false
26
+ @first_message = messages.first
27
+ @last_message = messages.last
28
+ end
29
+ end
30
+
31
+ # @return [Boolean] is the coordinated work finished or not
32
+ def finished?
33
+ @running_jobs.zero?
34
+ end
35
+
36
+ # Runs given code only once per all the coordinated jobs upon starting first of them
37
+ def on_started
38
+ @flow_lock.synchronize do
39
+ return if @on_started_invoked
40
+
41
+ @on_started_invoked = true
42
+
43
+ yield(@first_message, @last_message)
44
+ end
45
+ end
46
+
47
+ # Runs once when all the work that is supposed to be coordinated is finished
48
+ # It runs once per all the coordinated jobs and should be used to run any type of post
49
+ # jobs coordination processing execution
50
+ def on_finished
51
+ @flow_lock.synchronize do
52
+ return unless finished?
53
+ return if @on_finished_invoked
54
+
55
+ @on_finished_invoked = true
56
+
57
+ yield(@first_message, @last_message)
58
+ end
59
+ end
60
+ end
61
+ end
62
+ end
63
+ end
@@ -16,11 +16,12 @@ module Karafka
16
16
  class JobsBuilder < ::Karafka::Processing::JobsBuilder
17
17
  # @param executor [Karafka::Processing::Executor]
18
18
  # @param messages [Karafka::Messages::Messages] messages batch to be consumed
19
+ # @param coordinator [Karafka::Processing::Coordinator]
19
20
  # @return [Karafka::Processing::Jobs::Consume] blocking job
20
21
  # @return [Karafka::Pro::Processing::Jobs::ConsumeNonBlocking] non blocking for lrj
21
- def consume(executor, messages)
22
+ def consume(executor, messages, coordinator)
22
23
  if executor.topic.long_running_job?
23
- Jobs::ConsumeNonBlocking.new(executor, messages)
24
+ Jobs::ConsumeNonBlocking.new(executor, messages, coordinator)
24
25
  else
25
26
  super
26
27
  end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ module Processing
15
+ # Pro partitioner that can distribute work based on the virtual partitioner settings
16
+ class Partitioner < ::Karafka::Processing::Partitioner
17
+ # @param topic [String] topic name
18
+ # @param messages [Array<Karafka::Messages::Message>] karafka messages
19
+ # @yieldparam [Integer] group id
20
+ # @yieldparam [Array<Karafka::Messages::Message>] karafka messages
21
+ def call(topic, messages)
22
+ ktopic = @subscription_group.topics.find(topic)
23
+
24
+ @concurrency ||= ::Karafka::App.config.concurrency
25
+
26
+ # We only partition work if we have a virtual partitioner and more than one thread to
27
+ # process the data. With one thread it is not worth partitioning the work as the work
28
+ # itself will be assigned to one thread (pointless work)
29
+ if ktopic.virtual_partitioner? && @concurrency > 1
30
+ messages
31
+ .group_by { |msg| ktopic.virtual_partitioner.call(msg).hash.abs % @concurrency }
32
+ .each { |group_id, messages_group| yield(group_id, messages_group) }
33
+ else
34
+ # When no virtual partitioner, works as regular one
35
+ yield(0, messages)
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ module Processing
15
+ # Optimized scheduler that takes into consideration the execution time needed to process
16
+ # messages from given topics partitions. It uses the non-preemptive LJF algorithm
17
+ #
18
+ # This scheduler is designed to optimize execution times on jobs that perform IO operations
19
+ # as when taking IO into consideration, it can achieve optimized parallel processing.
20
+ #
21
+ # This scheduler can also work with virtual partitions.
22
+ #
23
+ # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
24
+ # default FIFO scheduler from the default Karafka scheduler
25
+ class Scheduler < ::Karafka::Processing::Scheduler
26
+ # Schedules jobs in the LJF order for consumption
27
+ #
28
+ # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
29
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
30
+ #
31
+ def schedule_consumption(queue, jobs_array)
32
+ pt = PerformanceTracker.instance
33
+
34
+ ordered = []
35
+
36
+ jobs_array.each do |job|
37
+ messages = job.messages
38
+ message = messages.first
39
+
40
+ cost = pt.processing_time_p95(message.topic, message.partition) * messages.size
41
+
42
+ ordered << [job, cost]
43
+ end
44
+
45
+ ordered.sort_by!(&:last)
46
+ ordered.reverse!
47
+ ordered.map!(&:first)
48
+
49
+ ordered.each do |job|
50
+ queue << job
51
+ end
52
+ end
53
+ end
54
+ end
55
+ end
56
+ end
@@ -19,9 +19,15 @@ module Karafka
19
19
  # @param base [Class] class we extend
20
20
  def included(base)
21
21
  base.attr_accessor :long_running_job
22
+ base.attr_accessor :virtual_partitioner
22
23
  end
23
24
  end
24
25
 
26
+ # @return [Boolean] true if virtual partitioner is defined, false otherwise
27
+ def virtual_partitioner?
28
+ virtual_partitioner != nil
29
+ end
30
+
25
31
  # @return [Boolean] is a given job on a topic a long running one
26
32
  def long_running_job?
27
33
  @long_running_job || false
@@ -0,0 +1,88 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Processing
5
+ # Basic coordinator that allows us to provide coordination objects into consumers.
6
+ #
7
+ # This is a wrapping layer to simplify management of work to be handled around consumption.
8
+ #
9
+ # @note This coordinator needs to be thread safe. Some operations are performed only in the
10
+ # listener thread, but we go with thread-safe by default for all not to worry about potential
11
+ # future mistakes.
12
+ class Coordinator
13
+ # @return [Karafka::TimeTrackers::Pause]
14
+ attr_reader :pause_tracker
15
+
16
+ # @param pause_tracker [Karafka::TimeTrackers::Pause] pause tracker for given topic partition
17
+ def initialize(pause_tracker)
18
+ @pause_tracker = pause_tracker
19
+ @revoked = false
20
+ @consumptions = {}
21
+ @running_jobs = 0
22
+ @mutex = Mutex.new
23
+ end
24
+
25
+ # Starts the coordinator for given consumption jobs
26
+ # @param _messages [Array<Karafka::Messages::Message>] batch of messages for which we are
27
+ # going to coordinate work. Not used with regular coordinator.
28
+ def start(_messages)
29
+ @mutex.synchronize do
30
+ @running_jobs = 0
31
+ # We need to clear the consumption results hash here, otherwise we could end up storing
32
+ # consumption results of consumer instances we no longer control
33
+ @consumptions.clear
34
+ end
35
+ end
36
+
37
+ # Increases number of jobs that we handle with this coordinator
38
+ def increment
39
+ @mutex.synchronize { @running_jobs += 1 }
40
+ end
41
+
42
+ # Decrements number of jobs we handle at the moment
43
+ def decrement
44
+ @mutex.synchronize do
45
+ @running_jobs -= 1
46
+
47
+ return @running_jobs unless @running_jobs.negative?
48
+
49
+ # This should never happen. If it does, something is heavily out of sync. Please reach
50
+ # out to us if you encounter this
51
+ raise Karafka::Errors::InvalidCoordinatorState, 'Was zero before decrementation'
52
+ end
53
+ end
54
+
55
+ # @param consumer [Object] karafka consumer (normal or pro)
56
+ # @return [Karafka::Processing::Result] result object which we can use to indicate
57
+ # consumption processing state.
58
+ def consumption(consumer)
59
+ @mutex.synchronize do
60
+ @consumptions[consumer] ||= Processing::Result.new
61
+ end
62
+ end
63
+
64
+ # Is all the consumption done and finished successfully for this coordinator
65
+ def success?
66
+ @mutex.synchronize { @running_jobs.zero? && @consumptions.values.all?(&:success?) }
67
+ end
68
+
69
+ # Marks given coordinator for processing group as revoked
70
+ #
71
+ # This is invoked in two places:
72
+ # - from the main listener loop when we detect revoked partitions
73
+ # - from the consumer in case checkpointing fails
74
+ #
75
+ # This means, we can end up having consumer being aware that it was revoked prior to the
76
+ # listener loop dispatching the revocation job. It is ok, as effectively nothing will be
77
+ # processed until revocation jobs are done.
78
+ def revoke
79
+ @mutex.synchronize { @revoked = true }
80
+ end
81
+
82
+ # @return [Boolean] is the partition we are processing revoked or not
83
+ def revoked?
84
+ @revoked
85
+ end
86
+ end
87
+ end
88
+ end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Processing
5
+ # Coordinators builder used to build coordinators per topic partition
6
+ #
7
+ # It provides direct pauses access for revocation
8
+ #
9
+ # @note This buffer operates only from the listener loop, thus we do not have to make it
10
+ # thread-safe.
11
+ class CoordinatorsBuffer
12
+ def initialize
13
+ @pauses_manager = Connection::PausesManager.new
14
+ @coordinator_class = ::Karafka::App.config.internal.processing.coordinator_class
15
+ @coordinators = Hash.new { |h, k| h[k] = {} }
16
+ end
17
+
18
+ # @param topic [String] topic name
19
+ # @param partition [Integer] partition number
20
+ def find_or_create(topic, partition)
21
+ @coordinators[topic][partition] ||= @coordinator_class.new(
22
+ @pauses_manager.fetch(topic, partition)
23
+ )
24
+ end
25
+
26
+ # Resumes processing of partitions for which pause time has ended.
27
+ # @param block we want to run for resumed topic partitions
28
+ # @yieldparam [String] topic name
29
+ # @yieldparam [Integer] partition number
30
+ def resume(&block)
31
+ @pauses_manager.resume(&block)
32
+ end
33
+
34
+ # @param topic [String] topic name
35
+ # @param partition [Integer] partition number
36
+ def revoke(topic, partition)
37
+ return unless @coordinators[topic].key?(partition)
38
+
39
+ # The fact that we delete here does not change the fact that the executor still holds the
40
+ # reference to this coordinator. We delete it here, as we will no longer process any
41
+ # new stuff with it and we may need a new coordinator if we regain this partition, but the
42
+ # coordinator may still be in use
43
+ @coordinators[topic].delete(partition).revoke
44
+ end
45
+
46
+ # Clears coordinators and re-creates the pauses manager
47
+ # This should be used only for critical errors recovery
48
+ def reset
49
+ @pauses_manager = Connection::PausesManager.new
50
+ @coordinators.clear
51
+ end
52
+ end
53
+ end
54
+ end
@@ -30,13 +30,11 @@ module Karafka
30
30
  # @param group_id [String] id of the subscription group to which the executor belongs
31
31
  # @param client [Karafka::Connection::Client] kafka client
32
32
  # @param topic [Karafka::Routing::Topic] topic for which this executor will run
33
- # @param pause_tracker [Karafka::TimeTrackers::Pause] fetch pause tracker for pausing
34
- def initialize(group_id, client, topic, pause_tracker)
33
+ def initialize(group_id, client, topic)
35
34
  @id = SecureRandom.uuid
36
35
  @group_id = group_id
37
36
  @client = client
38
37
  @topic = topic
39
- @pause_tracker = pause_tracker
40
38
  end
41
39
 
42
40
  # Builds the consumer instance, builds messages batch and sets all that is needed to run the
@@ -45,20 +43,15 @@ module Karafka
45
43
  # @param messages [Array<Karafka::Messages::Message>]
46
44
  # @param received_at [Time] the moment we've received the batch (actually the moment we've)
47
45
  # enqueued it, but good enough
48
- def before_consume(messages, received_at)
46
+ # @param coordinator [Karafka::Processing::Coordinator] coordinator for processing management
47
+ def before_consume(messages, received_at, coordinator)
49
48
  # Recreate consumer with each batch if persistence is not enabled
50
49
  # We reload the consumers with each batch instead of relying on some external signals
51
50
  # when needed for consistency. That way devs may have it on or off and not in this
52
51
  # middle state, where re-creation of a consumer instance would occur only sometimes
53
- @recreate = true unless ::Karafka::App.config.consumer_persistence
52
+ @consumer = nil unless ::Karafka::App.config.consumer_persistence
54
53
 
55
- # If @recreate was set to true (aside from non persistent), it means, that revocation or
56
- # a shutdown happened and we need to have a new instance for running another consume for
57
- # this topic partition
58
- if @recreate
59
- @consumer = nil
60
- @recreate = false
61
- end
54
+ consumer.coordinator = coordinator
62
55
 
63
56
  # First we build messages batch...
64
57
  consumer.messages = Messages::Builders::Messages.call(
@@ -78,7 +71,7 @@ module Karafka
78
71
 
79
72
  # Runs consumer after consumption code
80
73
  def after_consume
81
- consumer.on_after_consume if @consumer
74
+ consumer.on_after_consume
82
75
  end
83
76
 
84
77
  # Runs the controller `#revoked` method that should be triggered when a given consumer is
@@ -95,7 +88,6 @@ module Karafka
95
88
  # consumer instance.
96
89
  def revoked
97
90
  consumer.on_revoked if @consumer
98
- @recreate = true
99
91
  end
100
92
 
101
93
  # Runs the controller `#shutdown` method that should be triggered when a given consumer is
@@ -107,7 +99,6 @@ module Karafka
107
99
  # There is a case, where the consumer no longer exists because it was revoked, in case like
108
100
  # that we do not build a new instance and shutdown should not be triggered.
109
101
  consumer.on_shutdown if @consumer
110
- @recreate = true
111
102
  end
112
103
 
113
104
  private
@@ -115,10 +106,9 @@ module Karafka
115
106
  # @return [Object] cached consumer instance
116
107
  def consumer
117
108
  @consumer ||= begin
118
- consumer = @topic.consumer.new
109
+ consumer = @topic.consumer_class.new
119
110
  consumer.topic = @topic
120
111
  consumer.client = @client
121
- consumer.pause_tracker = @pause_tracker
122
112
  consumer.producer = ::Karafka::App.producer
123
113
  consumer
124
114
  end
@@ -11,30 +11,48 @@ module Karafka
11
11
  def initialize(client, subscription_group)
12
12
  @subscription_group = subscription_group
13
13
  @client = client
14
- @buffer = Hash.new { |h, k| h[k] = {} }
14
+ # We need two layers here to keep track of topics, partitions and processing groups
15
+ @buffer = Hash.new { |h, k| h[k] = Hash.new { |h2, k2| h2[k2] = {} } }
15
16
  end
16
17
 
18
+ # Finds or creates an executor based on the provided details
19
+ #
17
20
  # @param topic [String] topic name
18
21
  # @param partition [Integer] partition number
19
- # @param pause [TimeTrackers::Pause] pause corresponding with provided topic and partition
22
+ # @param parallel_key [String] parallel group key
20
23
  # @return [Executor] consumer executor
21
- def fetch(
22
- topic,
23
- partition,
24
- pause
25
- )
26
- ktopic = @subscription_group.topics.find(topic)
24
+ def find_or_create(topic, partition, parallel_key)
25
+ ktopic = find_topic(topic)
27
26
 
28
- ktopic || raise(Errors::TopicNotFoundError, topic)
29
-
30
- @buffer[ktopic][partition] ||= Executor.new(
27
+ @buffer[ktopic][partition][parallel_key] ||= Executor.new(
31
28
  @subscription_group.id,
32
29
  @client,
33
- ktopic,
34
- pause
30
+ ktopic
35
31
  )
36
32
  end
37
33
 
34
+ # Revokes executors of a given topic partition, so they won't be used anymore for incoming
35
+ # messages
36
+ #
37
+ # @param topic [String] topic name
38
+ # @param partition [Integer] partition number
39
+ def revoke(topic, partition)
40
+ ktopic = find_topic(topic)
41
+
42
+ @buffer[ktopic][partition].clear
43
+ end
44
+
45
+ # Finds all the executors available for a given topic partition
46
+ #
47
+ # @param topic [String] topic name
48
+ # @param partition [Integer] partition number
49
+ # @return [Array<Executor>] executors in use for this topic + partition
50
+ def find_all(topic, partition)
51
+ ktopic = find_topic(topic)
52
+
53
+ @buffer[ktopic][partition].values
54
+ end
55
+
38
56
  # Iterates over all available executors and yields them together with topic and partition
39
57
  # info
40
58
  # @yieldparam [Routing::Topic] karafka routing topic object
@@ -42,8 +60,11 @@ module Karafka
42
60
  # @yieldparam [Executor] given executor
43
61
  def each
44
62
  @buffer.each do |ktopic, partitions|
45
- partitions.each do |partition, executor|
46
- yield(ktopic, partition, executor)
63
+ partitions.each do |partition, executors|
64
+ executors.each do |_parallel_key, executor|
65
+ # We skip the parallel key here as it does not serve any value when iterating
66
+ yield(ktopic, partition, executor)
67
+ end
47
68
  end
48
69
  end
49
70
  end
@@ -52,6 +73,16 @@ module Karafka
52
73
  def clear
53
74
  @buffer.clear
54
75
  end
76
+
77
+ private
78
+
79
+ # Finds topic based on its name
80
+ #
81
+ # @param topic [String] topic we're looking for
82
+ # @return [Karafka::Routing::Topic] topic we're interested in
83
+ def find_topic(topic)
84
+ @subscription_group.topics.find(topic) || raise(Errors::TopicNotFoundError, topic)
85
+ end
55
86
  end
56
87
  end
57
88
  end