karafka 2.0.0.beta4 → 2.0.0.rc2

Files changed (59)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +18 -1
  4. data/CHANGELOG.md +30 -0
  5. data/CONTRIBUTING.md +0 -5
  6. data/Gemfile.lock +12 -42
  7. data/README.md +2 -12
  8. data/bin/benchmarks +2 -2
  9. data/bin/integrations +10 -3
  10. data/bin/{stress → stress_many} +1 -1
  11. data/bin/stress_one +13 -0
  12. data/config/errors.yml +48 -5
  13. data/docker-compose.yml +27 -18
  14. data/karafka.gemspec +2 -4
  15. data/lib/karafka/active_job/job_options_contract.rb +8 -2
  16. data/lib/karafka/active_job/routing/extensions.rb +1 -1
  17. data/lib/karafka/app.rb +2 -1
  18. data/lib/karafka/base_consumer.rb +24 -19
  19. data/lib/karafka/cli/install.rb +15 -2
  20. data/lib/karafka/cli/server.rb +4 -2
  21. data/lib/karafka/connection/client.rb +40 -17
  22. data/lib/karafka/connection/listener.rb +37 -11
  23. data/lib/karafka/connection/rebalance_manager.rb +20 -19
  24. data/lib/karafka/contracts/base.rb +2 -8
  25. data/lib/karafka/contracts/config.rb +71 -38
  26. data/lib/karafka/contracts/consumer_group.rb +25 -18
  27. data/lib/karafka/contracts/consumer_group_topic.rb +30 -16
  28. data/lib/karafka/contracts/server_cli_options.rb +18 -7
  29. data/lib/karafka/errors.rb +3 -0
  30. data/lib/karafka/helpers/colorize.rb +20 -0
  31. data/lib/karafka/pro/active_job/consumer.rb +1 -8
  32. data/lib/karafka/pro/active_job/job_options_contract.rb +10 -6
  33. data/lib/karafka/pro/base_consumer.rb +27 -21
  34. data/lib/karafka/pro/loader.rb +13 -6
  35. data/lib/karafka/pro/processing/coordinator.rb +63 -0
  36. data/lib/karafka/pro/processing/jobs_builder.rb +3 -2
  37. data/lib/karafka/pro/processing/partitioner.rb +41 -0
  38. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  39. data/lib/karafka/pro/routing/extensions.rb +6 -0
  40. data/lib/karafka/processing/coordinator.rb +88 -0
  41. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  42. data/lib/karafka/processing/executor.rb +7 -17
  43. data/lib/karafka/processing/executors_buffer.rb +46 -15
  44. data/lib/karafka/processing/jobs/consume.rb +4 -2
  45. data/lib/karafka/processing/jobs_builder.rb +3 -2
  46. data/lib/karafka/processing/partitioner.rb +22 -0
  47. data/lib/karafka/processing/result.rb +0 -5
  48. data/lib/karafka/processing/scheduler.rb +22 -0
  49. data/lib/karafka/routing/consumer_group.rb +1 -1
  50. data/lib/karafka/routing/topic.rb +9 -0
  51. data/lib/karafka/setup/config.rb +26 -12
  52. data/lib/karafka/templates/example_consumer.rb.erb +2 -2
  53. data/lib/karafka/version.rb +1 -1
  54. data/lib/karafka.rb +0 -2
  55. data.tar.gz.sig +0 -0
  56. metadata +15 -36
  57. metadata.gz.sig +0 -0
  58. data/lib/karafka/pro/scheduler.rb +0 -54
  59. data/lib/karafka/scheduler.rb +0 -20

data/lib/karafka/pro/active_job/consumer.rb
@@ -26,7 +26,7 @@ module Karafka
  messages.each do |message|
  # If for any reason we've lost this partition, not worth iterating over new messages
  # as they are no longer ours
- return if revoked?
+ break if revoked?
  break if Karafka::App.stopping?

  ::ActiveJob::Base.execute(
@@ -34,13 +34,6 @@ module Karafka
  )

  mark_as_consumed(message)
-
- # We check it twice as the job may be long running
- # If marking fails, it also means it got revoked and we can stop consuming
- return if revoked?
-
- # Do not process more if we are shutting down
- break if Karafka::App.stopping?
  end
  end
  end

data/lib/karafka/pro/active_job/job_options_contract.rb
@@ -14,13 +14,17 @@ module Karafka
  module ActiveJob
  # Contract for validating the options that can be altered with `#karafka_options` per job
  # class that works with Pro features.
- class JobOptionsContract < ::Karafka::ActiveJob::JobOptionsContract
- # Dry types
- Types = include Dry.Types()
-
- params do
- optional(:partitioner).value(Types.Interface(:call))
+ class JobOptionsContract < Contracts::Base
+ configure do |config|
+ config.error_messages = YAML.safe_load(
+ File.read(
+ File.join(Karafka.gem_root, 'config', 'errors.yml')
+ )
+ ).fetch('en').fetch('validations').fetch('job_options')
  end
+
+ optional(:dispatch_method) { |val| %i[produce_async produce_sync].include?(val) }
+ optional(:partitioner) { |val| val.respond_to?(:call) }
  end
  end
  end
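
A minimal usage sketch of what this contract validates (the job class name and the partitioner lambda are illustrative, not part of this changeset): `dispatch_method` must be `:produce_async` or `:produce_sync`, and `partitioner` must respond to `#call`.

  # Hypothetical Pro job class; karafka_options is the per-job API mentioned in the comment above
  class WelcomeEmailJob < ActiveJob::Base
    queue_as :emails

    karafka_options(
      # validated: must be :produce_async or :produce_sync
      dispatch_method: :produce_async,
      # validated: must respond to #call
      partitioner: ->(job) { job.arguments.first.to_s }
    )
  end
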

data/lib/karafka/pro/base_consumer.rb
@@ -26,26 +26,38 @@ module Karafka
  # Pauses processing of a given partition until we're done with the processing
  # This ensures, that we can easily poll not reaching the `max.poll.interval`
  def on_before_consume
- # Pause at the first message in a batch. That way in case of a crash, we will not loose
- # any messages
  return unless topic.long_running_job?

- pause(messages.first.offset, MAX_PAUSE_TIME)
+ # This ensures, that when running LRJ with VP, things operate as expected
+ coordinator.on_started do |first_group_message|
+ # Pause at the first message in a batch. That way in case of a crash, we will not loose
+ # any messages
+ pause(first_group_message.offset, MAX_PAUSE_TIME)
+ end
  end

  # Runs extra logic after consumption that is related to handling long running jobs
  # @note This overwrites the '#on_after_consume' from the base consumer
  def on_after_consume
- # Nothing to do if we lost the partition
- return if revoked?
+ coordinator.on_finished do |first_group_message, last_group_message|
+ on_after_consume_regular(first_group_message, last_group_message)
+ end
+ end
+
+ private

- if @consumption.success?
- pause_tracker.reset
+ # Handles the post-consumption flow depending on topic settings
+ #
+ # @param first_message [Karafka::Messages::Message]
+ # @param last_message [Karafka::Messages::Message]
+ def on_after_consume_regular(first_message, last_message)
+ if coordinator.success?
+ coordinator.pause_tracker.reset

  # We use the non-blocking one here. If someone needs the blocking one, can implement it
  # with manual offset management
  # Mark as consumed only if manual offset management is not on
- mark_as_consumed(messages.last) unless topic.manual_offset_management?
+ mark_as_consumed(last_message) unless topic.manual_offset_management? || revoked?

  # If this is not a long running job there is nothing for us to do here
  return unless topic.long_running_job?
@@ -53,24 +65,18 @@ module Karafka
  # Once processing is done, we move to the new offset based on commits
  # Here, in case manual offset management is off, we have the new proper offset of a
  # first message from another batch from `@seek_offset`. If manual offset management
- # is on, we move to place where the user indicated it was finished.
- seek(@seek_offset || messages.first.offset)
+ # is on, we move to place where the user indicated it was finished. This can create an
+ # interesting (yet valid) corner case, where with manual offset management on and no
+ # marking as consumed, we end up with an infinite loop processing same messages over and
+ # over again
+ seek(@seek_offset || first_message.offset)
+
  resume
  else
  # If processing failed, we need to pause
- pause(@seek_offset || messages.first.offset)
+ pause(@seek_offset || first_message.offset)
  end
  end
-
- # Marks this consumer revoked state as true
- # This allows us for things like lrj to finish early as this state may change during lrj
- # execution
- def on_revoked
- # @note This may already be set to true if we tried to commit offsets and failed. In case
- # like this it will automatically be marked as revoked.
- @revoked = true
- super
- end
  end
  end
  end
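
The corner case documented in the new comment can be reproduced with a consumer sketch like the one below (consumer name is illustrative): on an LRJ topic with manual offset management and no `mark_as_consumed`, `@seek_offset` never advances, so the post-consumption `seek` keeps returning to the first offset of the batch.

  # Illustrative only: an LRJ topic with manual offset management that never checkpoints
  class StuckConsumer < Karafka::BaseConsumer
    def consume
      messages.each { |message| puts message.raw_payload }
      # no mark_as_consumed(message) here, so after this batch the consumer
      # seeks back to the first offset and processes the same data again
    end
  end
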

data/lib/karafka/pro/loader.rb
@@ -17,9 +17,11 @@ module Karafka
  COMPONENTS = %w[
  base_consumer
  performance_tracker
- scheduler
+ processing/scheduler
  processing/jobs/consume_non_blocking
  processing/jobs_builder
+ processing/coordinator
+ processing/partitioner
  routing/extensions
  active_job/consumer
  active_job/dispatcher
@@ -35,11 +37,16 @@ module Karafka
  def setup(config)
  COMPONENTS.each { |component| require_relative(component) }

- config.internal.scheduler = Scheduler.new
- config.internal.jobs_builder = Processing::JobsBuilder.new
- config.internal.active_job.consumer = ActiveJob::Consumer
- config.internal.active_job.dispatcher = ActiveJob::Dispatcher.new
- config.internal.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
+ icfg = config.internal
+
+ icfg.processing.coordinator_class = Processing::Coordinator
+ icfg.processing.partitioner_class = Processing::Partitioner
+ icfg.processing.scheduler = Processing::Scheduler.new
+ icfg.processing.jobs_builder = Processing::JobsBuilder.new
+
+ icfg.active_job.consumer_class = ActiveJob::Consumer
+ icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
+ icfg.active_job.job_options_contract = ActiveJob::JobOptionsContract.new

  ::Karafka::Routing::Topic.include(Routing::Extensions)


data/lib/karafka/pro/processing/coordinator.rb (new file)
@@ -0,0 +1,63 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module Pro
+ module Processing
+ # Pro coordinator that provides extra orchestration methods useful for parallel processing
+ # within the same partition
+ class Coordinator < ::Karafka::Processing::Coordinator
+ # @param args [Object] anything the base coordinator accepts
+ def initialize(*args)
+ super
+ @on_started_invoked = false
+ @on_finished_invoked = false
+ @flow_lock = Mutex.new
+ end
+
+ # Starts the coordination process
+ # @param messages [Array<Karafka::Messages::Message>] messages for which processing we are
+ # going to coordinate.
+ def start(messages)
+ super
+
+ @mutex.synchronize do
+ @on_started_invoked = false
+ @on_finished_invoked = false
+ @first_message = messages.first
+ @last_message = messages.last
+ end
+ end
+
+ # @return [Boolean] is the coordinated work finished or not
+ def finished?
+ @running_jobs.zero?
+ end
+
+ # Runs given code only once per all the coordinated jobs upon starting first of them
+ def on_started
+ @flow_lock.synchronize do
+ return if @on_started_invoked
+
+ @on_started_invoked = true
+
+ yield(@first_message, @last_message)
+ end
+ end
+
+ # Runs once when all the work that is suppose to be coordinated is finished
+ # It runs once per all the coordinated jobs and should be used to run any type of post
+ # jobs coordination processing execution
+ def on_finished
+ @flow_lock.synchronize do
+ return unless finished?
+ return if @on_finished_invoked
+
+ @on_finished_invoked = true
+
+ yield(@first_message, @last_message)
+ end
+ end
+ end
+ end
+ end
+ end
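
A hedged sketch of the once-per-collective-batch semantics the two callbacks provide (the pause tracker and messages batch are assumed to come from a running listener):

  coordinator = Karafka::Pro::Processing::Coordinator.new(pause_tracker)
  coordinator.start(messages)
  3.times { coordinator.increment }   # e.g. three virtual partition jobs for one batch

  3.times do
    # The block runs only on the first call, with the first/last message of the whole batch
    coordinator.on_started { |first, _last| puts "pausing at offset #{first.offset}" }
  end

  3.times do
    coordinator.decrement
    # Yields only once, after the last running job has finished (finished? == true)
    coordinator.on_finished { |_first, last| puts "collective batch done at #{last.offset}" }
  end
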

data/lib/karafka/pro/processing/jobs_builder.rb
@@ -16,11 +16,12 @@ module Karafka
  class JobsBuilder < ::Karafka::Processing::JobsBuilder
  # @param executor [Karafka::Processing::Executor]
  # @param messages [Karafka::Messages::Messages] messages batch to be consumed
+ # @param coordinator [Karafka::Processing::Coordinator]
  # @return [Karafka::Processing::Jobs::Consume] blocking job
  # @return [Karafka::Pro::Processing::Jobs::ConsumeNonBlocking] non blocking for lrj
- def consume(executor, messages)
+ def consume(executor, messages, coordinator)
  if executor.topic.long_running_job?
- Jobs::ConsumeNonBlocking.new(executor, messages)
+ Jobs::ConsumeNonBlocking.new(executor, messages, coordinator)
  else
  super
  end

data/lib/karafka/pro/processing/partitioner.rb (new file)
@@ -0,0 +1,41 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+ module Pro
+ module Processing
+ # Pro partitioner that can distribute work based on the virtual partitioner settings
+ class Partitioner < ::Karafka::Processing::Partitioner
+ # @param topic [String] topic name
+ # @param messages [Array<Karafka::Messages::Message>] karafka messages
+ # @yieldparam [Integer] group id
+ # @yieldparam [Array<Karafka::Messages::Message>] karafka messages
+ def call(topic, messages)
+ ktopic = @subscription_group.topics.find(topic)
+
+ @concurrency ||= ::Karafka::App.config.concurrency
+
+ # We only partition work if we have a virtual partitioner and more than one thread to
+ # process the data. With one thread it is not worth partitioning the work as the work
+ # itself will be assigned to one thread (pointless work)
+ if ktopic.virtual_partitioner? && @concurrency > 1
+ messages
+ .group_by { |msg| ktopic.virtual_partitioner.call(msg).hash.abs % @concurrency }
+ .each { |group_id, messages_group| yield(group_id, messages_group) }
+ else
+ # When no virtual partitioner, works as regular one
+ yield(0, messages)
+ end
+ end
+ end
+ end
+ end
+ end
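
The grouping rule above can be illustrated standalone (plain Ruby, no Karafka objects; the key-based partitioner is an assumed user-provided callable): messages with the same virtual key always land in the same group, so their relative order is preserved within a virtual partition.

  require 'ostruct'

  concurrency = 3
  virtual_partitioner = ->(msg) { msg.key }   # assumed user-provided callable

  messages = [
    OpenStruct.new(key: 'user-1', offset: 0),
    OpenStruct.new(key: 'user-2', offset: 1),
    OpenStruct.new(key: 'user-1', offset: 2)
  ]

  groups = messages.group_by { |msg| virtual_partitioner.call(msg).hash.abs % concurrency }
  # Both 'user-1' messages share a group id, so they stay ordered inside their virtual group
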

data/lib/karafka/pro/processing/scheduler.rb (new file)
@@ -0,0 +1,56 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+ module Pro
+ module Processing
+ # Optimizes scheduler that takes into consideration of execution time needed to process
+ # messages from given topics partitions. It uses the non-preemptive LJF algorithm
+ #
+ # This scheduler is designed to optimize execution times on jobs that perform IO operations
+ # as when taking IO into consideration, the can achieve optimized parallel processing.
+ #
+ # This scheduler can also work with virtual partitions.
+ #
+ # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
+ # default FIFO scheduler from the default Karafka scheduler
+ class Scheduler < ::Karafka::Processing::Scheduler
+ # Schedules jobs in the LJF order for consumption
+ #
+ # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
+ #
+ def schedule_consumption(queue, jobs_array)
+ pt = PerformanceTracker.instance
+
+ ordered = []
+
+ jobs_array.each do |job|
+ messages = job.messages
+ message = messages.first
+
+ cost = pt.processing_time_p95(message.topic, message.partition) * messages.size
+
+ ordered << [job, cost]
+ end
+
+ ordered.sort_by!(&:last)
+ ordered.reverse!
+ ordered.map!(&:first)
+
+ ordered.each do |job|
+ queue << job
+ end
+ end
+ end
+ end
+ end
+ end
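
The LJF ordering boils down to sorting by estimated cost (p95 processing time per message times batch size) and enqueuing the most expensive batches first. A standalone illustration with made-up numbers:

  jobs = [
    { name: 'topic_a/0', p95_ms: 4.0, batch_size: 10 },  # cost 40
    { name: 'topic_b/2', p95_ms: 1.0, batch_size: 100 }, # cost 100
    { name: 'topic_a/1', p95_ms: 9.0, batch_size: 5 }    # cost 45
  ]

  ordered = jobs
            .map { |job| [job, job[:p95_ms] * job[:batch_size]] }
            .sort_by(&:last)
            .reverse
            .map(&:first)

  ordered.map { |job| job[:name] } # => ["topic_b/2", "topic_a/1", "topic_a/0"]
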

data/lib/karafka/pro/routing/extensions.rb
@@ -19,9 +19,15 @@ module Karafka
  # @param base [Class] class we extend
  def included(base)
  base.attr_accessor :long_running_job
+ base.attr_accessor :virtual_partitioner
  end
  end

+ # @return [Boolean] true if virtual partitioner is defined, false otherwise
+ def virtual_partitioner?
+ virtual_partitioner != nil
+ end
+
  # @return [Boolean] is a given job on a topic a long running one
  def long_running_job?
  @long_running_job || false
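
Since the extension only adds plain attr_accessors plus the two predicates, a minimal behavior sketch (how the routing DSL assigns these values is outside this diff; `topic` here is assumed to be an already built `Karafka::Routing::Topic`):

  topic.long_running_job?                         # => false
  topic.long_running_job = true
  topic.long_running_job?                         # => true

  topic.virtual_partitioner?                      # => false
  topic.virtual_partitioner = ->(msg) { msg.key }
  topic.virtual_partitioner?                      # => true
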

data/lib/karafka/processing/coordinator.rb (new file)
@@ -0,0 +1,88 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module Processing
+ # Basic coordinator that allows us to provide coordination objects into consumers.
+ #
+ # This is a wrapping layer to simplify management of work to be handled around consumption.
+ #
+ # @note This coordinator needs to be thread safe. Some operations are performed only in the
+ # listener thread, but we go with thread-safe by default for all not to worry about potential
+ # future mistakes.
+ class Coordinator
+ # @return [Karafka::TimeTrackers::Pause]
+ attr_reader :pause_tracker
+
+ # @param pause_tracker [Karafka::TimeTrackers::Pause] pause tracker for given topic partition
+ def initialize(pause_tracker)
+ @pause_tracker = pause_tracker
+ @revoked = false
+ @consumptions = {}
+ @running_jobs = 0
+ @mutex = Mutex.new
+ end
+
+ # Starts the coordinator for given consumption jobs
+ # @param _messages [Array<Karafka::Messages::Message>] batch of message for which we are
+ # going to coordinate work. Not used with regular coordinator.
+ def start(_messages)
+ @mutex.synchronize do
+ @running_jobs = 0
+ # We need to clear the consumption results hash here, otherwise we could end up storing
+ # consumption results of consumer instances we no longer control
+ @consumptions.clear
+ end
+ end
+
+ # Increases number of jobs that we handle with this coordinator
+ def increment
+ @mutex.synchronize { @running_jobs += 1 }
+ end
+
+ # Decrements number of jobs we handle at the moment
+ def decrement
+ @mutex.synchronize do
+ @running_jobs -= 1
+
+ return @running_jobs unless @running_jobs.negative?
+
+ # This should never happen. If it does, something is heavily out of sync. Please reach
+ # out to us if you encounter this
+ raise Karafka::Errors::InvalidCoordinatorState, 'Was zero before decrementation'
+ end
+ end
+
+ # @param consumer [Object] karafka consumer (normal or pro)
+ # @return [Karafka::Processing::Result] result object which we can use to indicate
+ # consumption processing state.
+ def consumption(consumer)
+ @mutex.synchronize do
+ @consumptions[consumer] ||= Processing::Result.new
+ end
+ end
+
+ # Is all the consumption done and finished successfully for this coordinator
+ def success?
+ @mutex.synchronize { @running_jobs.zero? && @consumptions.values.all?(&:success?) }
+ end
+
+ # Marks given coordinator for processing group as revoked
+ #
+ # This is invoked in two places:
+ # - from the main listener loop when we detect revoked partitions
+ # - from the consumer in case checkpointing fails
+ #
+ # This means, we can end up having consumer being aware that it was revoked prior to the
+ # listener loop dispatching the revocation job. It is ok, as effectively nothing will be
+ # processed until revocation jobs are done.
+ def revoke
+ @mutex.synchronize { @revoked = true }
+ end
+
+ # @return [Boolean] is the partition we are processing revoked or not
+ def revoked?
+ @revoked
+ end
+ end
+ end
+ end
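
A hedged sketch of how the listener and workers are expected to drive a coordinator (pause tracker, messages and consumer are assumed to come from a running process; only methods introduced in this file are used):

  coordinator = Karafka::Processing::Coordinator.new(pause_tracker)
  coordinator.start(messages)

  2.times { coordinator.increment }           # two consumption jobs share this coordinator
  result = coordinator.consumption(consumer)  # per-consumer Processing::Result tracking
  2.times { coordinator.decrement }

  # success? is true only when no jobs are running and every tracked consumption succeeded
  coordinator.success?

  coordinator.revoke    # listener loop or a failed checkpoint marks the revocation
  coordinator.revoked?  # => true
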

data/lib/karafka/processing/coordinators_buffer.rb (new file)
@@ -0,0 +1,54 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module Processing
+ # Coordinators builder used to build coordinators per topic partition
+ #
+ # It provides direct pauses access for revocation
+ #
+ # @note This buffer operates only from the listener loop, thus we do not have to make it
+ # thread-safe.
+ class CoordinatorsBuffer
+ def initialize
+ @pauses_manager = Connection::PausesManager.new
+ @coordinator_class = ::Karafka::App.config.internal.processing.coordinator_class
+ @coordinators = Hash.new { |h, k| h[k] = {} }
+ end
+
+ # @param topic [String] topic name
+ # @param partition [Integer] partition number
+ def find_or_create(topic, partition)
+ @coordinators[topic][partition] ||= @coordinator_class.new(
+ @pauses_manager.fetch(topic, partition)
+ )
+ end
+
+ # Resumes processing of partitions for which pause time has ended.
+ # @param block we want to run for resumed topic partitions
+ # @yieldparam [String] topic name
+ # @yieldparam [Integer] partition number
+ def resume(&block)
+ @pauses_manager.resume(&block)
+ end
+
+ # @param topic [String] topic name
+ # @param partition [Integer] partition number
+ def revoke(topic, partition)
+ return unless @coordinators[topic].key?(partition)
+
+ # The fact that we delete here does not change the fact that the executor still holds the
+ # reference to this coordinator. We delete it here, as we will no longer process any
+ # new stuff with it and we may need a new coordinator if we regain this partition, but the
+ # coordinator may still be in use
+ @coordinators[topic].delete(partition).revoke
+ end
+
+ # Clears coordinators and re-created the pauses manager
+ # This should be used only for critical errors recovery
+ def reset
+ @pauses_manager = Connection::PausesManager.new
+ @coordinators.clear
+ end
+ end
+ end
+ end
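
A hedged usage sketch from the listener's perspective (assumes a booted app so that `coordinator_class` and `Connection::PausesManager` are configured; topic name is illustrative):

  buffer = Karafka::Processing::CoordinatorsBuffer.new

  coordinator = buffer.find_or_create('orders', 0)
  coordinator.equal?(buffer.find_or_create('orders', 0)) # => true (memoized per topic partition)

  buffer.revoke('orders', 0)          # marks it revoked and drops it from the buffer
  buffer.find_or_create('orders', 0)  # builds a fresh coordinator if we regain the partition

  buffer.resume do |topic, partition|
    # runs for every topic partition whose pause has expired
  end
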

data/lib/karafka/processing/executor.rb
@@ -30,13 +30,11 @@ module Karafka
  # @param group_id [String] id of the subscription group to which the executor belongs
  # @param client [Karafka::Connection::Client] kafka client
  # @param topic [Karafka::Routing::Topic] topic for which this executor will run
- # @param pause_tracker [Karafka::TimeTrackers::Pause] fetch pause tracker for pausing
- def initialize(group_id, client, topic, pause_tracker)
+ def initialize(group_id, client, topic)
  @id = SecureRandom.uuid
  @group_id = group_id
  @client = client
  @topic = topic
- @pause_tracker = pause_tracker
  end

  # Builds the consumer instance, builds messages batch and sets all that is needed to run the
@@ -45,20 +43,15 @@ module Karafka
  # @param messages [Array<Karafka::Messages::Message>]
  # @param received_at [Time] the moment we've received the batch (actually the moment we've)
  # enqueued it, but good enough
- def before_consume(messages, received_at)
+ # @param coordinator [Karafka::Processing::Coordinator] coordinator for processing management
+ def before_consume(messages, received_at, coordinator)
  # Recreate consumer with each batch if persistence is not enabled
  # We reload the consumers with each batch instead of relying on some external signals
  # when needed for consistency. That way devs may have it on or off and not in this
  # middle state, where re-creation of a consumer instance would occur only sometimes
- @recreate = true unless ::Karafka::App.config.consumer_persistence
+ @consumer = nil unless ::Karafka::App.config.consumer_persistence

- # If @recreate was set to true (aside from non persistent), it means, that revocation or
- # a shutdown happened and we need to have a new instance for running another consume for
- # this topic partition
- if @recreate
- @consumer = nil
- @recreate = false
- end
+ consumer.coordinator = coordinator

  # First we build messages batch...
  consumer.messages = Messages::Builders::Messages.call(
@@ -78,7 +71,7 @@ module Karafka

  # Runs consumer after consumption code
  def after_consume
- consumer.on_after_consume if @consumer
+ consumer.on_after_consume
  end

  # Runs the controller `#revoked` method that should be triggered when a given consumer is
@@ -95,7 +88,6 @@ module Karafka
  # consumer instance.
  def revoked
  consumer.on_revoked if @consumer
- @recreate = true
  end

  # Runs the controller `#shutdown` method that should be triggered when a given consumer is
@@ -107,7 +99,6 @@ module Karafka
  # There is a case, where the consumer no longer exists because it was revoked, in case like
  # that we do not build a new instance and shutdown should not be triggered.
  consumer.on_shutdown if @consumer
- @recreate = true
  end

  private
@@ -115,10 +106,9 @@ module Karafka
  # @return [Object] cached consumer instance
  def consumer
  @consumer ||= begin
- consumer = @topic.consumer.new
+ consumer = @topic.consumer_class.new
  consumer.topic = @topic
  consumer.client = @client
- consumer.pause_tracker = @pause_tracker
  consumer.producer = ::Karafka::App.producer
  consumer
  end
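
A hedged sketch of the executor lifecycle after this change (client, topic, messages and coordinator are assumed to come from a running listener; only methods visible in these hunks are used): the coordinator is now injected per batch instead of a pause tracker being fixed at construction time.

  executor = Karafka::Processing::Executor.new(subscription_group.id, client, topic)

  executor.before_consume(messages, Time.now, coordinator)
  # ... a worker thread runs the actual consumption job here ...
  executor.after_consume

  executor.revoked   # on partition loss; no longer flips a @recreate flag
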

data/lib/karafka/processing/executors_buffer.rb
@@ -11,30 +11,48 @@ module Karafka
  def initialize(client, subscription_group)
  @subscription_group = subscription_group
  @client = client
- @buffer = Hash.new { |h, k| h[k] = {} }
+ # We need two layers here to keep track of topics, partitions and processing groups
+ @buffer = Hash.new { |h, k| h[k] = Hash.new { |h2, k2| h2[k2] = {} } }
  end

+ # Finds or creates an executor based on the provided details
+ #
  # @param topic [String] topic name
  # @param partition [Integer] partition number
- # @param pause [TimeTrackers::Pause] pause corresponding with provided topic and partition
+ # @param parallel_key [String] parallel group key
  # @return [Executor] consumer executor
- def fetch(
- topic,
- partition,
- pause
- )
- ktopic = @subscription_group.topics.find(topic)
+ def find_or_create(topic, partition, parallel_key)
+ ktopic = find_topic(topic)

- ktopic || raise(Errors::TopicNotFoundError, topic)
-
- @buffer[ktopic][partition] ||= Executor.new(
+ @buffer[ktopic][partition][parallel_key] ||= Executor.new(
  @subscription_group.id,
  @client,
- ktopic,
- pause
+ ktopic
  )
  end

+ # Revokes executors of a given topic partition, so they won't be used anymore for incoming
+ # messages
+ #
+ # @param topic [String] topic name
+ # @param partition [Integer] partition number
+ def revoke(topic, partition)
+ ktopic = find_topic(topic)
+
+ @buffer[ktopic][partition].clear
+ end
+
+ # Finds all the executors available for a given topic partition
+ #
+ # @param topic [String] topic name
+ # @param partition [Integer] partition number
+ # @return [Array<Executor>] executors in use for this topic + partition
+ def find_all(topic, partition)
+ ktopic = find_topic(topic)
+
+ @buffer[ktopic][partition].values
+ end
+
  # Iterates over all available executors and yields them together with topic and partition
  # info
  # @yieldparam [Routing::Topic] karafka routing topic object
@@ -42,8 +60,11 @@ module Karafka
  # @yieldparam [Executor] given executor
  def each
  @buffer.each do |ktopic, partitions|
- partitions.each do |partition, executor|
- yield(ktopic, partition, executor)
+ partitions.each do |partition, executors|
+ executors.each do |_parallel_key, executor|
+ # We skip the parallel key here as it does not serve any value when iterating
+ yield(ktopic, partition, executor)
+ end
  end
  end
  end
@@ -52,6 +73,16 @@ module Karafka
  def clear
  @buffer.clear
  end
+
+ private
+
+ # Finds topic based on its name
+ #
+ # @param topic [String] topic we're looking for
+ # @return [Karafka::Routing::Topic] topic we're interested in
+ def find_topic(topic)
+ @subscription_group.topics.find(topic) || raise(Errors::TopicNotFoundError, topic)
+ end
  end
  end
  end
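
A hedged sketch of the new parallel-key aware API (client and subscription_group are assumed to come from a running listener; topic 'orders' is illustrative): with virtual partitions, several executors can now coexist per topic partition, keyed by the parallel group key.

  buffer = Karafka::Processing::ExecutorsBuffer.new(client, subscription_group)

  e0 = buffer.find_or_create('orders', 0, 0)
  e1 = buffer.find_or_create('orders', 0, 1)   # different parallel key => separate executor

  buffer.find_all('orders', 0)                 # => [e0, e1]

  buffer.revoke('orders', 0)                   # drop executors for a lost partition
  buffer.find_all('orders', 0)                 # => []
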