karafka 2.2.14 → 2.3.0.alpha2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +38 -12
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +24 -0
  6. data/Gemfile.lock +16 -16
  7. data/README.md +0 -2
  8. data/SECURITY.md +23 -0
  9. data/bin/integrations +1 -1
  10. data/config/locales/errors.yml +7 -1
  11. data/config/locales/pro_errors.yml +22 -0
  12. data/docker-compose.yml +1 -1
  13. data/karafka.gemspec +2 -2
  14. data/lib/karafka/admin/acl.rb +287 -0
  15. data/lib/karafka/admin.rb +9 -13
  16. data/lib/karafka/app.rb +5 -3
  17. data/lib/karafka/base_consumer.rb +9 -1
  18. data/lib/karafka/cli/base.rb +1 -1
  19. data/lib/karafka/connection/client.rb +83 -76
  20. data/lib/karafka/connection/conductor.rb +28 -0
  21. data/lib/karafka/connection/listener.rb +159 -42
  22. data/lib/karafka/connection/listeners_batch.rb +5 -11
  23. data/lib/karafka/connection/manager.rb +72 -0
  24. data/lib/karafka/connection/messages_buffer.rb +12 -0
  25. data/lib/karafka/connection/proxy.rb +17 -0
  26. data/lib/karafka/connection/status.rb +75 -0
  27. data/lib/karafka/contracts/config.rb +14 -10
  28. data/lib/karafka/contracts/consumer_group.rb +9 -1
  29. data/lib/karafka/contracts/topic.rb +3 -1
  30. data/lib/karafka/errors.rb +17 -0
  31. data/lib/karafka/instrumentation/logger_listener.rb +3 -0
  32. data/lib/karafka/instrumentation/notifications.rb +13 -5
  33. data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +31 -28
  34. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +20 -1
  35. data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +15 -12
  36. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +39 -36
  37. data/lib/karafka/pro/base_consumer.rb +47 -0
  38. data/lib/karafka/pro/connection/manager.rb +269 -0
  39. data/lib/karafka/pro/connection/multiplexing/listener.rb +40 -0
  40. data/lib/karafka/pro/iterator/tpl_builder.rb +1 -1
  41. data/lib/karafka/pro/iterator.rb +1 -6
  42. data/lib/karafka/pro/loader.rb +14 -0
  43. data/lib/karafka/pro/processing/coordinator.rb +2 -1
  44. data/lib/karafka/pro/processing/executor.rb +37 -0
  45. data/lib/karafka/pro/processing/expansions_selector.rb +32 -0
  46. data/lib/karafka/pro/processing/jobs/periodic.rb +41 -0
  47. data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb +32 -0
  48. data/lib/karafka/pro/processing/jobs_builder.rb +14 -3
  49. data/lib/karafka/pro/processing/offset_metadata/consumer.rb +44 -0
  50. data/lib/karafka/pro/processing/offset_metadata/fetcher.rb +131 -0
  51. data/lib/karafka/pro/processing/offset_metadata/listener.rb +46 -0
  52. data/lib/karafka/pro/processing/schedulers/base.rb +39 -23
  53. data/lib/karafka/pro/processing/schedulers/default.rb +12 -14
  54. data/lib/karafka/pro/processing/strategies/default.rb +154 -1
  55. data/lib/karafka/pro/processing/strategies/dlq/default.rb +39 -0
  56. data/lib/karafka/pro/processing/strategies/vp/default.rb +65 -25
  57. data/lib/karafka/pro/processing/virtual_offset_manager.rb +41 -11
  58. data/lib/karafka/pro/routing/features/long_running_job/topic.rb +2 -0
  59. data/lib/karafka/pro/routing/features/multiplexing/config.rb +38 -0
  60. data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +114 -0
  61. data/lib/karafka/pro/routing/features/multiplexing/patches/contracts/consumer_group.rb +42 -0
  62. data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +38 -0
  63. data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +42 -0
  64. data/lib/karafka/pro/routing/features/multiplexing/subscription_groups_builder.rb +40 -0
  65. data/lib/karafka/pro/routing/features/multiplexing.rb +59 -0
  66. data/lib/karafka/pro/routing/features/non_blocking_job/topic.rb +32 -0
  67. data/lib/karafka/pro/routing/features/non_blocking_job.rb +37 -0
  68. data/lib/karafka/pro/routing/features/offset_metadata/config.rb +33 -0
  69. data/lib/karafka/pro/routing/features/offset_metadata/contracts/topic.rb +42 -0
  70. data/lib/karafka/pro/routing/features/offset_metadata/topic.rb +65 -0
  71. data/lib/karafka/pro/routing/features/offset_metadata.rb +40 -0
  72. data/lib/karafka/pro/routing/features/patterns/contracts/consumer_group.rb +4 -0
  73. data/lib/karafka/pro/routing/features/patterns/detector.rb +18 -10
  74. data/lib/karafka/pro/routing/features/periodic_job/config.rb +37 -0
  75. data/lib/karafka/pro/routing/features/periodic_job/contracts/topic.rb +44 -0
  76. data/lib/karafka/pro/routing/features/periodic_job/topic.rb +94 -0
  77. data/lib/karafka/pro/routing/features/periodic_job.rb +27 -0
  78. data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +1 -0
  79. data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
  80. data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +7 -2
  81. data/lib/karafka/process.rb +5 -3
  82. data/lib/karafka/processing/coordinator.rb +5 -1
  83. data/lib/karafka/processing/executor.rb +16 -10
  84. data/lib/karafka/processing/executors_buffer.rb +19 -4
  85. data/lib/karafka/processing/schedulers/default.rb +3 -2
  86. data/lib/karafka/processing/strategies/default.rb +6 -0
  87. data/lib/karafka/processing/strategies/dlq.rb +36 -0
  88. data/lib/karafka/routing/builder.rb +12 -2
  89. data/lib/karafka/routing/consumer_group.rb +5 -5
  90. data/lib/karafka/routing/features/base.rb +44 -8
  91. data/lib/karafka/routing/features/dead_letter_queue/config.rb +6 -1
  92. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
  93. data/lib/karafka/routing/features/dead_letter_queue/topic.rb +9 -2
  94. data/lib/karafka/routing/subscription_group.rb +2 -2
  95. data/lib/karafka/routing/subscription_groups_builder.rb +11 -2
  96. data/lib/karafka/routing/topic.rb +8 -10
  97. data/lib/karafka/runner.rb +13 -3
  98. data/lib/karafka/server.rb +5 -9
  99. data/lib/karafka/setup/config.rb +17 -0
  100. data/lib/karafka/status.rb +23 -14
  101. data/lib/karafka/templates/karafka.rb.erb +7 -0
  102. data/lib/karafka/time_trackers/partition_usage.rb +56 -0
  103. data/lib/karafka/version.rb +1 -1
  104. data.tar.gz.sig +0 -0
  105. metadata +42 -10
  106. metadata.gz.sig +0 -0
  107. data/lib/karafka/connection/consumer_group_coordinator.rb +0 -48
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Routing
17
+ module Features
18
+ # Feature allowing to run consumer operations even when no data is present on periodic
19
+ # interval.
20
+ # This allows for advanced window-based operations regardless of the arrival of new data and
21
+ # other advanced cases where the consumer is needed even when no data is coming
22
+ class PeriodicJob < Base
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -21,6 +21,7 @@ module Karafka
21
21
  :active,
22
22
  :partitioner,
23
23
  :max_partitions,
24
+ :offset_metadata_strategy,
24
25
  keyword_init: true
25
26
  ) { alias_method :active?, :active }
26
27
  end
@@ -32,6 +32,7 @@ module Karafka
32
32
  required(:active) { |val| [true, false].include?(val) }
33
33
  required(:partitioner) { |val| val.nil? || val.respond_to?(:call) }
34
34
  required(:max_partitions) { |val| val.is_a?(Integer) && val >= 1 }
35
+ required(:offset_metadata_strategy) { |val| %i[exact current].include?(val) }
35
36
  end
36
37
 
37
38
  # When virtual partitions are defined, partitioner needs to respond to `#call` and it
@@ -23,16 +23,21 @@ module Karafka
23
23
  # create more work than workers. When less, can ensure we have spare resources to
24
24
  # process other things in parallel.
25
25
  # @param partitioner [nil, #call] nil or callable partitioner
26
+ # @param offset_metadata_strategy [Symbol] how we should match the metadata for the
27
+ # offset. `:exact` will match the offset matching metadata and `:current` will select
28
+ # the most recently reported metadata
26
29
  # @return [VirtualPartitions] method that allows to set the virtual partitions details
27
30
  # during the routing configuration and then allows to retrieve it
28
31
  def virtual_partitions(
29
32
  max_partitions: Karafka::App.config.concurrency,
30
- partitioner: nil
33
+ partitioner: nil,
34
+ offset_metadata_strategy: :current
31
35
  )
32
36
  @virtual_partitions ||= Config.new(
33
37
  active: !partitioner.nil?,
34
38
  max_partitions: max_partitions,
35
- partitioner: partitioner
39
+ partitioner: partitioner,
40
+ offset_metadata_strategy: offset_metadata_strategy
36
41
  )
37
42
  end
38
43
 
@@ -25,9 +25,11 @@ module Karafka
25
25
  # Karafka.logger.info('Log something here')
26
26
  # exit
27
27
  # end
28
- define_method :"on_#{signal.to_s.downcase}" do |&block|
29
- @callbacks[signal] << block
30
- end
28
+ class_eval <<~RUBY, __FILE__, __LINE__ + 1
29
+ def on_#{signal.to_s.downcase}(&block)
30
+ @callbacks[:#{signal}] << block
31
+ end
32
+ RUBY
31
33
  end
32
34
 
33
35
  # Creates an instance of process and creates empty hash for callbacks
@@ -10,8 +10,12 @@ module Karafka
10
10
  # listener thread, but we go with thread-safe by default for all not to worry about potential
11
11
  # future mistakes.
12
12
  class Coordinator
13
+ extend Forwardable
14
+
13
15
  attr_reader :pause_tracker, :seek_offset, :topic, :partition
14
16
 
17
+ def_delegators :@pause_tracker, :attempt, :paused?
18
+
15
19
  # @param topic [Karafka::Routing::Topic]
16
20
  # @param partition [Integer]
17
21
  # @param pause_tracker [Karafka::TimeTrackers::Pause] pause tracker for given topic partition
@@ -149,7 +153,7 @@ module Karafka
149
153
 
150
154
  # @return [Boolean] are we in a pause that was initiated by the user
151
155
  def manual_pause?
152
- @pause_tracker.paused? && @manual_pause
156
+ paused? && @manual_pause
153
157
  end
154
158
 
155
159
  # Marks seek as manual for coordination purposes
@@ -97,16 +97,6 @@ module Karafka
97
97
  # This may include house-keeping or other state management changes that can occur but that
98
98
  # not mean there are any new messages available for the end user to process
99
99
  def idle
100
- # Initializes the messages set in case idle operation would happen before any processing
101
- # This prevents us from having no messages object at all as the messages object and
102
- # its metadata may be used for statistics
103
- consumer.messages ||= Messages::Builders::Messages.call(
104
- [],
105
- topic,
106
- partition,
107
- Time.now
108
- )
109
-
110
100
  consumer.on_idle
111
101
  end
112
102
 
@@ -170,10 +160,26 @@ module Karafka
170
160
  consumer.client = @client
171
161
  consumer.producer = ::Karafka::App.producer
172
162
  consumer.coordinator = @coordinator
163
+ # Since we have some message-less flows (idle, etc), we initialize consumer with empty
164
+ # messages set. In production we have persistent consumers, so this is not a performance
165
+ # overhead as this will happen only once per consumer lifetime
166
+ consumer.messages = empty_messages
173
167
 
174
168
  consumer
175
169
  end
176
170
  end
171
+
172
+ # Initializes the messages set in case given operation would happen before any processing
173
+ # This prevents us from having no messages object at all as the messages object and
174
+ # its metadata may be used for statistics
175
+ def empty_messages
176
+ Messages::Builders::Messages.call(
177
+ [],
178
+ topic,
179
+ partition,
180
+ Time.now
181
+ )
182
+ end
177
183
  end
178
184
  end
179
185
  end
@@ -13,6 +13,7 @@ module Karafka
13
13
  @client = client
14
14
  # We need two layers here to keep track of topics, partitions and processing groups
15
15
  @buffer = Hash.new { |h, k| h[k] = Hash.new { |h2, k2| h2[k2] = {} } }
16
+ @executor_class = Karafka::App.config.internal.processing.executor_class
16
17
  end
17
18
 
18
19
  # Finds or creates an executor based on the provided details
@@ -21,15 +22,28 @@ module Karafka
21
22
  # @param partition [Integer] partition number
22
23
  # @param parallel_key [String] parallel group key
23
24
  # @param coordinator [Karafka::Processing::Coordinator]
24
- # @return [Executor] consumer executor
25
+ # @return [Executor, Pro::Processing::Executor] consumer executor
25
26
  def find_or_create(topic, partition, parallel_key, coordinator)
26
- @buffer[topic][partition][parallel_key] ||= Executor.new(
27
+ @buffer[topic][partition][parallel_key] ||= @executor_class.new(
27
28
  @subscription_group.id,
28
29
  @client,
29
30
  coordinator
30
31
  )
31
32
  end
32
33
 
34
+ # Finds all existing executors for given topic partition or creates one for it
35
+ # @param topic [String] topic name
36
+ # @param partition [Integer] partition number
37
+ # @param coordinator [Karafka::Processing::Coordinator]
38
+ # @return [Array<Executor, Pro::Processing::Executor>]
39
+ def find_all_or_create(topic, partition, coordinator)
40
+ existing = find_all(topic, partition)
41
+
42
+ return existing unless existing.empty?
43
+
44
+ [find_or_create(topic, partition, 0, coordinator)]
45
+ end
46
+
33
47
  # Revokes executors of a given topic partition, so they won't be used anymore for incoming
34
48
  # messages
35
49
  #
@@ -43,7 +57,8 @@ module Karafka
43
57
  #
44
58
  # @param topic [String] topic name
45
59
  # @param partition [Integer] partition number
46
- # @return [Array<Executor>] executors in use for this topic + partition
60
+ # @return [Array<Executor, Pro::Processing::Executor>] executors in use for this
61
+ # topic + partition
47
62
  def find_all(topic, partition)
48
63
  @buffer[topic][partition].values
49
64
  end
@@ -52,7 +67,7 @@ module Karafka
52
67
  # info
53
68
  # @yieldparam [Routing::Topic] karafka routing topic object
54
69
  # @yieldparam [Integer] partition number
55
- # @yieldparam [Executor] given executor
70
+ # @yieldparam [Executor, Pro::Processing::Executor] given executor
56
71
  def each
57
72
  @buffer.each_value do |partitions|
58
73
  partitions.each_value do |executors|
@@ -13,16 +13,17 @@ module Karafka
13
13
 
14
14
  # Schedules jobs in the fifo order
15
15
  #
16
- # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
16
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Consume>] jobs we want to schedule
17
17
  def on_schedule_consumption(jobs_array)
18
18
  jobs_array.each do |job|
19
19
  @queue << job
20
20
  end
21
21
  end
22
22
 
23
- # Both revocation and shutdown jobs can also run in fifo by default
23
+ # Revocation, shutdown and idle jobs can also run in fifo by default
24
24
  alias on_schedule_revocation on_schedule_consumption
25
25
  alias on_schedule_shutdown on_schedule_consumption
26
+ alias on_schedule_idle on_schedule_consumption
26
27
 
27
28
  # This scheduler does not have anything to manage as it is a pass through and has no state
28
29
  def on_manage
@@ -41,6 +41,9 @@ module Karafka
41
41
  # already processed but rather at the next one. This applies to both sync and async
42
42
  # versions of this method.
43
43
  def mark_as_consumed(message)
44
+ # seek offset can be nil only in case `#seek` was invoked with offset reset request
45
+ # In a case like this we ignore marking
46
+ return true if coordinator.seek_offset.nil?
44
47
  # Ignore earlier offsets than the one we already committed
45
48
  return true if coordinator.seek_offset > message.offset
46
49
  return false if revoked?
@@ -57,6 +60,9 @@ module Karafka
57
60
  # @return [Boolean] true if we were able to mark the offset, false otherwise.
58
61
  # False indicates that we were not able and that we have lost the partition.
59
62
  def mark_as_consumed!(message)
63
+ # seek offset can be nil only in case `#seek` was invoked with offset reset request
64
+ # In a case like this we ignore marking
65
+ return true if coordinator.seek_offset.nil?
60
66
  # Ignore earlier offsets than the one we already committed
61
67
  return true if coordinator.seek_offset > message.offset
62
68
  return false if revoked?
@@ -14,6 +14,42 @@ module Karafka
14
14
  dead_letter_queue
15
15
  ].freeze
16
16
 
17
+ # Override of the standard `#mark_as_consumed` in order to handle the pause tracker
18
+ # reset in case DLQ is marked as fully independent. When DLQ is marked independent,
19
+ # any offset marking causes the pause count tracker to reset. This is useful when
20
+ # the error is not due to the collective batch operations state but due to intermediate
21
+ # "crawling" errors that move with it
22
+ #
23
+ # @see `Strategies::Default#mark_as_consumed` for more details
24
+ # @param message [Messages::Message]
25
+ def mark_as_consumed(message)
26
+ # If we are not retrying pause count is already 0, no need to try to reset the state
27
+ return super unless retrying?
28
+ # If we do not use independent marking on DLQ, we just mark as consumed
29
+ return super unless topic.dead_letter_queue.independent?
30
+ # If we were not able to mark no need to reset
31
+ return false unless super
32
+
33
+ coordinator.pause_tracker.reset
34
+
35
+ true
36
+ end
37
+
38
+ # Override of the standard `#mark_as_consumed!`. Resets the pause tracker count in case
39
+ # DLQ was configured with the `independent` flag.
40
+ #
41
+ # @see `Strategies::Default#mark_as_consumed!` for more details
42
+ # @param message [Messages::Message]
43
+ def mark_as_consumed!(message)
44
+ return super unless retrying?
45
+ return super unless topic.dead_letter_queue.independent?
46
+ return false unless super
47
+
48
+ coordinator.pause_tracker.reset
49
+
50
+ true
51
+ end
52
+
17
53
  # When manual offset management is on, we do not mark anything as consumed automatically
18
54
  # and we rely on the user to figure things out
19
55
  def handle_after_consume
@@ -109,10 +109,20 @@ module Karafka
109
109
  # subscription group customization
110
110
  # @param subscription_group_name [String, Symbol] subscription group id. When not provided,
111
111
  # a random uuid will be used
112
+ # @param args [Array] any extra arguments accepted by the subscription group builder
112
113
  # @param block [Proc] further topics definitions
113
- def subscription_group(subscription_group_name = SubscriptionGroup.id, &block)
114
+ def subscription_group(
115
+ subscription_group_name = SubscriptionGroup.id,
116
+ **args,
117
+ &block
118
+ )
114
119
  consumer_group('app') do
115
- target.public_send(:subscription_group=, subscription_group_name.to_s, &block)
120
+ target.public_send(
121
+ :subscription_group=,
122
+ subscription_group_name.to_s,
123
+ **args,
124
+ &block
125
+ )
116
126
  end
117
127
  end
118
128
 
@@ -14,7 +14,7 @@ module Karafka
14
14
  # It allows us to store the "current" subscription group defined in the routing
15
15
  # This subscription group id is then injected into topics, so we can compute the subscription
16
16
  # groups
17
- attr_accessor :current_subscription_group_name
17
+ attr_accessor :current_subscription_group_details
18
18
 
19
19
  # @param name [String, Symbol] raw name of this consumer group. Raw means, that it does not
20
20
  # yet have an application client_id namespace, this will be added here by default.
@@ -26,7 +26,7 @@ module Karafka
26
26
  @topics = Topics.new([])
27
27
  # Initialize the subscription group so there's always a value for it, since even if not
28
28
  # defined directly, a subscription group will be created
29
- @current_subscription_group_name = SubscriptionGroup.id
29
+ @current_subscription_group_details = { name: SubscriptionGroup.id }
30
30
  end
31
31
 
32
32
  # @return [Boolean] true if this consumer group should be active in our current process
@@ -48,7 +48,7 @@ module Karafka
48
48
  built_topic = @topics.last
49
49
  # We overwrite it conditionally in case it was not set by the user inline in the topic
50
50
  # block definition
51
- built_topic.subscription_group_name ||= current_subscription_group_name
51
+ built_topic.subscription_group_details ||= current_subscription_group_details
52
52
  built_topic
53
53
  end
54
54
 
@@ -59,13 +59,13 @@ module Karafka
59
59
  def subscription_group=(name = SubscriptionGroup.id, &block)
60
60
  # We cast it here, so the routing supports symbol based but that's anyhow later on
61
61
  # validated as a string
62
- @current_subscription_group_name = name.to_s
62
+ @current_subscription_group_details = { name: name.to_s }
63
63
 
64
64
  Proxy.new(self, &block)
65
65
 
66
66
  # We need to reset the current subscription group after it is used, so it won't leak
67
67
  # outside to other topics that would be defined without a defined subscription group
68
- @current_subscription_group_name = SubscriptionGroup.id
68
+ @current_subscription_group_details = { name: SubscriptionGroup.id }
69
69
  end
70
70
 
71
71
  # @return [Array<Routing::SubscriptionGroup>] all the subscription groups built based on
@@ -13,17 +13,46 @@ module Karafka
13
13
  class << self
14
14
  # Extends topic and builder with given feature API
15
15
  def activate
16
- Topic.prepend(self::Topic) if const_defined?('Topic', false)
17
- Topics.prepend(self::Topics) if const_defined?('Topics', false)
18
- ConsumerGroup.prepend(self::ConsumerGroup) if const_defined?('ConsumerGroup', false)
19
- Proxy.prepend(self::Proxy) if const_defined?('Proxy', false)
20
- Builder.prepend(self::Builder) if const_defined?('Builder', false)
21
- Builder.prepend(Base::Expander.new(self)) if const_defined?('Contracts', false)
16
+ if const_defined?('Topic', false)
17
+ Topic.prepend(self::Topic)
18
+ end
19
+
20
+ if const_defined?('Topics', false)
21
+ Topics.prepend(self::Topics)
22
+ end
23
+
24
+ if const_defined?('ConsumerGroup', false)
25
+ ConsumerGroup.prepend(self::ConsumerGroup)
26
+ end
27
+
28
+ if const_defined?('Proxy', false)
29
+ Proxy.prepend(self::Proxy)
30
+ end
31
+
32
+ if const_defined?('Builder', false)
33
+ Builder.prepend(self::Builder)
34
+ end
35
+
36
+ if const_defined?('Contracts', false)
37
+ Builder.prepend(Base::Expander.new(self))
38
+ end
39
+
40
+ if const_defined?('SubscriptionGroup', false)
41
+ SubscriptionGroup.prepend(self::SubscriptionGroup)
42
+ end
43
+
44
+ if const_defined?('SubscriptionGroupsBuilder', false)
45
+ SubscriptionGroupsBuilder.prepend(self::SubscriptionGroupsBuilder)
46
+ end
22
47
  end
23
48
 
24
- # Loads all the features and activates them
49
+ # Loads all the features and activates them once
25
50
  def load_all
51
+ return if @loaded
52
+
26
53
  features.each(&:activate)
54
+
55
+ @loaded = true
27
56
  end
28
57
 
29
58
  # @param config [Karafka::Core::Configurable::Node] app config that we can alter with
@@ -41,11 +70,18 @@ module Karafka
41
70
 
42
71
  private
43
72
 
44
- # @return [Array<Class>] all available routing features
73
+ # @return [Array<Class>] all available routing features that are direct descendants of
74
+ # the features base. The approach of using `#superclass` prevents us from accidentally
75
+ # loading Pro components
45
76
  def features
46
77
  ObjectSpace
47
78
  .each_object(Class)
48
79
  .select { |klass| klass < self }
80
+ # Ensures that Pro components are only loaded when we operate in Pro mode. Since
81
+ # outside of specs Zeitwerk does not require them at all, they will not be loaded
82
+ # anyhow, but for specs this needs to be done as RSpec requires all files to be
83
+ # present
84
+ .reject { |klass| Karafka.pro? ? false : klass.superclass != self }
49
85
  .sort_by(&:to_s)
50
86
  end
51
87
 
@@ -11,8 +11,13 @@ module Karafka
11
11
  :max_retries,
12
12
  # To what topic the skipped messages should be moved
13
13
  :topic,
14
+ # Should retries be handled collectively on a batch or independently per message
15
+ :independent,
14
16
  keyword_init: true
15
- ) { alias_method :active?, :active }
17
+ ) do
18
+ alias_method :active?, :active
19
+ alias_method :independent?, :independent
20
+ end
16
21
  end
17
22
  end
18
23
  end
@@ -18,6 +18,7 @@ module Karafka
18
18
 
19
19
  nested :dead_letter_queue do
20
20
  required(:active) { |val| [true, false].include?(val) }
21
+ required(:independent) { |val| [true, false].include?(val) }
21
22
  required(:max_retries) { |val| val.is_a?(Integer) && val >= 0 }
22
23
  end
23
24
 
@@ -14,12 +14,19 @@ module Karafka
14
14
  # @param max_retries [Integer] after how many retries should we move data to dlq
15
15
  # @param topic [String, false] where the messages should be moved if failing or false
16
16
  # if we do not want to move it anywhere and just skip
17
+ # @param independent [Boolean] needs to be true in order for each marking as consumed
18
+ # in a retry flow to reset the errors counter
17
19
  # @return [Config] defined config
18
- def dead_letter_queue(max_retries: DEFAULT_MAX_RETRIES, topic: nil)
20
+ def dead_letter_queue(
21
+ max_retries: DEFAULT_MAX_RETRIES,
22
+ topic: nil,
23
+ independent: false
24
+ )
19
25
  @dead_letter_queue ||= Config.new(
20
26
  active: !topic.nil?,
21
27
  max_retries: max_retries,
22
- topic: topic
28
+ topic: topic,
29
+ independent: independent
23
30
  )
24
31
  end
25
32
 
@@ -37,7 +37,8 @@ module Karafka
37
37
  # @param topics [Karafka::Routing::Topics] all the topics that share the same key settings
38
38
  # @return [SubscriptionGroup] built subscription group
39
39
  def initialize(position, topics)
40
- @name = topics.first.subscription_group_name
40
+ @details = topics.first.subscription_group_details
41
+ @name = @details.fetch(:name)
41
42
  @consumer_group = topics.first.consumer_group
42
43
  # We include the consumer group id here because we want to have unique ids of subscription
43
44
  # groups across the system. Otherwise user could set the same name for multiple
@@ -47,7 +48,6 @@ module Karafka
47
48
  @position = position
48
49
  @topics = topics
49
50
  @kafka = build_kafka
50
- freeze
51
51
  end
52
52
 
53
53
  # @return [String] consumer group id
@@ -19,7 +19,7 @@ module Karafka
19
19
  max_messages
20
20
  max_wait_time
21
21
  initial_offset
22
- subscription_group_name
22
+ subscription_group_details
23
23
  ].freeze
24
24
 
25
25
  private_constant :DISTRIBUTION_KEYS
@@ -37,7 +37,7 @@ module Karafka
37
37
  .group_by(&:first)
38
38
  .values
39
39
  .map { |value| value.map(&:last) }
40
- .map { |topics_array| Routing::Topics.new(topics_array) }
40
+ .flat_map { |value| expand(value) }
41
41
  .map { |grouped_topics| SubscriptionGroup.new(@position += 1, grouped_topics) }
42
42
  .tap do |subscription_groups|
43
43
  subscription_groups.each do |subscription_group|
@@ -60,6 +60,15 @@ module Karafka
60
60
 
61
61
  accu.hash
62
62
  end
63
+
64
+ # Hook for optional expansion of groups based on subscription group features
65
+ #
66
+ # @param topics_array [Array<Routing::Topic>] group of topics that have the same settings
67
+ # and can use the same connection
68
+ # @return [Array<Routing::Topics>] expanded groups
69
+ def expand(topics_array)
70
+ [Routing::Topics.new(topics_array)]
71
+ end
63
72
  end
64
73
  end
65
74
  end
@@ -9,7 +9,7 @@ module Karafka
9
9
  attr_reader :id, :name, :consumer_group
10
10
  attr_writer :consumer
11
11
 
12
- attr_accessor :subscription_group_name
12
+ attr_accessor :subscription_group_details
13
13
 
14
14
  # Full subscription group reference can be built only when we have knowledge about the
15
15
  # whole routing tree, this is why it is going to be set later on
@@ -46,15 +46,13 @@ module Karafka
46
46
  INHERITABLE_ATTRIBUTES.each do |attribute|
47
47
  attr_writer attribute
48
48
 
49
- define_method attribute do
50
- current_value = instance_variable_get(:"@#{attribute}")
49
+ class_eval <<~RUBY, __FILE__, __LINE__ + 1
50
+ def #{attribute}
51
+ return @#{attribute} unless @#{attribute}.nil?
51
52
 
52
- return current_value unless current_value.nil?
53
-
54
- value = Karafka::App.config.send(attribute)
55
-
56
- instance_variable_set(:"@#{attribute}", value)
57
- end
53
+ @#{attribute} = Karafka::App.config.send(:#{attribute})
54
+ end
55
+ RUBY
58
56
  end
59
57
 
60
58
  # @return [String] name of subscription that will go to librdkafka
@@ -117,7 +115,7 @@ module Karafka
117
115
  active: active?,
118
116
  consumer: consumer,
119
117
  consumer_group_id: consumer_group.id,
120
- subscription_group_name: subscription_group_name
118
+ subscription_group_details: subscription_group_details
121
119
  ).freeze
122
120
  end
123
121
  end
@@ -3,6 +3,11 @@
3
3
  module Karafka
4
4
  # Class used to run the Karafka listeners in separate threads
5
5
  class Runner
6
+ def initialize
7
+ @manager = App.config.internal.connection.manager
8
+ @conductor = App.config.internal.connection.conductor
9
+ end
10
+
6
11
  # Starts listening on all the listeners asynchronously and handles the jobs queue closing
7
12
  # after listeners are done with their work.
8
13
  def call
@@ -13,16 +18,21 @@ module Karafka
13
18
  workers = Processing::WorkersBatch.new(jobs_queue)
14
19
  listeners = Connection::ListenersBatch.new(jobs_queue)
15
20
 
21
+ # Register all the listeners so they can be started and managed
22
+ @manager.register(listeners)
23
+
16
24
  workers.each(&:async_call)
17
- listeners.each(&:async_call)
18
25
 
19
26
  # We aggregate threads here for a supervised shutdown process
20
27
  Karafka::Server.workers = workers
21
28
  Karafka::Server.listeners = listeners
22
29
  Karafka::Server.jobs_queue = jobs_queue
23
30
 
24
- # All the listener threads need to finish
25
- listeners.each(&:join)
31
+ until @manager.done?
32
+ @conductor.wait
33
+
34
+ @manager.control
35
+ end
26
36
 
27
37
  # We close the jobs queue only when no listener threads are working.
28
38
  # This ensures that everything was closed prior to us not accepting any more jobs and that
@@ -88,7 +88,10 @@ module Karafka
88
88
  # their work and if so, we can just return and normal shutdown process will take place
89
89
  # We divide it by 1000 because we use time in ms.
90
90
  ((timeout / 1_000) * SUPERVISION_CHECK_FACTOR).to_i.times do
91
- return if listeners.count(&:alive?).zero? && workers.count(&:alive?).zero?
91
+ all_listeners_stopped = listeners.all?(&:stopped?)
92
+ all_workers_stopped = workers.none?(&:alive?)
93
+
94
+ return if all_listeners_stopped && all_workers_stopped
92
95
 
93
96
  sleep SUPERVISION_SLEEP
94
97
  end
@@ -104,7 +107,7 @@ module Karafka
104
107
 
105
108
  # We're done waiting, let's kill them!
106
109
  workers.each(&:terminate)
107
- listeners.each(&:terminate)
110
+ listeners.active.each(&:terminate)
108
111
  # We always need to shutdown clients to make sure we do not force the GC to close consumer.
109
112
  # This can cause memory leaks and crashes.
110
113
  listeners.each(&:shutdown)
@@ -137,13 +140,6 @@ module Karafka
137
140
  # We don't have to safe-guard it with check states as the state transitions work only
138
141
  # in one direction
139
142
  Karafka::App.quiet!
140
-
141
- # We need one more thread to monitor the process and move to quieted once everything
142
- # is quiet and no processing is happening anymore
143
- Thread.new do
144
- sleep(0.1) until listeners.coordinators.all?(&:finished?)
145
- Karafka::App.quieted!
146
- end
147
143
  end
148
144
 
149
145
  private