karafka 2.4.18 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159)
  1. checksums.yaml +4 -4
  2. data/.github/CODEOWNERS +3 -0
  3. data/.github/workflows/ci.yml +59 -15
  4. data/.github/workflows/push.yml +35 -0
  5. data/.github/workflows/verify-action-pins.yml +16 -0
  6. data/.ruby-version +1 -1
  7. data/CHANGELOG.md +75 -0
  8. data/Gemfile +2 -2
  9. data/Gemfile.lock +72 -53
  10. data/LICENSE-COMM +2 -2
  11. data/README.md +1 -1
  12. data/Rakefile +4 -0
  13. data/bin/clean_kafka +43 -0
  14. data/bin/integrations +20 -6
  15. data/bin/rspecs +15 -3
  16. data/bin/verify_kafka_warnings +35 -0
  17. data/bin/verify_topics_naming +27 -0
  18. data/config/locales/errors.yml +5 -1
  19. data/config/locales/pro_errors.yml +13 -2
  20. data/docker-compose.yml +1 -1
  21. data/examples/payloads/avro/.gitkeep +0 -0
  22. data/examples/payloads/json/sample_set_01/enrollment_event.json +579 -0
  23. data/examples/payloads/json/sample_set_01/ingestion_event.json +30 -0
  24. data/examples/payloads/json/sample_set_01/transaction_event.json +17 -0
  25. data/examples/payloads/json/sample_set_01/user_event.json +11 -0
  26. data/karafka.gemspec +3 -8
  27. data/lib/karafka/active_job/current_attributes.rb +1 -1
  28. data/lib/karafka/active_job/job_extensions.rb +4 -1
  29. data/lib/karafka/admin/acl.rb +5 -1
  30. data/lib/karafka/admin/configs.rb +5 -1
  31. data/lib/karafka/admin.rb +89 -42
  32. data/lib/karafka/base_consumer.rb +17 -8
  33. data/lib/karafka/cli/base.rb +8 -2
  34. data/lib/karafka/cli/topics/align.rb +7 -4
  35. data/lib/karafka/cli/topics/base.rb +17 -0
  36. data/lib/karafka/cli/topics/create.rb +9 -7
  37. data/lib/karafka/cli/topics/delete.rb +4 -2
  38. data/lib/karafka/cli/topics/help.rb +39 -0
  39. data/lib/karafka/cli/topics/repartition.rb +4 -2
  40. data/lib/karafka/cli/topics.rb +10 -3
  41. data/lib/karafka/cli.rb +2 -0
  42. data/lib/karafka/connection/client.rb +39 -9
  43. data/lib/karafka/connection/listener.rb +24 -12
  44. data/lib/karafka/connection/messages_buffer.rb +1 -1
  45. data/lib/karafka/connection/proxy.rb +4 -1
  46. data/lib/karafka/constraints.rb +3 -3
  47. data/lib/karafka/contracts/base.rb +3 -2
  48. data/lib/karafka/contracts/config.rb +5 -1
  49. data/lib/karafka/contracts/topic.rb +1 -1
  50. data/lib/karafka/errors.rb +46 -2
  51. data/lib/karafka/helpers/async.rb +3 -1
  52. data/lib/karafka/helpers/interval_runner.rb +8 -0
  53. data/lib/karafka/instrumentation/callbacks/rebalance.rb +5 -1
  54. data/lib/karafka/instrumentation/logger_listener.rb +95 -32
  55. data/lib/karafka/instrumentation/proctitle_listener.rb +5 -1
  56. data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +2 -2
  57. data/lib/karafka/instrumentation/vendors/kubernetes/base_listener.rb +17 -2
  58. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +29 -6
  59. data/lib/karafka/instrumentation/vendors/kubernetes/swarm_liveness_listener.rb +9 -0
  60. data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
  61. data/lib/karafka/pro/cleaner.rb +8 -0
  62. data/lib/karafka/pro/cli/parallel_segments/base.rb +89 -0
  63. data/lib/karafka/pro/cli/parallel_segments/collapse.rb +164 -0
  64. data/lib/karafka/pro/cli/parallel_segments/distribute.rb +164 -0
  65. data/lib/karafka/pro/cli/parallel_segments.rb +60 -0
  66. data/lib/karafka/pro/connection/manager.rb +5 -8
  67. data/lib/karafka/pro/encryption.rb +12 -1
  68. data/lib/karafka/pro/instrumentation/performance_tracker.rb +1 -1
  69. data/lib/karafka/pro/iterator/expander.rb +5 -3
  70. data/lib/karafka/pro/iterator/tpl_builder.rb +23 -0
  71. data/lib/karafka/pro/loader.rb +10 -0
  72. data/lib/karafka/pro/processing/coordinator.rb +4 -1
  73. data/lib/karafka/pro/processing/coordinators/errors_tracker.rb +32 -3
  74. data/lib/karafka/pro/processing/coordinators/filters_applier.rb +11 -0
  75. data/lib/karafka/pro/processing/filters/base.rb +10 -2
  76. data/lib/karafka/pro/processing/filters/expirer.rb +5 -0
  77. data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb +2 -2
  78. data/lib/karafka/pro/processing/filters/virtual_limiter.rb +5 -0
  79. data/lib/karafka/pro/processing/parallel_segments/filters/base.rb +73 -0
  80. data/lib/karafka/pro/processing/parallel_segments/filters/default.rb +85 -0
  81. data/lib/karafka/pro/processing/parallel_segments/filters/mom.rb +66 -0
  82. data/lib/karafka/pro/processing/partitioner.rb +1 -13
  83. data/lib/karafka/pro/processing/piping/consumer.rb +13 -13
  84. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
  85. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
  86. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
  87. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
  88. data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +1 -1
  89. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
  90. data/lib/karafka/pro/processing/strategies/default.rb +36 -8
  91. data/lib/karafka/pro/processing/strategies/dlq/default.rb +15 -10
  92. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +1 -1
  93. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +1 -1
  94. data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +3 -1
  95. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +1 -1
  96. data/lib/karafka/pro/processing/strategies/ftr/default.rb +1 -1
  97. data/lib/karafka/pro/processing/strategies/lrj/default.rb +4 -1
  98. data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +1 -1
  99. data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +1 -1
  100. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
  101. data/lib/karafka/pro/processing/virtual_partitions/distributors/balanced.rb +50 -0
  102. data/lib/karafka/pro/processing/virtual_partitions/distributors/base.rb +29 -0
  103. data/lib/karafka/pro/processing/virtual_partitions/distributors/consistent.rb +27 -0
  104. data/lib/karafka/pro/recurring_tasks/contracts/config.rb +8 -4
  105. data/lib/karafka/pro/recurring_tasks/dispatcher.rb +3 -3
  106. data/lib/karafka/pro/recurring_tasks/setup/config.rb +7 -2
  107. data/lib/karafka/pro/recurring_tasks.rb +21 -2
  108. data/lib/karafka/pro/routing/features/dead_letter_queue/topic.rb +1 -1
  109. data/lib/karafka/pro/routing/features/multiplexing/config.rb +1 -0
  110. data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +17 -0
  111. data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +5 -2
  112. data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +8 -1
  113. data/lib/karafka/pro/routing/features/parallel_segments/builder.rb +47 -0
  114. data/lib/karafka/pro/routing/features/parallel_segments/config.rb +27 -0
  115. data/lib/karafka/pro/routing/features/parallel_segments/consumer_group.rb +83 -0
  116. data/lib/karafka/pro/routing/features/parallel_segments/contracts/consumer_group.rb +49 -0
  117. data/lib/karafka/pro/routing/features/parallel_segments/topic.rb +43 -0
  118. data/lib/karafka/pro/routing/features/parallel_segments.rb +24 -0
  119. data/lib/karafka/pro/routing/features/patterns/pattern.rb +1 -1
  120. data/lib/karafka/pro/routing/features/recurring_tasks/builder.rb +2 -2
  121. data/lib/karafka/pro/routing/features/scheduled_messages/builder.rb +10 -6
  122. data/lib/karafka/pro/routing/features/swarm/contracts/routing.rb +3 -2
  123. data/lib/karafka/pro/routing/features/swarm.rb +4 -1
  124. data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +20 -2
  125. data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
  126. data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +8 -2
  127. data/lib/karafka/pro/scheduled_messages/consumer.rb +61 -26
  128. data/lib/karafka/pro/scheduled_messages/daily_buffer.rb +9 -6
  129. data/lib/karafka/pro/scheduled_messages/deserializers/headers.rb +7 -1
  130. data/lib/karafka/pro/scheduled_messages/dispatcher.rb +2 -1
  131. data/lib/karafka/pro/scheduled_messages/max_epoch.rb +15 -6
  132. data/lib/karafka/pro/scheduled_messages/proxy.rb +15 -3
  133. data/lib/karafka/pro/scheduled_messages/serializer.rb +2 -4
  134. data/lib/karafka/pro/scheduled_messages/state.rb +20 -23
  135. data/lib/karafka/pro/scheduled_messages/tracker.rb +34 -8
  136. data/lib/karafka/pro/scheduled_messages.rb +17 -1
  137. data/lib/karafka/processing/coordinators_buffer.rb +1 -0
  138. data/lib/karafka/processing/strategies/default.rb +4 -4
  139. data/lib/karafka/routing/builder.rb +12 -3
  140. data/lib/karafka/routing/features/base/expander.rb +8 -2
  141. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
  142. data/lib/karafka/routing/subscription_group.rb +1 -1
  143. data/lib/karafka/runner.rb +7 -1
  144. data/lib/karafka/server.rb +21 -18
  145. data/lib/karafka/setup/attributes_map.rb +2 -0
  146. data/lib/karafka/setup/config.rb +40 -7
  147. data/lib/karafka/setup/defaults_injector.rb +26 -1
  148. data/lib/karafka/status.rb +6 -1
  149. data/lib/karafka/swarm/node.rb +31 -0
  150. data/lib/karafka/swarm/supervisor.rb +9 -2
  151. data/lib/karafka/templates/karafka.rb.erb +14 -1
  152. data/lib/karafka/version.rb +1 -1
  153. data/lib/karafka.rb +17 -9
  154. data/renovate.json +14 -2
  155. metadata +41 -40
  156. checksums.yaml.gz.sig +0 -0
  157. data/certs/cert.pem +0 -26
  158. data.tar.gz.sig +0 -0
  159. metadata.gz.sig +0 -0
@@ -40,7 +40,7 @@ module Karafka
       elsif !revoked?
         # no need to check for manual seek because AJ consumer is internal and
         # fully controlled by us
-        seek(seek_offset, false)
+        seek(seek_offset, false, reset_offset: false)
         resume
       else
         resume

@@ -43,7 +43,7 @@ module Karafka

         # no need to check for manual seek because AJ consumer is internal and
         # fully controlled by us
-        seek(seek_offset, false) unless revoked?
+        seek(seek_offset, false, reset_offset: false) unless revoked?

         resume
       else

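Across the AJ and LRJ strategies in this release, every internal post-processing `#seek` now passes `reset_offset: false`, so the framework's own repositioning does not clear the tracked seek offset. A minimal sketch of the distinction, assuming the 2.5.0 `Karafka::BaseConsumer#seek` signature in which the second positional argument flags a manual seek and `reset_offset:` controls the internal bookkeeping (`process` is a hypothetical helper):

    class EventsConsumer < Karafka::BaseConsumer
      def consume
        messages.each { |message| process(message) }

        # A user-initiated seek: treated as manual and allowed to reset the
        # internally tracked seek offset
        seek(messages.last.offset + 1)
      end
    end

    # Internally, as in the hunks above, Karafka now repositions without
    # touching its own bookkeeping:
    #   seek(seek_offset, false, reset_offset: false)
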
@@ -55,14 +55,19 @@ module Karafka
         # seek offset can be nil only in case `#seek` was invoked with offset reset request
         # In case like this we ignore marking
         return true if seek_offset.nil?
-        # Ignore earlier offsets than the one we already committed
-        return true if seek_offset > message.offset
+        # Ignore if it is the same offset as the one that is marked currently
+        # We ignore second marking because it changes nothing and in case of people using
+        # metadata storage but with automatic offset marking, this would cause metadata to be
+        # erased by automatic marking
+        return true if (seek_offset - 1) == message.offset
         return false if revoked?

         # If we are not inside a transaction but this is a transactional topic, we mark with
         # artificially created transaction
         stored = if producer.transactional?
                    mark_with_transaction(message, offset_metadata, true)
+                 elsif @_transactional_marking
+                   raise Errors::NonTransactionalMarkingAttemptError
                  else
                    client.mark_as_consumed(message, offset_metadata)
                  end

@@ -92,14 +97,19 @@ module Karafka
         # seek offset can be nil only in case `#seek` was invoked with offset reset request
         # In case like this we ignore marking
         return true if seek_offset.nil?
-        # Ignore earlier offsets than the one we already committed
-        return true if seek_offset > message.offset
+        # Ignore if it is the same offset as the one that is marked currently
+        # We ignore second marking because it changes nothing and in case of people using
+        # metadata storage but with automatic offset marking, this would cause metadata to be
+        # erased by automatic marking
+        return true if (seek_offset - 1) == message.offset
         return false if revoked?

         # If we are not inside a transaction but this is a transactional topic, we mark with
         # artificially created transaction
         stored = if producer.transactional?
                    mark_with_transaction(message, offset_metadata, false)
+                 elsif @_transactional_marking
+                   raise Errors::NonTransactionalMarkingAttemptError
                  else
                    client.mark_as_consumed!(message, offset_metadata)
                  end

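Both `#mark_as_consumed` variants replace the old guard, which ignored every offset at or below the committed position, with one that ignores only a repeated marking of the most recently marked message. Since `seek_offset` always points one past the last marked offset, `seek_offset - 1` identifies that message; skipping the repeat keeps automatic marking from erasing user-stored offset metadata. The guard in isolation, as a standalone sketch:

    # seek_offset points at the next offset to consume, so the last marked
    # message sits at seek_offset - 1
    seek_offset = 42

    repeat_marking = ->(message_offset) { (seek_offset - 1) == message_offset }

    repeat_marking.call(41) # => true: ignored, stored metadata stays intact
    repeat_marking.call(40) # => false: an older offset may be marked again
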
@@ -143,6 +153,7 @@ module Karafka
         self.producer = active_producer

         transaction_started = false
+        transaction_completed = false

         # Prevent from nested transactions. It would not make any sense
         raise Errors::TransactionAlreadyInitializedError if @_in_transaction

@@ -159,6 +170,12 @@ module Karafka
           # transaction. We do it only for transactions that contain offset management as for
           # producer only, this is not relevant.
           raise Errors::AssignmentLostError if @_in_transaction_marked && revoked?
+
+          # If we do not reach this, we should not move seek offsets because it means that
+          # either an error occured or transaction was aborted.
+          # In case of error, it will bubble up so no issue but in case of abort, while we
+          # do not reach this place, the code will continue
+          transaction_completed = true
         end

         @_in_transaction = false

@@ -180,8 +197,13 @@ module Karafka
         #   to mimic this
         # - Complex strategies like VPs can use this in VPs to mark in parallel without
         #   having to redefine the transactional flow completely
-        @_transaction_marked.each do |marking|
-          marking.pop ? mark_as_consumed(*marking) : mark_as_consumed!(*marking)
+        #
+        # @note This should be applied only if transaction did not error and if it was not
+        #   aborted.
+        if transaction_completed
+          @_transaction_marked.each do |marking|
+            marking.pop ? mark_as_consumed(*marking) : mark_as_consumed!(*marking)
+          end
         end

         true

@@ -213,6 +235,9 @@ module Karafka
           offset_metadata
         )

+        # This one is long lived and used to make sure, that users do not mix transactional
+        # marking with non-transactional. When this happens we should raise error
+        @_transactional_marking = true
         @_in_transaction_marked = true
         @_transaction_marked ||= []
         @_transaction_marked << [message, offset_metadata, async]

@@ -252,8 +277,11 @@ module Karafka
         # seek offset can be nil only in case `#seek` was invoked with offset reset request
         # In case like this we ignore marking
         return true if seek_offset.nil?
-        # Ignore earlier offsets than the one we already committed
-        return true if seek_offset > message.offset
+        # Ignore if it is the same offset as the one that is marked currently
+        # We ignore second marking because it changes nothing and in case of people using
+        # metadata storage but with automatic offset marking, this would cause metadata to be
+        # erased by automatic marking
+        return true if (seek_offset - 1) == message.offset
         return false if revoked?

         # If we have already marked this successfully in a transaction that was running

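Taken together, these hunks make transactional marking collect offsets locally and apply them only when the transaction actually completed: an abort or error leaves `transaction_completed` false so `@_transaction_marked` is discarded, and the long-lived `@_transactional_marking` flag makes any later non-transactional marking raise `Errors::NonTransactionalMarkingAttemptError`. A hedged usage sketch, assuming WaterDrop's `WaterDrop::AbortTransaction` abort mechanism and a hypothetical `maintenance_mode?` predicate:

    class OrdersConsumer < Karafka::BaseConsumer
      def consume
        transaction do
          messages.each do |message|
            producer.produce_async(topic: 'orders_enriched', payload: message.raw_payload)
            mark_as_consumed(message)
          end

          # Aborting rolls back the produced messages and, as of this release,
          # also prevents the collected markings from moving the seek offset
          raise WaterDrop::AbortTransaction if maintenance_mode?
        end

        # After marking transactionally, a plain mark_as_consumed on this
        # consumer raises Errors::NonTransactionalMarkingAttemptError
      end
    end
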
@@ -145,19 +145,20 @@ module Karafka
       # @param skippable_message [Array<Karafka::Messages::Message>]
       # @return [Hash] dispatch DLQ message
       def build_dlq_message(skippable_message)
-        original_partition = skippable_message.partition.to_s
+        source_partition = skippable_message.partition.to_s

         dlq_message = {
-          topic: topic.dead_letter_queue.topic,
-          key: original_partition,
+          topic: @_dispatch_to_dlq_topic || topic.dead_letter_queue.topic,
+          key: skippable_message.raw_key,
+          partition_key: source_partition,
           payload: skippable_message.raw_payload,
           headers: skippable_message.raw_headers.merge(
-            'original_topic' => topic.name,
-            'original_partition' => original_partition,
-            'original_offset' => skippable_message.offset.to_s,
-            'original_consumer_group' => topic.consumer_group.id,
-            'original_key' => skippable_message.raw_key.to_s,
-            'original_attempts' => attempt.to_s
+            'source_topic' => topic.name,
+            'source_partition' => source_partition,
+            'source_offset' => skippable_message.offset.to_s,
+            'source_consumer_group' => topic.consumer_group.id,
+            'source_attempts' => attempt.to_s,
+            'source_trace_id' => errors_tracker.trace_id
           )
         }

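The DLQ dispatch payload was reworked: the `original_*` headers become `source_*`, the failed record's key is preserved as the DLQ message key (with partition affinity kept via `partition_key`), and an errors-tracker `source_trace_id` is attached. A consumer of the DLQ topic would read the new headers roughly like this (a sketch; `ErrorInbox` is a hypothetical sink):

    class DlqConsumer < Karafka::BaseConsumer
      def consume
        messages.each do |message|
          headers = message.headers

          ErrorInbox.store(
            # 2.4.x named these original_topic, original_partition, etc.
            topic: headers['source_topic'],
            partition: headers['source_partition'],
            offset: headers['source_offset'],
            consumer_group: headers['source_consumer_group'],
            attempts: headers['source_attempts'],
            trace_id: headers['source_trace_id'],
            payload: message.raw_payload
          )
        end
      end
    end
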
@@ -205,7 +206,7 @@ module Karafka
       # In case of `:skip` and `:dispatch` will run the exact flow provided in a block
       # In case of `:retry` always `#retry_after_pause` is applied
       def apply_dlq_flow
-        flow = topic.dead_letter_queue.strategy.call(errors_tracker, attempt)
+        flow, target_topic = topic.dead_letter_queue.strategy.call(errors_tracker, attempt)

         case flow
         when :retry

@@ -216,6 +217,8 @@ module Karafka
           @_dispatch_to_dlq = false
         when :dispatch
           @_dispatch_to_dlq = true
+          # Use custom topic if it was returned from the strategy
+          @_dispatch_to_dlq_topic = target_topic || topic.dead_letter_queue.topic
         else
           raise Karafka::UnsupportedCaseError, flow
         end

@@ -227,6 +230,8 @@ module Karafka

         # Always backoff after DLQ dispatch even on skip to prevent overloads on errors
         pause(seek_offset, nil, false)
+      ensure
+        @_dispatch_to_dlq_topic = nil
       end

       # Marks message that went to DLQ (if applicable) based on the requested method

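`apply_dlq_flow` now destructures the strategy result, so a DLQ strategy may return `[flow, target_topic]` to route a given failure to a dedicated topic; a bare flow symbol still works because the extra assignment target is simply nil and the default DLQ topic is used. A sketch of such a strategy (class and topic names are illustrative):

    class SeverityBasedStrategy
      # @param errors_tracker [#last] tracker exposing the most recent error
      # @param attempt [Integer] current attempt count
      def call(errors_tracker, attempt)
        return [:retry] if attempt < 3

        if errors_tracker.last.is_a?(JSON::ParserError)
          # Route unparsable payloads to their own DLQ topic
          [:dispatch, 'dlq_unparsable']
        else
          # nil target falls back to topic.dead_letter_queue.topic
          [:dispatch]
        end
      end
    end

    # routing: dead_letter_queue(topic: 'dlq_default', strategy: SeverityBasedStrategy.new)
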
@@ -40,7 +40,7 @@ module Karafka
         if coordinator.filtered? && !revoked?
           handle_post_filtering
         elsif !revoked? && !coordinator.manual_seek?
-          seek(seek_offset, false)
+          seek(seek_offset, false, reset_offset: false)
           resume
         else
           resume

@@ -35,7 +35,7 @@ module Karafka
         if coordinator.filtered? && !revoked?
           handle_post_filtering
         elsif !revoked? && !coordinator.manual_seek?
-          seek(last_group_message.offset + 1, false)
+          seek(last_group_message.offset + 1, false, reset_offset: false)
           resume
         else
           resume

@@ -31,7 +31,9 @@ module Karafka

           mark_as_consumed(last_group_message) unless revoked?
           # We should not overwrite user manual seel request with our seek
-          seek(seek_offset, false) unless revoked? || coordinator.manual_seek?
+          unless revoked? || coordinator.manual_seek?
+            seek(seek_offset, false, reset_offset: false)
+          end

           resume
         else

@@ -30,7 +30,7 @@ module Karafka
         return if coordinator.manual_pause?

         unless revoked? || coordinator.manual_seek?
-          seek(last_group_message.offset + 1, false)
+          seek(last_group_message.offset + 1, false, reset_offset: false)
         end

         resume

@@ -67,7 +67,7 @@ module Karafka
         if filter.mark_as_consumed?
           send(
             filter.marking_method,
-            filter.cursor
+            filter.marking_cursor
           )
         end

@@ -43,7 +43,10 @@ module Karafka
         return if coordinator.manual_pause?

         mark_as_consumed(last_group_message) unless revoked?
-        seek(seek_offset, false) unless revoked? || coordinator.manual_seek?
+
+        unless revoked? || coordinator.manual_seek?
+          seek(seek_offset, false, reset_offset: false)
+        end

         resume
       else

@@ -40,7 +40,7 @@ module Karafka
         elsif !revoked? && !coordinator.manual_seek?
           # If not revoked and not throttled, we move to where we were suppose to and
           # resume
-          seek(seek_offset, false)
+          seek(seek_offset, false, reset_offset: false)
           resume
         else
           resume

@@ -38,7 +38,7 @@ module Karafka
         elsif !revoked? && !coordinator.manual_seek?
           # If not revoked and not throttled, we move to where we were suppose to and
           # resume
-          seek(last_group_message.offset + 1, false)
+          seek(last_group_message.offset + 1, false, reset_offset: false)
           resume
         else
           resume

@@ -40,7 +40,7 @@ module Karafka
         return if coordinator.manual_pause?

         unless revoked? || coordinator.manual_seek?
-          seek(last_group_message.offset + 1, false)
+          seek(last_group_message.offset + 1, false, reset_offset: false)
         end

         resume

@@ -0,0 +1,50 @@
+# frozen_string_literal: true
+
+# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+# See LICENSE for details.
+
+module Karafka
+  module Pro
+    module Processing
+      module VirtualPartitions
+        module Distributors
+          # Balanced distributor that groups messages by partition key
+          # and processes larger groups first while maintaining message order within groups
+          class Balanced < Base
+            # @param messages [Array<Karafka::Messages::Message>] messages to distribute
+            # @return [Hash<Integer, Array<Karafka::Messages::Message>>] hash with group ids as
+            #   keys and message groups as values
+            def call(messages)
+              # Group messages by partition key
+              key_groupings = messages.group_by { |msg| config.partitioner.call(msg) }
+
+              worker_loads = Array.new(config.max_partitions, 0)
+              worker_assignments = Array.new(config.max_partitions) { [] }
+
+              # Sort keys by workload in descending order
+              sorted_keys = key_groupings.keys.sort_by { |key| -key_groupings[key].size }
+
+              # Assign each key to the worker with the least current load
+              sorted_keys.each do |key|
+                # Find worker with minimum current load
+                min_load_worker = worker_loads.each_with_index.min_by { |load, _| load }[1]
+                messages = key_groupings[key]
+
+                # Assign this key to that worker
+                worker_assignments[min_load_worker] += messages
+                worker_loads[min_load_worker] += messages.size
+              end
+
+              # Combine messages for each worker and sort by offset
+              worker_assignments
+                .each_with_index
+                .reject { |group_messages, _| group_messages.empty? }
+                .map! { |group_messages, index| [index, group_messages.sort_by!(&:offset)] }
+                .to_h
+            end
+          end
+        end
+      end
+    end
+  end
+end

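The new `Balanced` distributor (file 101 in the list above) is a greedy longest-group-first assignment: keys are ordered by message count and each is handed to the currently least-loaded virtual partition, preserving per-key ordering. The same idea as a standalone simulation, using plain hashes instead of Karafka message objects:

    messages = [
      { key: 'a', offset: 0 }, { key: 'a', offset: 1 }, { key: 'a', offset: 2 },
      { key: 'b', offset: 3 }, { key: 'b', offset: 4 },
      { key: 'c', offset: 5 }
    ]

    max_partitions = 2
    groups = messages.group_by { |m| m[:key] }
    loads = Array.new(max_partitions, 0)
    assignments = Array.new(max_partitions) { [] }

    # Largest key groups first, each onto the least-loaded worker
    groups.keys.sort_by { |k| -groups[k].size }.each do |key|
      worker = loads.each_with_index.min_by(&:first).last
      assignments[worker] += groups[key]
      loads[worker] += groups[key].size
    end

    assignments.map { |msgs| msgs.map { |m| "#{m[:key]}#{m[:offset]}" } }
    # => [["a0", "a1", "a2"], ["b3", "b4", "c5"]]
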
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+# See LICENSE for details.
+
+module Karafka
+  module Pro
+    module Processing
+      # Processing components for virtual partitions
+      module VirtualPartitions
+        # Distributors for virtual partitions
+        module Distributors
+          # Base class for all virtual partition distributors
+          class Base
+            # @param config [Karafka::Pro::Routing::Features::VirtualPartitions::Config]
+            def initialize(config)
+              @config = config
+            end
+
+            private
+
+            # @return [Karafka::Pro::Routing::Features::VirtualPartitions::Config]
+            attr_reader :config
+          end
+        end
+      end
+    end
+  end
+end

@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+# See LICENSE for details.
+
+module Karafka
+  module Pro
+    module Processing
+      module VirtualPartitions
+        module Distributors
+          # Consistent distributor that ensures messages with the same partition key
+          # are always processed in the same virtual partition
+          class Consistent < Base
+            # @param messages [Array<Karafka::Messages::Message>] messages to distribute
+            # @return [Hash<Integer, Array<Karafka::Messages::Message>>] hash with group ids as
+            #   keys and message groups as values
+            def call(messages)
+              messages
+                .group_by { |msg| config.reducer.call(config.partitioner.call(msg)) }
+                .to_h
+            end
+          end
+        end
+      end
+    end
+  end
+end

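`Consistent` reproduces the prior behavior (the partition key reduced onto a stable virtual partition id), while `Balanced` trades stable key-to-partition affinity across batches for a more even spread within each batch. Assuming these distributors are selected through the extended virtual partitions routing config (files 124-126 above also change in this release; the `distribution:` key shown here is that assumption), the choice would look roughly like:

    class KarafkaApp < Karafka::App
      routes.draw do
        topic :orders_events do
          consumer OrdersEventsConsumer

          virtual_partitions(
            partitioner: ->(message) { message.headers['order_id'] },
            max_partitions: 5,
            # assumption: :consistent maps to the Consistent distributor,
            # :balanced to the Balanced one introduced above
            distribution: :balanced
          )
        end
      end
    end
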
@@ -29,12 +29,16 @@ module Karafka
         end

         nested(:topics) do
-          required(:schedules) do |val|
-            val.is_a?(String) && Karafka::Contracts::TOPIC_REGEXP.match?(val)
+          nested(:schedules) do
+            required(:name) do |val|
+              val.is_a?(String) && Karafka::Contracts::TOPIC_REGEXP.match?(val)
+            end
           end

-          required(:logs) do |val|
-            val.is_a?(String) && Karafka::Contracts::TOPIC_REGEXP.match?(val)
+          nested(:logs) do
+            required(:name) do |val|
+              val.is_a?(String) && Karafka::Contracts::TOPIC_REGEXP.match?(val)
+            end
           end
         end
       end

@@ -12,7 +12,7 @@ module Karafka
       # Snapshots to Kafka current schedule state
       def schedule
         produce(
-          topics.schedules,
+          topics.schedules.name,
           'state:schedule',
           serializer.schedule(::Karafka::Pro::RecurringTasks.schedule)
         )

@@ -25,7 +25,7 @@ module Karafka
       # because in the web ui we work with the full name and it is easier. Since
       def command(name, task_id)
         produce(
-          topics.schedules,
+          topics.schedules.name,
           "command:#{name}:#{task_id}",
           serializer.command(name, task_id)
         )

@@ -35,7 +35,7 @@ module Karafka
       # @param event [Karafka::Core::Monitoring::Event]
       def log(event)
         produce(
-          topics.logs,
+          topics.logs.name,
           event[:task].id,
           serializer.log(event)
         )

@@ -32,8 +32,13 @@ module Karafka
         )

         setting(:topics) do
-          setting(:schedules, default: 'karafka_recurring_tasks_schedules')
-          setting(:logs, default: 'karafka_recurring_tasks_logs')
+          setting(:schedules) do
+            setting(:name, default: 'karafka_recurring_tasks_schedules')
+          end
+
+          setting(:logs) do
+            setting(:name, default: 'karafka_recurring_tasks_logs')
+          end
         end

         configure

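Recurring tasks topics are now nested configuration nodes with a `name` setting instead of plain strings, and the dispatcher reads `topics.schedules.name` / `topics.logs.name` accordingly. If you override them, the setup changes too (a sketch based on the defaults above):

    class KarafkaApp < Karafka::App
      setup do |config|
        # 2.4.x: config.recurring_tasks.topics.schedules = 'my_schedules'
        # 2.5.0: each topic is a node exposing a name setting
        config.recurring_tasks.topics.schedules.name = 'my_schedules'
        config.recurring_tasks.topics.logs.name = 'my_schedules_logs'
      end
    end
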
@@ -29,7 +29,10 @@ module Karafka
         @schedule.instance_exec(&block)

         @schedule.each do |task|
-          Contracts::Task.new.validate!(task.to_h)
+          Contracts::Task.new.validate!(
+            task.to_h,
+            scope: ['recurring_tasks', task.id]
+          )
         end

         @schedule

@@ -59,7 +62,10 @@ module Karafka

       # @param config [Karafka::Core::Configurable::Node] root node config
       def post_setup(config)
-        RecurringTasks::Contracts::Config.new.validate!(config.to_h)
+        RecurringTasks::Contracts::Config.new.validate!(
+          config.to_h,
+          scope: %w[config]
+        )

         # Published after task is successfully executed
         Karafka.monitor.notifications_bus.register_event('recurring_tasks.task.executed')

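Both validation sites now pass a `scope:`, which (assuming the karafka-core contracts API this release pairs with, per `contracts/base.rb` in the file list) prefixes reported validation errors so a failing schedule task or config node can be pinpointed:

    # Hypothetical failing task: the scope surfaces which task was invalid
    Contracts::Task.new.validate!(
      { id: 'cleanup', cron: 'not-a-cron', enabled: true },
      scope: ['recurring_tasks', 'cleanup']
    )
    # => raises with error details reported under the recurring_tasks.cleanup scope
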
@@ -73,6 +79,19 @@ module Karafka

         Karafka.monitor.subscribe(Listener.new)
       end
+
+      # Basically since we may have custom producers configured that are not the same as the
+      # default one, we hold a reference to old pre-fork producer. This means, that when we
+      # initialize it again in post-fork, as long as user uses defaults we should re-inherit
+      # it from the default config.
+      #
+      # @param config [Karafka::Core::Configurable::Node]
+      # @param pre_fork_producer [WaterDrop::Producer]
+      def post_fork(config, pre_fork_producer)
+        return unless config.recurring_tasks.producer == pre_fork_producer
+
+        config.recurring_tasks.producer = config.producer
+      end
     end
   end
 end

@@ -12,7 +12,7 @@ module Karafka
       module Topic
         # @param strategy [#call, nil] Strategy we want to use or nil if a default strategy
         #   (same as in OSS) should be applied
-        # @param args [Hash] OSS DLQ arguments
+        # @param args [Hash] Pro DLQ arguments
         def dead_letter_queue(strategy: nil, **args)
           return @dead_letter_queue if @dead_letter_queue

@@ -14,6 +14,7 @@ module Karafka
           :min,
           :max,
           :boot,
+          :scale_delay,
           keyword_init: true
         ) do
           alias_method :active?, :active

@@ -28,6 +28,7 @@ module Karafka
           optional(:multiplexing_min) { |val| val.is_a?(Integer) && val >= 1 }
           optional(:multiplexing_max) { |val| val.is_a?(Integer) && val >= 1 }
           optional(:multiplexing_boot) { |val| val.is_a?(Integer) && val >= 1 }
+          optional(:multiplexing_scale_delay) { |val| val.is_a?(Integer) && val >= 1_000 }
         end

         # Makes sure min is not more than max

@@ -78,6 +79,22 @@ module Karafka
           [[%w[subscription_group_details], :multiplexing_boot_not_dynamic]]
         end

+        # Makes sure we do not run multiplexing with 1 always which does not make much sense
+        # because then it behaves like without multiplexing and can create problems for
+        # users running multiplexed subscription groups with multiple topics
+        virtual do |data, errors|
+          next unless errors.empty?
+          next unless min(data)
+          next unless max(data)
+
+          min = min(data)
+          max = max(data)
+
+          next unless min == 1 && max == 1
+
+          [[%w[subscription_group_details], :multiplexing_one_not_enough]]
+        end
+
         class << self
           # @param data [Hash] topic details
           # @return [Integer, false] min or false if missing

@@ -14,12 +14,15 @@ module Karafka
         #   disabling dynamic multiplexing
         # @param max [Integer] max multiplexing count
         # @param boot [Integer] how many listeners should we start during boot by default
-        def multiplexing(min: nil, max: 1, boot: nil)
+        # @param scale_delay [Integer] number of ms of delay before applying any scale
+        #   operation to a consumer group
+        def multiplexing(min: nil, max: 1, boot: nil, scale_delay: 60_000)
           @target.current_subscription_group_details.merge!(
             multiplexing_min: min || max,
             multiplexing_max: max,
             # Picks half of max by default as long as possible. Otherwise goes with min
-            multiplexing_boot: boot || [min || max, (max / 2)].max
+            multiplexing_boot: boot || [min || max, (max / 2)].max,
+            multiplexing_scale_delay: scale_delay
           )
         end
       end

@@ -16,9 +16,16 @@ module Karafka
           max = @details.fetch(:multiplexing_max, 1)
           min = @details.fetch(:multiplexing_min, max)
           boot = @details.fetch(:multiplexing_boot, max / 2)
+          scale_delay = @details.fetch(:multiplexing_scale_delay, 60_000)
           active = max > 1

-          Config.new(active: active, min: min, max: max, boot: boot)
+          Config.new(
+            active: active,
+            min: min,
+            max: max,
+            boot: boot,
+            scale_delay: scale_delay
+          )
         end
       end

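`scale_delay` threads through the whole multiplexing feature: the routing proxy accepts it (default 60_000 ms), the contract requires an integer of at least 1_000, and the subscription group exposes it on its config struct. In routing it is set where multiplexing is declared; a sketch:

    class KarafkaApp < Karafka::App
      routes.draw do
        subscription_group :events_group do
          # Scale between 2 and 5 connections, start with 3, and wait at least
          # 120 seconds between scaling operations (contract minimum: 1_000 ms)
          multiplexing(min: 2, max: 5, boot: 3, scale_delay: 120_000)

          topic :events do
            consumer EventsConsumer
          end
        end
      end
    end
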
@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+
+# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+# See LICENSE for details.
+
+module Karafka
+  module Pro
+    module Routing
+      module Features
+        class ParallelSegments < Base
+          # Expansions for the routing builder
+          module Builder
+            # Builds and saves given consumer group
+            # @param group_id [String, Symbol] name for consumer group
+            # @param block [Proc] proc that should be executed in the proxy context
+            def consumer_group(group_id, &block)
+              consumer_group = find { |cg| cg.name == group_id.to_s }
+
+              # Re-opening a CG should not change its parallel setup
+              if consumer_group
+                super
+              else
+                # We build a temp consumer group and a target to check if it has parallel segments
+                # enabled and if so, we do not add it to the routing but instead we build the
+                # appropriate number of parallel segment groups
+                temp_consumer_group = ::Karafka::Routing::ConsumerGroup.new(group_id.to_s)
+                temp_target = Karafka::Routing::Proxy.new(temp_consumer_group, &block).target
+                config = temp_target.parallel_segments
+
+                if config.active?
+                  config.count.times do |i|
+                    sub_name = [group_id, config.merge_key, i.to_s].join
+                    sub_consumer_group = Karafka::Routing::ConsumerGroup.new(sub_name)
+                    self << Karafka::Routing::Proxy.new(sub_consumer_group, &block).target
+                  end
+                # If not parallel segments are not active we go with the default flow
+                else
+                  super
+                end
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end

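When parallel segments are enabled on a consumer group, the builder above replaces the single group with `count` copies named `<group_id><merge_key><index>`, each receiving the full routing block. A hedged routing sketch (the `parallel_segments` parameters follow the new config and consumer group files listed above; the partitioner shown is an assumption):

    class KarafkaApp < Karafka::App
      routes.draw do
        consumer_group :orders do
          # Splits this group into 2 parallel segment groups; messages are
          # claimed per segment based on the partitioner result
          parallel_segments(
            count: 2,
            partitioner: ->(message) { message.key }
          )

          topic :orders_states do
            consumer OrdersStatesConsumer
          end
        end
      end
    end
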