karafka 2.4.18 → 2.5.0.beta2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. checksums.yaml +4 -4
  2. data/.github/CODEOWNERS +3 -0
  3. data/.github/workflows/ci.yml +58 -14
  4. data/.github/workflows/push.yml +36 -0
  5. data/.github/workflows/verify-action-pins.yml +16 -0
  6. data/.ruby-version +1 -1
  7. data/CHANGELOG.md +60 -0
  8. data/Gemfile +2 -2
  9. data/Gemfile.lock +69 -50
  10. data/LICENSE-COMM +2 -2
  11. data/README.md +1 -1
  12. data/Rakefile +4 -0
  13. data/bin/clean_kafka +43 -0
  14. data/bin/integrations +19 -6
  15. data/bin/rspecs +15 -3
  16. data/bin/verify_kafka_warnings +35 -0
  17. data/bin/verify_topics_naming +27 -0
  18. data/config/locales/errors.yml +3 -0
  19. data/config/locales/pro_errors.yml +13 -2
  20. data/docker-compose.yml +1 -1
  21. data/examples/payloads/json/enrollment_event.json +579 -0
  22. data/examples/payloads/json/ingestion_event.json +30 -0
  23. data/examples/payloads/json/transaction_event.json +17 -0
  24. data/examples/payloads/json/user_event.json +11 -0
  25. data/karafka.gemspec +3 -8
  26. data/lib/karafka/active_job/current_attributes.rb +1 -1
  27. data/lib/karafka/admin/acl.rb +5 -1
  28. data/lib/karafka/admin/configs.rb +5 -1
  29. data/lib/karafka/admin.rb +69 -34
  30. data/lib/karafka/base_consumer.rb +17 -8
  31. data/lib/karafka/cli/base.rb +8 -2
  32. data/lib/karafka/cli/topics/align.rb +7 -4
  33. data/lib/karafka/cli/topics/base.rb +17 -0
  34. data/lib/karafka/cli/topics/create.rb +9 -7
  35. data/lib/karafka/cli/topics/delete.rb +4 -2
  36. data/lib/karafka/cli/topics/help.rb +39 -0
  37. data/lib/karafka/cli/topics/repartition.rb +4 -2
  38. data/lib/karafka/cli/topics.rb +10 -3
  39. data/lib/karafka/cli.rb +2 -0
  40. data/lib/karafka/connection/client.rb +30 -9
  41. data/lib/karafka/connection/listener.rb +24 -12
  42. data/lib/karafka/connection/messages_buffer.rb +1 -1
  43. data/lib/karafka/connection/proxy.rb +3 -0
  44. data/lib/karafka/constraints.rb +3 -3
  45. data/lib/karafka/contracts/config.rb +3 -0
  46. data/lib/karafka/contracts/topic.rb +1 -1
  47. data/lib/karafka/errors.rb +46 -2
  48. data/lib/karafka/helpers/async.rb +3 -1
  49. data/lib/karafka/instrumentation/callbacks/rebalance.rb +5 -1
  50. data/lib/karafka/instrumentation/logger_listener.rb +86 -23
  51. data/lib/karafka/instrumentation/proctitle_listener.rb +5 -1
  52. data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +2 -2
  53. data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
  54. data/lib/karafka/pro/cleaner.rb +8 -0
  55. data/lib/karafka/pro/cli/parallel_segments/base.rb +89 -0
  56. data/lib/karafka/pro/cli/parallel_segments/collapse.rb +164 -0
  57. data/lib/karafka/pro/cli/parallel_segments/distribute.rb +164 -0
  58. data/lib/karafka/pro/cli/parallel_segments.rb +60 -0
  59. data/lib/karafka/pro/connection/manager.rb +5 -8
  60. data/lib/karafka/pro/encryption.rb +8 -0
  61. data/lib/karafka/pro/instrumentation/performance_tracker.rb +1 -1
  62. data/lib/karafka/pro/iterator/expander.rb +5 -3
  63. data/lib/karafka/pro/iterator/tpl_builder.rb +23 -0
  64. data/lib/karafka/pro/loader.rb +10 -0
  65. data/lib/karafka/pro/processing/coordinator.rb +4 -1
  66. data/lib/karafka/pro/processing/coordinators/errors_tracker.rb +27 -3
  67. data/lib/karafka/pro/processing/coordinators/filters_applier.rb +11 -0
  68. data/lib/karafka/pro/processing/filters/base.rb +10 -2
  69. data/lib/karafka/pro/processing/filters/expirer.rb +5 -0
  70. data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb +2 -2
  71. data/lib/karafka/pro/processing/filters/virtual_limiter.rb +5 -0
  72. data/lib/karafka/pro/processing/parallel_segments/filters/base.rb +73 -0
  73. data/lib/karafka/pro/processing/parallel_segments/filters/default.rb +85 -0
  74. data/lib/karafka/pro/processing/parallel_segments/filters/mom.rb +66 -0
  75. data/lib/karafka/pro/processing/partitioner.rb +1 -13
  76. data/lib/karafka/pro/processing/piping/consumer.rb +13 -13
  77. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
  78. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
  79. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
  80. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
  81. data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +1 -1
  82. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
  83. data/lib/karafka/pro/processing/strategies/default.rb +36 -8
  84. data/lib/karafka/pro/processing/strategies/dlq/default.rb +14 -10
  85. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +1 -1
  86. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +1 -1
  87. data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +3 -1
  88. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +1 -1
  89. data/lib/karafka/pro/processing/strategies/ftr/default.rb +1 -1
  90. data/lib/karafka/pro/processing/strategies/lrj/default.rb +4 -1
  91. data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +1 -1
  92. data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +1 -1
  93. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
  94. data/lib/karafka/pro/processing/virtual_partitions/distributors/balanced.rb +50 -0
  95. data/lib/karafka/pro/processing/virtual_partitions/distributors/base.rb +29 -0
  96. data/lib/karafka/pro/processing/virtual_partitions/distributors/consistent.rb +27 -0
  97. data/lib/karafka/pro/recurring_tasks/contracts/config.rb +8 -4
  98. data/lib/karafka/pro/recurring_tasks/dispatcher.rb +3 -3
  99. data/lib/karafka/pro/recurring_tasks/setup/config.rb +7 -2
  100. data/lib/karafka/pro/recurring_tasks.rb +13 -0
  101. data/lib/karafka/pro/routing/features/dead_letter_queue/topic.rb +1 -1
  102. data/lib/karafka/pro/routing/features/multiplexing/config.rb +1 -0
  103. data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +17 -0
  104. data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +5 -2
  105. data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +8 -1
  106. data/lib/karafka/pro/routing/features/parallel_segments/builder.rb +47 -0
  107. data/lib/karafka/pro/routing/features/parallel_segments/config.rb +27 -0
  108. data/lib/karafka/pro/routing/features/parallel_segments/consumer_group.rb +83 -0
  109. data/lib/karafka/pro/routing/features/parallel_segments/contracts/consumer_group.rb +49 -0
  110. data/lib/karafka/pro/routing/features/parallel_segments/topic.rb +43 -0
  111. data/lib/karafka/pro/routing/features/parallel_segments.rb +24 -0
  112. data/lib/karafka/pro/routing/features/patterns/pattern.rb +1 -1
  113. data/lib/karafka/pro/routing/features/recurring_tasks/builder.rb +2 -2
  114. data/lib/karafka/pro/routing/features/scheduled_messages/builder.rb +10 -6
  115. data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +20 -2
  116. data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
  117. data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +8 -2
  118. data/lib/karafka/pro/scheduled_messages/consumer.rb +19 -21
  119. data/lib/karafka/pro/scheduled_messages/daily_buffer.rb +9 -6
  120. data/lib/karafka/pro/scheduled_messages/deserializers/headers.rb +7 -1
  121. data/lib/karafka/pro/scheduled_messages/max_epoch.rb +15 -6
  122. data/lib/karafka/pro/scheduled_messages.rb +13 -0
  123. data/lib/karafka/processing/coordinators_buffer.rb +1 -0
  124. data/lib/karafka/processing/strategies/default.rb +4 -4
  125. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
  126. data/lib/karafka/routing/subscription_group.rb +1 -1
  127. data/lib/karafka/runner.rb +7 -1
  128. data/lib/karafka/server.rb +19 -19
  129. data/lib/karafka/setup/attributes_map.rb +2 -0
  130. data/lib/karafka/setup/config.rb +22 -1
  131. data/lib/karafka/setup/defaults_injector.rb +26 -1
  132. data/lib/karafka/status.rb +6 -1
  133. data/lib/karafka/swarm/node.rb +31 -0
  134. data/lib/karafka/swarm/supervisor.rb +4 -0
  135. data/lib/karafka/templates/karafka.rb.erb +14 -1
  136. data/lib/karafka/version.rb +1 -1
  137. data/lib/karafka.rb +17 -9
  138. data/renovate.json +14 -2
  139. metadata +40 -40
  140. checksums.yaml.gz.sig +0 -0
  141. data/certs/cert.pem +0 -26
  142. data.tar.gz.sig +0 -0
  143. metadata.gz.sig +0 -0
@@ -38,7 +38,7 @@ module Karafka
38
38
  elsif !revoked? && !coordinator.manual_seek?
39
39
  # If not revoked and not throttled, we move to where we were supposed to and
40
40
  # resume
41
- seek(last_group_message.offset + 1, false)
41
+ seek(last_group_message.offset + 1, false, reset_offset: false)
42
42
  resume
43
43
  else
44
44
  resume
@@ -40,7 +40,7 @@ module Karafka
40
40
  return if coordinator.manual_pause?
41
41
 
42
42
  unless revoked? || coordinator.manual_seek?
43
- seek(last_group_message.offset + 1, false)
43
+ seek(last_group_message.offset + 1, false, reset_offset: false)
44
44
  end
45
45
 
46
46
  resume
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This code is part of Karafka Pro, a commercial component not licensed under LGPL.
4
+ # See LICENSE for details.
5
+
6
+ module Karafka
7
+ module Pro
8
+ module Processing
9
+ module VirtualPartitions
10
+ module Distributors
11
+ # Balanced distributor that groups messages by partition key
12
+ # and processes larger groups first while maintaining message order within groups
13
+ class Balanced < Base
14
+ # @param messages [Array<Karafka::Messages::Message>] messages to distribute
15
+ # @return [Hash<Integer, Array<Karafka::Messages::Message>>] hash with group ids as
16
+ # keys and message groups as values
17
+ def call(messages)
18
+ # Group messages by partition key
19
+ key_groupings = messages.group_by { |msg| config.partitioner.call(msg) }
20
+
21
+ worker_loads = Array.new(config.max_partitions, 0)
22
+ worker_assignments = Array.new(config.max_partitions) { [] }
23
+
24
+ # Sort keys by workload in descending order
25
+ sorted_keys = key_groupings.keys.sort_by { |key| -key_groupings[key].size }
26
+
27
+ # Assign each key to the worker with the least current load
28
+ sorted_keys.each do |key|
29
+ # Find worker with minimum current load
30
+ min_load_worker = worker_loads.each_with_index.min_by { |load, _| load }[1]
31
+ messages = key_groupings[key]
32
+
33
+ # Assign this key to that worker
34
+ worker_assignments[min_load_worker] += messages
35
+ worker_loads[min_load_worker] += messages.size
36
+ end
37
+
38
+ # Combine messages for each worker and sort by offset
39
+ worker_assignments
40
+ .each_with_index
41
+ .reject { |group_messages, _| group_messages.empty? }
42
+ .map! { |group_messages, index| [index, group_messages.sort_by!(&:offset)] }
43
+ .to_h
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This code is part of Karafka Pro, a commercial component not licensed under LGPL.
4
+ # See LICENSE for details.
5
+
6
+ module Karafka
7
+ module Pro
8
+ module Processing
9
+ # Processing components for virtual partitions
10
+ module VirtualPartitions
11
+ # Distributors for virtual partitions
12
+ module Distributors
13
+ # Base class for all virtual partition distributors
14
+ class Base
15
+ # @param config [Karafka::Pro::Routing::Features::VirtualPartitions::Config]
16
+ def initialize(config)
17
+ @config = config
18
+ end
19
+
20
+ private
21
+
22
+ # @return [Karafka::Pro::Routing::Features::VirtualPartitions::Config]
23
+ attr_reader :config
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This code is part of Karafka Pro, a commercial component not licensed under LGPL.
4
+ # See LICENSE for details.
5
+
6
+ module Karafka
7
+ module Pro
8
+ module Processing
9
+ module VirtualPartitions
10
+ module Distributors
11
+ # Consistent distributor that ensures messages with the same partition key
12
+ # are always processed in the same virtual partition
13
+ class Consistent < Base
14
+ # @param messages [Array<Karafka::Messages::Message>] messages to distribute
15
+ # @return [Hash<Integer, Array<Karafka::Messages::Message>>] hash with group ids as
16
+ # keys and message groups as values
17
+ def call(messages)
18
+ messages
19
+ .group_by { |msg| config.reducer.call(config.partitioner.call(msg)) }
20
+ .to_h
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -29,12 +29,16 @@ module Karafka
29
29
  end
30
30
 
31
31
  nested(:topics) do
32
- required(:schedules) do |val|
33
- val.is_a?(String) && Karafka::Contracts::TOPIC_REGEXP.match?(val)
32
+ nested(:schedules) do
33
+ required(:name) do |val|
34
+ val.is_a?(String) && Karafka::Contracts::TOPIC_REGEXP.match?(val)
35
+ end
34
36
  end
35
37
 
36
- required(:logs) do |val|
37
- val.is_a?(String) && Karafka::Contracts::TOPIC_REGEXP.match?(val)
38
+ nested(:logs) do
39
+ required(:name) do |val|
40
+ val.is_a?(String) && Karafka::Contracts::TOPIC_REGEXP.match?(val)
41
+ end
38
42
  end
39
43
  end
40
44
  end
@@ -12,7 +12,7 @@ module Karafka
12
12
  # Snapshots to Kafka current schedule state
13
13
  def schedule
14
14
  produce(
15
- topics.schedules,
15
+ topics.schedules.name,
16
16
  'state:schedule',
17
17
  serializer.schedule(::Karafka::Pro::RecurringTasks.schedule)
18
18
  )
@@ -25,7 +25,7 @@ module Karafka
25
25
  # because in the web ui we work with the full name and it is easier. Since
26
26
  def command(name, task_id)
27
27
  produce(
28
- topics.schedules,
28
+ topics.schedules.name,
29
29
  "command:#{name}:#{task_id}",
30
30
  serializer.command(name, task_id)
31
31
  )
@@ -35,7 +35,7 @@ module Karafka
35
35
  # @param event [Karafka::Core::Monitoring::Event]
36
36
  def log(event)
37
37
  produce(
38
- topics.logs,
38
+ topics.logs.name,
39
39
  event[:task].id,
40
40
  serializer.log(event)
41
41
  )
@@ -32,8 +32,13 @@ module Karafka
32
32
  )
33
33
 
34
34
  setting(:topics) do
35
- setting(:schedules, default: 'karafka_recurring_tasks_schedules')
36
- setting(:logs, default: 'karafka_recurring_tasks_logs')
35
+ setting(:schedules) do
36
+ setting(:name, default: 'karafka_recurring_tasks_schedules')
37
+ end
38
+
39
+ setting(:logs) do
40
+ setting(:name, default: 'karafka_recurring_tasks_logs')
41
+ end
37
42
  end
38
43
 
39
44
  configure
@@ -73,6 +73,19 @@ module Karafka
73
73
 
74
74
  Karafka.monitor.subscribe(Listener.new)
75
75
  end
76
+
77
+ # Basically since we may have custom producers configured that are not the same as the
78
+ # default one, we hold a reference to the old pre-fork producer. This means that when we
79
+ # initialize it again in post-fork, as long as user uses defaults we should re-inherit
80
+ # it from the default config.
81
+ #
82
+ # @param config [Karafka::Core::Configurable::Node]
83
+ # @param pre_fork_producer [WaterDrop::Producer]
84
+ def post_fork(config, pre_fork_producer)
85
+ return unless config.recurring_tasks.producer == pre_fork_producer
86
+
87
+ config.recurring_tasks.producer = config.producer
88
+ end
76
89
  end
77
90
  end
78
91
  end
@@ -12,7 +12,7 @@ module Karafka
12
12
  module Topic
13
13
  # @param strategy [#call, nil] Strategy we want to use or nil if a default strategy
14
14
  # (same as in OSS) should be applied
15
- # @param args [Hash] OSS DLQ arguments
15
+ # @param args [Hash] Pro DLQ arguments
16
16
  def dead_letter_queue(strategy: nil, **args)
17
17
  return @dead_letter_queue if @dead_letter_queue
18
18
 
@@ -14,6 +14,7 @@ module Karafka
14
14
  :min,
15
15
  :max,
16
16
  :boot,
17
+ :scale_delay,
17
18
  keyword_init: true
18
19
  ) do
19
20
  alias_method :active?, :active
@@ -28,6 +28,7 @@ module Karafka
28
28
  optional(:multiplexing_min) { |val| val.is_a?(Integer) && val >= 1 }
29
29
  optional(:multiplexing_max) { |val| val.is_a?(Integer) && val >= 1 }
30
30
  optional(:multiplexing_boot) { |val| val.is_a?(Integer) && val >= 1 }
31
+ optional(:multiplexing_scale_delay) { |val| val.is_a?(Integer) && val >= 1_000 }
31
32
  end
32
33
 
33
34
  # Makes sure min is not more than max
@@ -78,6 +79,22 @@ module Karafka
78
79
  [[%w[subscription_group_details], :multiplexing_boot_not_dynamic]]
79
80
  end
80
81
 
82
+ # Makes sure we do not run multiplexing with 1 always which does not make much sense
83
+ # because then it behaves like without multiplexing and can create problems for
84
+ # users running multiplexed subscription groups with multiple topics
85
+ virtual do |data, errors|
86
+ next unless errors.empty?
87
+ next unless min(data)
88
+ next unless max(data)
89
+
90
+ min = min(data)
91
+ max = max(data)
92
+
93
+ next unless min == 1 && max == 1
94
+
95
+ [[%w[subscription_group_details], :multiplexing_one_not_enough]]
96
+ end
97
+
81
98
  class << self
82
99
  # @param data [Hash] topic details
83
100
  # @return [Integer, false] min or false if missing
@@ -14,12 +14,15 @@ module Karafka
14
14
  # disabling dynamic multiplexing
15
15
  # @param max [Integer] max multiplexing count
16
16
  # @param boot [Integer] how many listeners should we start during boot by default
17
- def multiplexing(min: nil, max: 1, boot: nil)
17
+ # @param scale_delay [Integer] number of ms of delay before applying any scale
18
+ # operation to a consumer group
19
+ def multiplexing(min: nil, max: 1, boot: nil, scale_delay: 60_000)
18
20
  @target.current_subscription_group_details.merge!(
19
21
  multiplexing_min: min || max,
20
22
  multiplexing_max: max,
21
23
  # Picks half of max by default as long as possible. Otherwise goes with min
22
- multiplexing_boot: boot || [min || max, (max / 2)].max
24
+ multiplexing_boot: boot || [min || max, (max / 2)].max,
25
+ multiplexing_scale_delay: scale_delay
23
26
  )
24
27
  end
25
28
  end
@@ -16,9 +16,16 @@ module Karafka
16
16
  max = @details.fetch(:multiplexing_max, 1)
17
17
  min = @details.fetch(:multiplexing_min, max)
18
18
  boot = @details.fetch(:multiplexing_boot, max / 2)
19
+ scale_delay = @details.fetch(:multiplexing_scale_delay, 60_000)
19
20
  active = max > 1
20
21
 
21
- Config.new(active: active, min: min, max: max, boot: boot)
22
+ Config.new(
23
+ active: active,
24
+ min: min,
25
+ max: max,
26
+ boot: boot,
27
+ scale_delay: scale_delay
28
+ )
22
29
  end
23
30
  end
24
31
 
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This code is part of Karafka Pro, a commercial component not licensed under LGPL.
4
+ # See LICENSE for details.
5
+
6
+ module Karafka
7
+ module Pro
8
+ module Routing
9
+ module Features
10
+ class ParallelSegments < Base
11
+ # Expansions for the routing builder
12
+ module Builder
13
+ # Builds and saves given consumer group
14
+ # @param group_id [String, Symbol] name for consumer group
15
+ # @param block [Proc] proc that should be executed in the proxy context
16
+ def consumer_group(group_id, &block)
17
+ consumer_group = find { |cg| cg.name == group_id.to_s }
18
+
19
+ # Re-opening a CG should not change its parallel setup
20
+ if consumer_group
21
+ super
22
+ else
23
+ # We build a temp consumer group and a target to check if it has parallel segments
24
+ # enabled and if so, we do not add it to the routing but instead we build the
25
+ # appropriate number of parallel segment groups
26
+ temp_consumer_group = ::Karafka::Routing::ConsumerGroup.new(group_id.to_s)
27
+ temp_target = Karafka::Routing::Proxy.new(temp_consumer_group, &block).target
28
+ config = temp_target.parallel_segments
29
+
30
+ if config.active?
31
+ config.count.times do |i|
32
+ sub_name = [group_id, config.merge_key, i.to_s].join
33
+ sub_consumer_group = Karafka::Routing::ConsumerGroup.new(sub_name)
34
+ self << Karafka::Routing::Proxy.new(sub_consumer_group, &block).target
35
+ end
36
+ # If parallel segments are not active we go with the default flow
37
+ else
38
+ super
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This code is part of Karafka Pro, a commercial component not licensed under LGPL.
4
+ # See LICENSE for details.
5
+
6
+ module Karafka
7
+ module Pro
8
+ module Routing
9
+ module Features
10
+ class ParallelSegments < Base
11
+ # Config for parallel segments.
12
+ # @note Used on the consumer level, not per topic
13
+ Config = Struct.new(
14
+ :active,
15
+ :count,
16
+ :partitioner,
17
+ :reducer,
18
+ :merge_key,
19
+ keyword_init: true
20
+ ) do
21
+ alias_method :active?, :active
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This code is part of Karafka Pro, a commercial component not licensed under LGPL.
4
+ # See LICENSE for details.
5
+
6
+ module Karafka
7
+ module Pro
8
+ module Routing
9
+ module Features
10
+ class ParallelSegments < Base
11
+ # Parallel segments are defined on the consumer group (since it creates many), thus we
12
+ # define them on the consumer group.
13
+ # This module adds extra methods needed there to make it work
14
+ module ConsumerGroup
15
+ # @return [Config] parallel segments config
16
+ def parallel_segments
17
+ # We initialize it as disabled if not configured by the user
18
+ public_send(:parallel_segments=, count: 1)
19
+ end
20
+
21
+ # Allows setting parallel segments configuration
22
+ #
23
+ # @param count [Integer] number of parallel segments (number of parallel consumer
24
+ # groups that will be created)
25
+ # @param partitioner [nil, #call] nil or callable partitioner
26
+ # @param reducer [nil, #call] reducer for parallel key. It allows for using a custom
27
+ # reducer to achieve enhanced parallelization when the default reducer is not enough.
28
+ # @param merge_key [String] key used to build the parallel segment consumer groups
29
+ #
30
+ # @note This method is an assignor but the API is actually via the `#parallel_segments`
31
+ # method. Our `Routing::Proxy` normalizes that the way we want to have it exposed
32
+ # for the end users.
33
+ def parallel_segments=(
34
+ count: 1,
35
+ partitioner: nil,
36
+ reducer: nil,
37
+ merge_key: '-parallel-'
38
+ )
39
+ @parallel_segments ||= Config.new(
40
+ active: count > 1,
41
+ count: count,
42
+ partitioner: partitioner,
43
+ reducer: reducer || ->(parallel_key) { parallel_key.to_s.sum % count },
44
+ merge_key: merge_key
45
+ )
46
+ end
47
+
48
+ # @return [Boolean] are parallel segments active
49
+ def parallel_segments?
50
+ parallel_segments.active?
51
+ end
52
+
53
+ # @return [Integer] id of the segment (0 or bigger) or -1 if parallel segments are not
54
+ # active
55
+ def segment_id
56
+ return @segment_id if @segment_id
57
+
58
+ @segment_id = if parallel_segments?
59
+ name.split(parallel_segments.merge_key).last.to_i
60
+ else
61
+ -1
62
+ end
63
+ end
64
+
65
+ # @return [String] original segment consumer group name
66
+ def segment_origin
67
+ name.split(parallel_segments.merge_key).first
68
+ end
69
+
70
+ # @return [Hash] consumer group setup with the parallel segments definition in it
71
+ def to_h
72
+ super.merge(
73
+ parallel_segments: parallel_segments.to_h.merge(
74
+ segment_id: segment_id
75
+ )
76
+ ).freeze
77
+ end
78
+ end
79
+ end
80
+ end
81
+ end
82
+ end
83
+ end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This code is part of Karafka Pro, a commercial component not licensed under LGPL.
4
+ # See LICENSE for details.
5
+
6
+ module Karafka
7
+ module Pro
8
+ module Routing
9
+ module Features
10
+ class ParallelSegments < Base
11
+ # Namespace for parallel segments contracts
12
+ module Contracts
13
+ # Contract to validate configuration of the parallel segments feature
14
+ class ConsumerGroup < Karafka::Contracts::Base
15
+ configure do |config|
16
+ config.error_messages = YAML.safe_load(
17
+ File.read(
18
+ File.join(Karafka.gem_root, 'config', 'locales', 'pro_errors.yml')
19
+ )
20
+ ).fetch('en').fetch('validations').fetch('consumer_group')
21
+
22
+ nested(:parallel_segments) do
23
+ required(:active) { |val| [true, false].include?(val) }
24
+ required(:partitioner) { |val| val.nil? || val.respond_to?(:call) }
25
+ required(:reducer) { |val| val.respond_to?(:call) }
26
+ required(:count) { |val| val.is_a?(Integer) && val >= 1 }
27
+ required(:merge_key) { |val| val.is_a?(String) && val.size >= 1 }
28
+ end
29
+
30
+ # When parallel segments are defined, partitioner needs to respond to `#call` and
31
+ # it cannot be nil
32
+ virtual do |data, errors|
33
+ next unless errors.empty?
34
+
35
+ parallel_segments = data[:parallel_segments]
36
+
37
+ next unless parallel_segments[:active]
38
+ next if parallel_segments[:partitioner].respond_to?(:call)
39
+
40
+ [[%i[parallel_segments partitioner], :respond_to_call]]
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This code is part of Karafka Pro, a commercial component not licensed under LGPL.
4
+ # See LICENSE for details.
5
+
6
+ module Karafka
7
+ module Pro
8
+ module Routing
9
+ module Features
10
+ class ParallelSegments < Base
11
+ # Parallel segments related expansions to the topic building flow
12
+ module Topic
13
+ # Injects the parallel segments filter as the first filter during building of each of
14
+ # the topics in case parallel segments are enabled.
15
+ #
16
+ # @param args [Object] anything accepted by the topic initializer
17
+ def initialize(*args)
18
+ super
19
+
20
+ return unless consumer_group.parallel_segments?
21
+
22
+ builder = lambda do |topic, _partition|
23
+ mom = topic.manual_offset_management?
24
+
25
+ # We have two filters for mom and non-mom scenario not to mix this logic
26
+ filter_scope = Karafka::Pro::Processing::ParallelSegments::Filters
27
+ filter_class = mom ? filter_scope::Mom : filter_scope::Default
28
+
29
+ filter_class.new(
30
+ segment_id: consumer_group.segment_id,
31
+ partitioner: consumer_group.parallel_segments.partitioner,
32
+ reducer: consumer_group.parallel_segments.reducer
33
+ )
34
+ end
35
+
36
+ filter(builder)
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This code is part of Karafka Pro, a commercial component not licensed under LGPL.
4
+ # See LICENSE for details.
5
+
6
+ module Karafka
7
+ module Pro
8
+ module Routing
9
+ module Features
10
+ # Feature that allows parallelizing message processing within a single consumer group by
11
+ # creating multiple consumer group instances. It enables processing messages from each
12
+ # partition in parallel by distributing them to separate consumer group instances based on
13
+ # a partitioning key. Useful for both CPU and IO bound operations.
14
+ #
15
+ # Each parallel segment operates as an independent consumer group instance, processing
16
+ # messages that are assigned to it based on the configured partitioner and reducer.
17
+ # This allows for better resource utilization and increased processing throughput without
18
+ # requiring changes to the topic's partition count.
19
+ class ParallelSegments < Base
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -45,7 +45,7 @@ module Karafka
45
45
  # topic but this minimizes simple mistakes
46
46
  #
47
47
  # This sub-part of sha1 should be unique enough and short enough to use it here
48
- digest = Digest::SHA1.hexdigest(safe_regexp.source)[8..16]
48
+ digest = Digest::SHA256.hexdigest(safe_regexp.source)[8..16]
49
49
  @name = name ? name.to_s : "karafka-pattern-#{digest}"
50
50
  @config = config
51
51
  end
@@ -29,7 +29,7 @@ module Karafka
29
29
  consumer_group tasks_cfg.group_id do
30
30
  # Registers the primary topic that we use to control schedules execution. This is
31
31
  # the one that we use to trigger recurring tasks.
32
- schedules_topic = topic(topics_cfg.schedules) do
32
+ schedules_topic = topic(topics_cfg.schedules.name) do
33
33
  consumer tasks_cfg.consumer_class
34
34
  deserializer tasks_cfg.deserializer
35
35
  # Because the topic method name as well as builder proxy method name is the same
@@ -83,7 +83,7 @@ module Karafka
83
83
 
84
84
  # This topic is to store logs that we can then inspect either from the admin or via
85
85
  # the Web UI
86
- logs_topic = topic(topics_cfg.logs) do
86
+ logs_topic = topic(topics_cfg.logs.name) do
87
87
  active(false)
88
88
  deserializer tasks_cfg.deserializer
89
89
  target.recurring_tasks(true)
@@ -12,14 +12,14 @@ module Karafka
12
12
  module Builder
13
13
  # Enabled scheduled messages operations and adds needed topics and other stuff.
14
14
  #
15
- # @param group_name [String, false] name for scheduled messages topic that is also used
15
+ # @param topic_name [String, false] name for scheduled messages topic that is also used
16
16
  # as a group identifier. Users can have multiple schedule topics flows to prevent key
17
17
  # collisions, prioritize and do other stuff. `false` if not active.
18
18
  # @param block [Proc] optional reconfiguration of the topics definitions.
19
19
  # @note Namespace for topics should include the divider as it is not automatically
20
20
  # added.
21
- def scheduled_messages(group_name = false, &block)
22
- return unless group_name
21
+ def scheduled_messages(topic_name = false, &block)
22
+ return unless topic_name
23
23
 
24
24
  # Load zlib only if user enables scheduled messages
25
25
  require 'zlib'
@@ -32,7 +32,7 @@ module Karafka
32
32
  consumer_group msg_cfg.group_id do
33
33
  # Registers the primary topic that we use to control schedules execution. This is
34
34
  # the one that we use to trigger scheduled messages.
35
- messages_topic = topic(group_name) do
35
+ messages_topic = topic(topic_name) do
36
36
  instance_eval(&block) if block && block.arity.zero?
37
37
 
38
38
  consumer msg_cfg.consumer_class
@@ -54,7 +54,11 @@ module Karafka
54
54
  consumer_persistence(true)
55
55
 
56
56
  # This needs to be enabled for the eof to work correctly
57
- kafka('enable.partition.eof': true, inherit: true)
57
+ kafka(
58
+ 'enable.partition.eof': true,
59
+ 'auto.offset.reset': 'earliest',
60
+ inherit: true
61
+ )
58
62
  eofed(true)
59
63
 
60
64
  # Since this is a topic that gets replayed because of schedule management, we do
@@ -96,7 +100,7 @@ module Karafka
96
100
  # Holds states of scheduler per each of the partitions since they tick
97
101
  # independently. We only hold future statistics not to have to deal with
98
102
  # any type of state restoration
99
- states_topic = topic("#{group_name}#{msg_cfg.states_postfix}") do
103
+ states_topic = topic("#{topic_name}#{msg_cfg.states_postfix}") do
100
104
  active(false)
101
105
  target.scheduled_messages(true)
102
106
  config(