karafka 2.4.18 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159)
  1. checksums.yaml +4 -4
  2. data/.github/CODEOWNERS +3 -0
  3. data/.github/workflows/ci.yml +59 -15
  4. data/.github/workflows/push.yml +35 -0
  5. data/.github/workflows/verify-action-pins.yml +16 -0
  6. data/.ruby-version +1 -1
  7. data/CHANGELOG.md +75 -0
  8. data/Gemfile +2 -2
  9. data/Gemfile.lock +72 -53
  10. data/LICENSE-COMM +2 -2
  11. data/README.md +1 -1
  12. data/Rakefile +4 -0
  13. data/bin/clean_kafka +43 -0
  14. data/bin/integrations +20 -6
  15. data/bin/rspecs +15 -3
  16. data/bin/verify_kafka_warnings +35 -0
  17. data/bin/verify_topics_naming +27 -0
  18. data/config/locales/errors.yml +5 -1
  19. data/config/locales/pro_errors.yml +13 -2
  20. data/docker-compose.yml +1 -1
  21. data/examples/payloads/avro/.gitkeep +0 -0
  22. data/examples/payloads/json/sample_set_01/enrollment_event.json +579 -0
  23. data/examples/payloads/json/sample_set_01/ingestion_event.json +30 -0
  24. data/examples/payloads/json/sample_set_01/transaction_event.json +17 -0
  25. data/examples/payloads/json/sample_set_01/user_event.json +11 -0
  26. data/karafka.gemspec +3 -8
  27. data/lib/karafka/active_job/current_attributes.rb +1 -1
  28. data/lib/karafka/active_job/job_extensions.rb +4 -1
  29. data/lib/karafka/admin/acl.rb +5 -1
  30. data/lib/karafka/admin/configs.rb +5 -1
  31. data/lib/karafka/admin.rb +89 -42
  32. data/lib/karafka/base_consumer.rb +17 -8
  33. data/lib/karafka/cli/base.rb +8 -2
  34. data/lib/karafka/cli/topics/align.rb +7 -4
  35. data/lib/karafka/cli/topics/base.rb +17 -0
  36. data/lib/karafka/cli/topics/create.rb +9 -7
  37. data/lib/karafka/cli/topics/delete.rb +4 -2
  38. data/lib/karafka/cli/topics/help.rb +39 -0
  39. data/lib/karafka/cli/topics/repartition.rb +4 -2
  40. data/lib/karafka/cli/topics.rb +10 -3
  41. data/lib/karafka/cli.rb +2 -0
  42. data/lib/karafka/connection/client.rb +39 -9
  43. data/lib/karafka/connection/listener.rb +24 -12
  44. data/lib/karafka/connection/messages_buffer.rb +1 -1
  45. data/lib/karafka/connection/proxy.rb +4 -1
  46. data/lib/karafka/constraints.rb +3 -3
  47. data/lib/karafka/contracts/base.rb +3 -2
  48. data/lib/karafka/contracts/config.rb +5 -1
  49. data/lib/karafka/contracts/topic.rb +1 -1
  50. data/lib/karafka/errors.rb +46 -2
  51. data/lib/karafka/helpers/async.rb +3 -1
  52. data/lib/karafka/helpers/interval_runner.rb +8 -0
  53. data/lib/karafka/instrumentation/callbacks/rebalance.rb +5 -1
  54. data/lib/karafka/instrumentation/logger_listener.rb +95 -32
  55. data/lib/karafka/instrumentation/proctitle_listener.rb +5 -1
  56. data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +2 -2
  57. data/lib/karafka/instrumentation/vendors/kubernetes/base_listener.rb +17 -2
  58. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +29 -6
  59. data/lib/karafka/instrumentation/vendors/kubernetes/swarm_liveness_listener.rb +9 -0
  60. data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
  61. data/lib/karafka/pro/cleaner.rb +8 -0
  62. data/lib/karafka/pro/cli/parallel_segments/base.rb +89 -0
  63. data/lib/karafka/pro/cli/parallel_segments/collapse.rb +164 -0
  64. data/lib/karafka/pro/cli/parallel_segments/distribute.rb +164 -0
  65. data/lib/karafka/pro/cli/parallel_segments.rb +60 -0
  66. data/lib/karafka/pro/connection/manager.rb +5 -8
  67. data/lib/karafka/pro/encryption.rb +12 -1
  68. data/lib/karafka/pro/instrumentation/performance_tracker.rb +1 -1
  69. data/lib/karafka/pro/iterator/expander.rb +5 -3
  70. data/lib/karafka/pro/iterator/tpl_builder.rb +23 -0
  71. data/lib/karafka/pro/loader.rb +10 -0
  72. data/lib/karafka/pro/processing/coordinator.rb +4 -1
  73. data/lib/karafka/pro/processing/coordinators/errors_tracker.rb +32 -3
  74. data/lib/karafka/pro/processing/coordinators/filters_applier.rb +11 -0
  75. data/lib/karafka/pro/processing/filters/base.rb +10 -2
  76. data/lib/karafka/pro/processing/filters/expirer.rb +5 -0
  77. data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb +2 -2
  78. data/lib/karafka/pro/processing/filters/virtual_limiter.rb +5 -0
  79. data/lib/karafka/pro/processing/parallel_segments/filters/base.rb +73 -0
  80. data/lib/karafka/pro/processing/parallel_segments/filters/default.rb +85 -0
  81. data/lib/karafka/pro/processing/parallel_segments/filters/mom.rb +66 -0
  82. data/lib/karafka/pro/processing/partitioner.rb +1 -13
  83. data/lib/karafka/pro/processing/piping/consumer.rb +13 -13
  84. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
  85. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
  86. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
  87. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
  88. data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +1 -1
  89. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
  90. data/lib/karafka/pro/processing/strategies/default.rb +36 -8
  91. data/lib/karafka/pro/processing/strategies/dlq/default.rb +15 -10
  92. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +1 -1
  93. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +1 -1
  94. data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +3 -1
  95. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +1 -1
  96. data/lib/karafka/pro/processing/strategies/ftr/default.rb +1 -1
  97. data/lib/karafka/pro/processing/strategies/lrj/default.rb +4 -1
  98. data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +1 -1
  99. data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +1 -1
  100. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
  101. data/lib/karafka/pro/processing/virtual_partitions/distributors/balanced.rb +50 -0
  102. data/lib/karafka/pro/processing/virtual_partitions/distributors/base.rb +29 -0
  103. data/lib/karafka/pro/processing/virtual_partitions/distributors/consistent.rb +27 -0
  104. data/lib/karafka/pro/recurring_tasks/contracts/config.rb +8 -4
  105. data/lib/karafka/pro/recurring_tasks/dispatcher.rb +3 -3
  106. data/lib/karafka/pro/recurring_tasks/setup/config.rb +7 -2
  107. data/lib/karafka/pro/recurring_tasks.rb +21 -2
  108. data/lib/karafka/pro/routing/features/dead_letter_queue/topic.rb +1 -1
  109. data/lib/karafka/pro/routing/features/multiplexing/config.rb +1 -0
  110. data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +17 -0
  111. data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +5 -2
  112. data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +8 -1
  113. data/lib/karafka/pro/routing/features/parallel_segments/builder.rb +47 -0
  114. data/lib/karafka/pro/routing/features/parallel_segments/config.rb +27 -0
  115. data/lib/karafka/pro/routing/features/parallel_segments/consumer_group.rb +83 -0
  116. data/lib/karafka/pro/routing/features/parallel_segments/contracts/consumer_group.rb +49 -0
  117. data/lib/karafka/pro/routing/features/parallel_segments/topic.rb +43 -0
  118. data/lib/karafka/pro/routing/features/parallel_segments.rb +24 -0
  119. data/lib/karafka/pro/routing/features/patterns/pattern.rb +1 -1
  120. data/lib/karafka/pro/routing/features/recurring_tasks/builder.rb +2 -2
  121. data/lib/karafka/pro/routing/features/scheduled_messages/builder.rb +10 -6
  122. data/lib/karafka/pro/routing/features/swarm/contracts/routing.rb +3 -2
  123. data/lib/karafka/pro/routing/features/swarm.rb +4 -1
  124. data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +20 -2
  125. data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
  126. data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +8 -2
  127. data/lib/karafka/pro/scheduled_messages/consumer.rb +61 -26
  128. data/lib/karafka/pro/scheduled_messages/daily_buffer.rb +9 -6
  129. data/lib/karafka/pro/scheduled_messages/deserializers/headers.rb +7 -1
  130. data/lib/karafka/pro/scheduled_messages/dispatcher.rb +2 -1
  131. data/lib/karafka/pro/scheduled_messages/max_epoch.rb +15 -6
  132. data/lib/karafka/pro/scheduled_messages/proxy.rb +15 -3
  133. data/lib/karafka/pro/scheduled_messages/serializer.rb +2 -4
  134. data/lib/karafka/pro/scheduled_messages/state.rb +20 -23
  135. data/lib/karafka/pro/scheduled_messages/tracker.rb +34 -8
  136. data/lib/karafka/pro/scheduled_messages.rb +17 -1
  137. data/lib/karafka/processing/coordinators_buffer.rb +1 -0
  138. data/lib/karafka/processing/strategies/default.rb +4 -4
  139. data/lib/karafka/routing/builder.rb +12 -3
  140. data/lib/karafka/routing/features/base/expander.rb +8 -2
  141. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
  142. data/lib/karafka/routing/subscription_group.rb +1 -1
  143. data/lib/karafka/runner.rb +7 -1
  144. data/lib/karafka/server.rb +21 -18
  145. data/lib/karafka/setup/attributes_map.rb +2 -0
  146. data/lib/karafka/setup/config.rb +40 -7
  147. data/lib/karafka/setup/defaults_injector.rb +26 -1
  148. data/lib/karafka/status.rb +6 -1
  149. data/lib/karafka/swarm/node.rb +31 -0
  150. data/lib/karafka/swarm/supervisor.rb +9 -2
  151. data/lib/karafka/templates/karafka.rb.erb +14 -1
  152. data/lib/karafka/version.rb +1 -1
  153. data/lib/karafka.rb +17 -9
  154. data/renovate.json +14 -2
  155. metadata +41 -40
  156. checksums.yaml.gz.sig +0 -0
  157. data/certs/cert.pem +0 -26
  158. data.tar.gz.sig +0 -0
  159. metadata.gz.sig +0 -0
data/lib/karafka/instrumentation/vendors/kubernetes/base_listener.rb

@@ -8,11 +8,12 @@ module Karafka
  # Namespace for instrumentation related with Kubernetes
  module Kubernetes
  # Base Kubernetes Listener providing basic HTTP server capabilities to respond with health
+ # statuses
  class BaseListener
  include ::Karafka::Core::Helpers::Time

  # All good with Karafka
- OK_CODE = '204 No Content'
+ OK_CODE = '200 OK'

  # Some timeouts, fail
  FAIL_CODE = '500 Internal Server Error'
@@ -38,11 +39,15 @@ module Karafka

  # Responds to a HTTP request with the process liveness status
  def respond
+ body = JSON.generate(status_body)
+
  client = @server.accept
  client.gets
  client.print "HTTP/1.1 #{healthy? ? OK_CODE : FAIL_CODE}\r\n"
- client.print "Content-Type: text/plain\r\n"
+ client.print "Content-Type: application/json\r\n"
+ client.print "Content-Length: #{body.bytesize}\r\n"
  client.print "\r\n"
+ client.print body
  client.close

  true
@@ -50,6 +55,16 @@ module Karafka
  !@server.closed?
  end

+ # @return [Hash] hash that will be the response body
+ def status_body
+ {
+ status: healthy? ? 'healthy' : 'unhealthy',
+ timestamp: Time.now.to_i,
+ port: @port,
+ process_id: ::Process.pid
+ }
+ end
+
  # Starts background thread with micro-http monitoring
  def start
  @server = TCPServer.new(*[@hostname, @port].compact)
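Since the listener now returns a JSON body with an explicit Content-Length instead of a bare 204, external tooling can parse the payload directly. A minimal probe sketch, assuming a listener bound to 127.0.0.1:3000 (both values hypothetical):

```ruby
require 'json'
require 'net/http'

# Hypothetical probe; host and port must match the listener configuration
response = Net::HTTP.get_response(URI('http://127.0.0.1:3000/'))
payload = JSON.parse(response.body)

payload['status']     # => "healthy" or "unhealthy", mirroring status_body above
payload['process_id'] # => PID of the monitored Karafka process
```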
data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb

@@ -53,7 +53,7 @@ module Karafka
  consuming_ttl: 5 * 60 * 1_000,
  polling_ttl: 5 * 60 * 1_000
  )
- # If this is set to true, it indicates unrecoverable error like fencing
+ # If this is set to a symbol, it indicates unrecoverable error like fencing
  # While fencing can be partial (for one of the SGs), we still should consider this
  # as an undesired state for the whole process because it halts processing in a
  # non-recoverable manner forever
@@ -116,7 +116,7 @@ module Karafka
  # We mark as unrecoverable only on certain errors that will not be fixed by retrying
  return unless UNRECOVERABLE_RDKAFKA_ERRORS.include?(error.code)

- @unrecoverable = true
+ @unrecoverable = error.code
  end

  # Deregister the polling tracker for given listener
@@ -142,17 +142,29 @@ module Karafka
  # Did we exceed any of the ttls
  # @return [String] 204 string if ok, 500 otherwise
  def healthy?
- time = monotonic_now
-
  return false if @unrecoverable
- return false if @pollings.values.any? { |tick| (time - tick) > @polling_ttl }
- return false if @consumptions.values.any? { |tick| (time - tick) > @consuming_ttl }
+ return false if polling_ttl_exceeded?
+ return false if consuming_ttl_exceeded?

  true
  end

  private

+ # @return [Boolean] true if the consumer exceeded the polling ttl
+ def polling_ttl_exceeded?
+ time = monotonic_now
+
+ @pollings.values.any? { |tick| (time - tick) > @polling_ttl }
+ end
+
+ # @return [Boolean] true if the consumer exceeded the consuming ttl
+ def consuming_ttl_exceeded?
+ time = monotonic_now
+
+ @consumptions.values.any? { |tick| (time - tick) > @consuming_ttl }
+ end
+
  # Wraps the logic with a mutex
  # @param block [Proc] code we want to run in mutex
  def synchronize(&block)
@@ -191,6 +203,17 @@ module Karafka
  @consumptions.delete(thread_id)
  end
  end
+
+ # @return [Hash] response body status
+ def status_body
+ super.merge!(
+ errors: {
+ polling_ttl_exceeded: polling_ttl_exceeded?,
+ consumption_ttl_exceeded: consuming_ttl_exceeded?,
+ unrecoverable: @unrecoverable
+ }
+ )
+ end
  end
  end
  end
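For context, such a listener is subscribed to the Karafka instrumentation monitor; a sketch of the wiring, assuming the constructor keywords match the defaults visible in the hunk above (TTLs in milliseconds):

```ruby
# karafka.rb (sketch): subscribe the listener so the TTL tracking has data
listener = ::Karafka::Instrumentation::Vendors::Kubernetes::LivenessListener.new(
  hostname: '0.0.0.0',
  port: 3000,
  # Five minutes, matching the defaults shown in the diff
  consuming_ttl: 5 * 60 * 1_000,
  polling_ttl: 5 * 60 * 1_000
)

Karafka.monitor.subscribe(listener)
```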
data/lib/karafka/instrumentation/vendors/kubernetes/swarm_liveness_listener.rb

@@ -47,6 +47,15 @@ module Karafka
  def healthy?
  (monotonic_now - @controlling) < @controlling_ttl
  end
+
+ # @return [Hash] response body status
+ def status_body
+ super.merge!(
+ errors: {
+ controlling_ttl_exceeded: !healthy?
+ }
+ )
+ end
  end
  end
  end
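Combined with the base listener's status_body, an unhealthy swarm response might look as follows (illustrative values only):

```ruby
# Possible status_body result once the controlling TTL is exceeded
{
  status: 'unhealthy',
  timestamp: 1_717_171_717,
  port: 3000,
  process_id: 12_345,
  errors: {
    controlling_ttl_exceeded: true
  }
}
```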
data/lib/karafka/messages/builders/batch_metadata.rb

@@ -18,7 +18,7 @@ module Karafka
  # picked up for processing.
  def call(messages, topic, partition, scheduled_at)
  Karafka::Messages::BatchMetadata.new(
- size: messages.count,
+ size: messages.size,
  first_offset: messages.first&.offset || -1001,
  last_offset: messages.last&.offset || -1001,
  deserializers: topic.deserializers,
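On a plain Array, `size` and `count` are aliases, but `count` also accepts an argument or block and then iterates the whole collection, which `size` never does:

```ruby
messages = [1, 2, 3]

messages.size          # => 3, direct length lookup
messages.count         # => 3, alias of size when called bare
messages.count(&:odd?) # => 2, iterates every element
```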
data/lib/karafka/pro/cleaner.rb

@@ -28,6 +28,14 @@ module Karafka
  def post_setup(_config)
  true
  end
+
+ # This feature does not need any changes post-fork
+ #
+ # @param _config [Karafka::Core::Configurable::Node]
+ # @param _pre_fork_producer [WaterDrop::Producer]
+ def post_fork(_config, _pre_fork_producer)
+ true
+ end
  end
  end
  end
data/lib/karafka/pro/cli/parallel_segments/base.rb

@@ -0,0 +1,89 @@
+ # frozen_string_literal: true
+
+ # This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+ # See LICENSE for details.
+
+ module Karafka
+ module Pro
+ module Cli
+ class ParallelSegments < Karafka::Cli::Base
+ # Base class for all the parallel segments related operations
+ class Base
+ include Helpers::Colorize
+
+ # @param options [Hash] cli flags options
+ def initialize(options)
+ @options = options
+ end
+
+ private
+
+ # @return [Hash]
+ attr_reader :options
+
+ # Returns consumer groups for parallel segments with which we should be working
+ #
+ # @return [Hash<String, Array<Karafka::Routing::ConsumerGroup>>] hash with all parallel
+ # consumer groups as values and names of segments origin consumer group as the key.
+ def applicable_groups
+ requested_groups = options[:groups].dup || []
+
+ workable_groups = ::Karafka::App
+ .routes
+ .select(&:parallel_segments?)
+ .group_by(&:segment_origin)
+
+ # Use all if none provided
+ return workable_groups if requested_groups.empty?
+
+ applicable_groups = {}
+
+ requested_groups.each do |requested_group|
+ workable_group = workable_groups[requested_group]
+
+ if workable_group
+ requested_groups.delete(requested_group)
+ applicable_groups[requested_group] = workable_group
+ else
+ raise(
+ ::Karafka::Errors::ConsumerGroupNotFoundError,
+ "Consumer group #{requested_group} was not found"
+ )
+ end
+ end
+
+ applicable_groups
+ end
+
+ # Collects the offsets for the segment origin consumer group and the parallel segments
+ # consumer groups. We use segment origin cg offsets as a baseline for the distribution
+ # and use existing (if any) parallel segments cgs offsets for validations.
+ #
+ # @param segment_origin [String] name of the origin consumer group
+ # @param segments [Array<Karafka::Routing::ConsumerGroup>]
+ # @return [Hash] fetched offsets for all the cg topics for all the consumer groups
+ def collect_offsets(segment_origin, segments)
+ topics_names = segments.first.topics.map(&:name)
+ consumer_groups = [segment_origin, segments.map(&:name)].flatten
+
+ consumer_groups_with_topics = consumer_groups
+ .map { |name| [name, topics_names] }
+ .to_h
+
+ lags_with_offsets = Karafka::Admin.read_lags_with_offsets(
+ consumer_groups_with_topics
+ )
+
+ lags_with_offsets.each do |_cg_name, topics|
+ topics.each do |_topic_name, partitions|
+ partitions.transform_values! { |details| details[:offset] }
+ end
+ end
+
+ lags_with_offsets
+ end
+ end
+ end
+ end
+ end
+ end
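The hash returned by collect_offsets is the input for both Distribute and Collapse. A hypothetical shape after the transform above strips the lag details down to offsets (all names and values invented):

```ruby
{
  # Segment origin consumer group with its committed offsets
  'orders' => { 'orders_states' => { 0 => 91, 1 => 87 } },
  # Parallel segments consumer groups, keyed the same way
  'orders-parallel-0' => { 'orders_states' => { 0 => 0, 1 => 0 } },
  'orders-parallel-1' => { 'orders_states' => { 0 => 0, 1 => 0 } }
}
```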
data/lib/karafka/pro/cli/parallel_segments/collapse.rb

@@ -0,0 +1,164 @@
+ # frozen_string_literal: true
+
+ # This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+ # See LICENSE for details.
+
+ module Karafka
+ module Pro
+ module Cli
+ class ParallelSegments < Karafka::Cli::Base
+ # Takes the committed offset of each parallel segment for each topic and records
+ # them back onto the segment origin consumer group. Without `--force` it will raise an
+ # error on conflicts. With `--force` it will take the lowest possible offset for each
+ # topic partition as the baseline.
+ #
+ # @note Running this can cause some double processing if the parallel segments final
+ # offsets are not aligned.
+ #
+ # @note This will **not** remove the parallel segments consumer groups. Please use the
+ # Admin API if you want them to be removed.
+ class Collapse < Base
+ # Runs the collapse operation
+ def call
+ puts 'Starting parallel segments collapse...'
+
+ segments_count = applicable_groups.size
+
+ if segments_count.zero?
+ puts "#{red('No')} consumer groups with parallel segments configuration found"
+
+ return
+ end
+
+ puts(
+ "Found #{green(segments_count)} consumer groups with parallel segments configuration"
+ )
+
+ collapses = []
+
+ applicable_groups.each do |segment_origin, segments|
+ puts
+ puts "Collecting group #{yellow(segment_origin)} details..."
+ offsets = collect_offsets(segment_origin, segments)
+
+ unless options.key?(:force)
+ puts
+ puts "Validating offsets positions for #{yellow(segment_origin)} consumer group..."
+ validate!(offsets, segment_origin)
+ end
+
+ puts
+ puts "Computing collapsed offsets for #{yellow(segment_origin)} consumer group..."
+ collapses << collapse(offsets, segments)
+ end
+
+ collapses.each do |collapse|
+ apply(collapse)
+ end
+
+ puts
+ puts "Collapse completed #{green('successfully')}!"
+ end
+
+ private
+
+ # Computes the lowest possible offset available for each topic partition and sets it
+ # on the segment origin consumer group.
+ #
+ # @param offsets [Hash]
+ # @param segments [Array<Karafka::Routing::ConsumerGroup>]
+ # @note This code does **not** apply the offsets, just computes their positions
+ def collapse(offsets, segments)
+ collapse = Hash.new { |h, k| h[k] = {} }
+ segments_names = segments.map(&:name)
+
+ offsets.each do |cg_name, topics|
+ next unless segments_names.include?(cg_name)
+
+ topics.each do |topic_name, partitions|
+ partitions.each do |partition_id, offset|
+ current_lowest_offset = collapse[topic_name][partition_id]
+
+ next if current_lowest_offset && current_lowest_offset < offset
+
+ collapse[topic_name][partition_id] = offset
+ end
+ end
+ end
+
+ {
+ collapse: collapse,
+ segment_origin: segments.first.segment_origin
+ }
+ end
+
+ # In order to collapse the offsets of parallel segments back into one set, we need to
+ # know to what offsets to collapse. The issue (that we solve by picking the lowest
+ # offset when forced) arises when the parallel segments hold differing offsets for one
+ # topic partition. We should let the user know when this happens so they do not
+ # end up with double-processing.
+ #
+ # @param offsets [Hash]
+ # @param segment_origin [String]
+ def validate!(offsets, segment_origin)
+ collapse = Hash.new { |h, k| h[k] = {} }
+
+ offsets.each do |cg_name, topics|
+ next if cg_name == segment_origin
+
+ topics.each do |topic_name, partitions|
+ partitions.each do |partition_id, offset|
+ collapse[topic_name][partition_id] ||= Set.new
+ collapse[topic_name][partition_id] << offset
+ end
+ end
+ end
+
+ inconclusive = false
+
+ collapse.each do |topic_name, partitions|
+ partitions.each do |partition_id, parallel_offsets|
+ next if parallel_offsets.size <= 1
+
+ inconclusive = true
+
+ puts(
+ " Inconclusive offsets for #{red(topic_name)}##{red(partition_id)}:" \
+ " #{parallel_offsets.to_a.join(', ')}"
+ )
+ end
+ end
+
+ return unless inconclusive
+
+ raise(
+ ::Karafka::Errors::CommandValidationError,
+ "Parallel segments for #{red(segment_origin)} have #{red('inconclusive')} offsets"
+ )
+ end
+
+ # Applies the collapsed lowest offsets onto the segment origin consumer group
+ #
+ # @param collapse [Hash]
+ def apply(collapse)
+ segment_origin = collapse[:segment_origin]
+ alignments = collapse[:collapse]
+
+ puts
+ puts "Adjusting offsets of segment origin consumer group: #{green(segment_origin)}"
+
+ alignments.each do |topic_name, partitions|
+ puts " Topic #{green(topic_name)}:"
+
+ partitions.each do |partition_id, offset|
+ puts " Partition #{green(partition_id)}: starting offset #{green(offset)}"
+ end
+ end
+
+ Karafka::Admin.seek_consumer_group(segment_origin, alignments)
+ end
+ end
+ end
+ end
+ end
+ end
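The collapse rule is small enough to illustrate in isolation: for each topic partition, the lowest committed offset across the parallel segment groups wins. A self-contained toy version with invented names:

```ruby
segment_offsets = {
  'orders-parallel-0' => { 'orders_states' => { 0 => 120, 1 => 80 } },
  'orders-parallel-1' => { 'orders_states' => { 0 => 95, 1 => 80 } }
}

collapsed = Hash.new { |h, k| h[k] = {} }

segment_offsets.each_value do |topics|
  topics.each do |topic, partitions|
    partitions.each do |partition, offset|
      current = collapsed[topic][partition]
      # Keep the lowest offset seen so far for this topic partition
      collapsed[topic][partition] = offset if current.nil? || offset < current
    end
  end
end

collapsed # => { "orders_states" => { 0 => 95, 1 => 80 } }
```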
data/lib/karafka/pro/cli/parallel_segments/distribute.rb

@@ -0,0 +1,164 @@
+ # frozen_string_literal: true
+
+ # This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+ # See LICENSE for details.
+
+ module Karafka
+ module Pro
+ module Cli
+ class ParallelSegments < Karafka::Cli::Base
+ # Command that makes it easier for users to migrate from regular consumer groups to
+ # the parallel segments consumer groups by automatically distributing offsets based on
+ # the used "normal" consumer group.
+ #
+ # Takes the segments origin consumer group offsets for a given set of topics and
+ # distributes those offsets onto the parallel segments consumer groups, so they can pick
+ # up where the origin group left off.
+ #
+ # To make sure users do not accidentally "re-distribute" their offsets from the original
+ # consumer group after the parallel consumer groups had offsets assigned and started to
+ # work, we check if the parallel groups have any offsets; if so, unless forced, we halt.
+ #
+ # @note This command does not remove the original consumer group from Kafka. We keep it
+ # just as a backup. Users can remove it themselves.
+ #
+ # @note Kafka has no atomic operations, which is why we first collect all the data and
+ # run the needed validations before applying offsets.
+ class Distribute < Base
+ # Runs the distribution process
+ def call
+ puts 'Starting parallel segments distribution...'
+
+ segments_count = applicable_groups.size
+
+ if segments_count.zero?
+ puts "#{red('No')} consumer groups with parallel segments configuration found"
+
+ return
+ end
+
+ puts(
+ "Found #{green(segments_count)} consumer groups with parallel segments configuration"
+ )
+
+ distributions = []
+
+ applicable_groups.each do |segment_origin, segments|
+ puts
+ puts "Collecting group #{yellow(segment_origin)} details..."
+ offsets = collect_offsets(segment_origin, segments)
+
+ unless options.key?(:force)
+ puts "Validating group #{yellow(segment_origin)} parallel segments..."
+ validate!(offsets, segments)
+ end
+
+ puts "Distributing group #{yellow(segment_origin)} offsets..."
+ distributions += distribute(offsets, segments)
+ end
+
+ distributions.each do |distribution|
+ apply(distribution)
+ end
+
+ puts
+ puts "Distribution completed #{green('successfully')}!"
+ end
+
+ private
+
+ # Validates the current state of topics offsets assignments.
+ # We want to make sure that users do not run distribution twice, especially for a
+ # parallel segments consumer group set that was already actively consumed. This is why
+ # we check if there were any offsets already present in the parallel segments consumer
+ # groups and if so, we raise an error. This can be disabled with `--force`.
+ #
+ # It prevents users from overwriting the already set segments distribution.
+ # Adding new topics to the same parallel segments consumer group does not require us to
+ # run this at all and on top of that users can always use the `--consumer_groups` flag
+ # to limit the cgs on which we will be operating here
+ #
+ # @param offsets [Hash]
+ # @param segments [Array<Karafka::Routing::ConsumerGroup>]
+ def validate!(offsets, segments)
+ segments_names = segments.map(&:name)
+
+ offsets.each do |cg_name, topics|
+ next unless segments_names.include?(cg_name)
+
+ topics.each do |topic_name, partitions|
+ partitions.each do |partition_id, offset|
+ next unless offset.to_i.positive?
+
+ raise(
+ ::Karafka::Errors::CommandValidationError,
+ "Parallel segment #{red(cg_name)} already has offset #{red(offset)}" \
+ " set for #{red("#{topic_name}##{partition_id}")}"
+ )
+ end
+ end
+ end
+ end
+
+ # Computes the offsets distribution for all the segments consumer groups so when a user
+ # migrates from one CG to parallel segments, those segments know where to start consuming
+ # the data.
+ #
+ # @param offsets [Hash]
+ # @param segments [Array<Karafka::Routing::ConsumerGroup>]
+ # @note This code does **not** apply the offsets, just computes their positions
+ def distribute(offsets, segments)
+ distributions = []
+ segments_names = segments.map(&:name)
+
+ offsets.each do |cg_name, topics|
+ next if segments_names.include?(cg_name)
+
+ distribution = {}
+
+ topics.each do |topic_name, partitions|
+ partitions.each do |partition_id, offset|
+ distribution[topic_name] ||= {}
+ distribution[topic_name][partition_id] = offset
+ end
+ end
+
+ next if distribution.empty?
+
+ segments_names.each do |segment_name|
+ distributions << {
+ segment_name: segment_name,
+ distribution: distribution
+ }
+ end
+ end
+
+ distributions
+ end
+
+ # Takes the details of the distribution of offsets for a given segment and adjusts the
+ # starting offsets for all the consumer group topics based on the distribution.
+ #
+ # @param distribution [Hash]
+ def apply(distribution)
+ segment_name = distribution[:segment_name]
+ alignments = distribution[:distribution]
+
+ puts
+ puts "Adjusting offsets of parallel segments consumer group: #{green(segment_name)}"
+
+ alignments.each do |topic_name, partitions|
+ puts " Topic #{green(topic_name)}:"
+
+ partitions.each do |partition_id, offset|
+ puts " Partition #{green(partition_id)}: starting offset #{green(offset)}"
+ end
+ end
+
+ Karafka::Admin.seek_consumer_group(segment_name, alignments)
+ end
+ end
+ end
+ end
+ end
+ end
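Worth noting: distribute does not split offsets between the segments. Every parallel segment group receives the same starting offsets as the origin group; the actual work partitioning happens later, at consumption time, via the parallel segments filters. A hypothetical entry from the distributions array (names invented):

```ruby
{
  segment_name: 'orders-parallel-0',
  # Same starting offsets as the origin group for every topic partition
  distribution: { 'orders_states' => { 0 => 91, 1 => 87 } }
}
```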
data/lib/karafka/pro/cli/parallel_segments.rb

@@ -0,0 +1,60 @@
+ # frozen_string_literal: true
+
+ # This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+ # See LICENSE for details.
+
+ module Karafka
+ module Pro
+ # Pro related CLI commands
+ module Cli
+ # CLI entry-point for parallel segments management commands
+ class ParallelSegments < Karafka::Cli::Base
+ include Helpers::Colorize
+ include Helpers::ConfigImporter.new(
+ kafka_config: %i[kafka]
+ )
+
+ desc 'Allows for parallel segments management'
+
+ option(
+ :groups,
+ 'Names of consumer groups on which we want to run the command. All if not provided',
+ Array,
+ %w[
+ --groups
+ --consumer_groups
+ ]
+ )
+
+ # Some operations may not be allowed to run again after data is set in certain ways.
+ # For example, if a distribution command is invoked when the parallel segments
+ # consumer groups already have offsets set, we will fail unless the user forces it.
+ # This prevents users from accidentally running the command in such ways that would
+ # cause their existing distributed offsets to be reset.
+ option(
+ :force,
+ 'Should an operation on the parallel segments consumer group be forced',
+ TrueClass,
+ %w[
+ --force
+ ]
+ )
+
+ # @param action [String] action we want to take
+ def call(action = 'distribute')
+ case action
+ when 'distribute'
+ Distribute.new(options).call
+ when 'collapse'
+ Collapse.new(options).call
+ when 'reset'
+ Collapse.new(options).call
+ Distribute.new(options).call
+ else
+ raise ::ArgumentError, "Invalid topics action: #{action}"
+ end
+ end
+ end
+ end
+ end
+ end
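Assuming the standard Karafka CLI wiring, these actions would map to invocations such as `bundle exec karafka parallel_segments distribute` or `bundle exec karafka parallel_segments collapse --groups group_a,group_b`, with `reset` performing a collapse followed by a fresh distribute.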
data/lib/karafka/pro/connection/manager.rb

@@ -19,14 +19,9 @@ module Karafka
  class Manager < Karafka::Connection::Manager
  include Core::Helpers::Time

- # How long should we wait after a rebalance before doing anything on a consumer group
- #
- # @param scale_delay [Integer] How long should we wait before making any changes. Any
- # change related to this consumer group will postpone the scaling operations. This is
- # done that way to prevent too many friction in the cluster. It is 1 minute by default
- def initialize(scale_delay = 60 * 1_000)
+ # Creates new manager instance
+ def initialize
  super()
- @scale_delay = scale_delay
  @mutex = Mutex.new
  @changes = Hash.new do |h, k|
  h[k] = {
@@ -201,7 +196,7 @@ module Karafka

  next unless multiplexing.active?
  next unless multiplexing.dynamic?
- # If we cannot downscale, do not
+ # If we cannot upscale, do not
  next if sg_listeners.count(&:active?) >= multiplexing.max

  sg_listeners.each do |sg_listener|
@@ -234,6 +229,8 @@ module Karafka
  # are also stable. This is a strong indicator that no rebalances or other operations are
  # happening at a given moment.
  def stable?(sg_listeners)
+ @scale_delay ||= sg_listeners.first.subscription_group.multiplexing.scale_delay
+
  sg_listeners.all? do |sg_listener|
  # If a listener is not active, we do not take it into consideration when looking at
  # the stability data
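The scale delay therefore now comes from each subscription group's multiplexing settings instead of the manager's constructor. A routing sketch, assuming the multiplexing DSL accepts a scale_delay keyword in milliseconds (values and consumer class hypothetical):

```ruby
class KarafkaApp < Karafka::App
  routes.draw do
    subscription_group :events do
      # Dynamic multiplexing; scaling decisions wait at least 60s after changes
      multiplexing(min: 1, max: 3, scale_delay: 60_000)

      topic :user_events do
        consumer UserEventsConsumer # hypothetical consumer class
      end
    end
  end
end
```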