karafka 2.4.18 → 2.5.0.beta2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. checksums.yaml +4 -4
  2. data/.github/CODEOWNERS +3 -0
  3. data/.github/workflows/ci.yml +58 -14
  4. data/.github/workflows/push.yml +36 -0
  5. data/.github/workflows/verify-action-pins.yml +16 -0
  6. data/.ruby-version +1 -1
  7. data/CHANGELOG.md +60 -0
  8. data/Gemfile +2 -2
  9. data/Gemfile.lock +69 -50
  10. data/LICENSE-COMM +2 -2
  11. data/README.md +1 -1
  12. data/Rakefile +4 -0
  13. data/bin/clean_kafka +43 -0
  14. data/bin/integrations +19 -6
  15. data/bin/rspecs +15 -3
  16. data/bin/verify_kafka_warnings +35 -0
  17. data/bin/verify_topics_naming +27 -0
  18. data/config/locales/errors.yml +3 -0
  19. data/config/locales/pro_errors.yml +13 -2
  20. data/docker-compose.yml +1 -1
  21. data/examples/payloads/json/enrollment_event.json +579 -0
  22. data/examples/payloads/json/ingestion_event.json +30 -0
  23. data/examples/payloads/json/transaction_event.json +17 -0
  24. data/examples/payloads/json/user_event.json +11 -0
  25. data/karafka.gemspec +3 -8
  26. data/lib/karafka/active_job/current_attributes.rb +1 -1
  27. data/lib/karafka/admin/acl.rb +5 -1
  28. data/lib/karafka/admin/configs.rb +5 -1
  29. data/lib/karafka/admin.rb +69 -34
  30. data/lib/karafka/base_consumer.rb +17 -8
  31. data/lib/karafka/cli/base.rb +8 -2
  32. data/lib/karafka/cli/topics/align.rb +7 -4
  33. data/lib/karafka/cli/topics/base.rb +17 -0
  34. data/lib/karafka/cli/topics/create.rb +9 -7
  35. data/lib/karafka/cli/topics/delete.rb +4 -2
  36. data/lib/karafka/cli/topics/help.rb +39 -0
  37. data/lib/karafka/cli/topics/repartition.rb +4 -2
  38. data/lib/karafka/cli/topics.rb +10 -3
  39. data/lib/karafka/cli.rb +2 -0
  40. data/lib/karafka/connection/client.rb +30 -9
  41. data/lib/karafka/connection/listener.rb +24 -12
  42. data/lib/karafka/connection/messages_buffer.rb +1 -1
  43. data/lib/karafka/connection/proxy.rb +3 -0
  44. data/lib/karafka/constraints.rb +3 -3
  45. data/lib/karafka/contracts/config.rb +3 -0
  46. data/lib/karafka/contracts/topic.rb +1 -1
  47. data/lib/karafka/errors.rb +46 -2
  48. data/lib/karafka/helpers/async.rb +3 -1
  49. data/lib/karafka/instrumentation/callbacks/rebalance.rb +5 -1
  50. data/lib/karafka/instrumentation/logger_listener.rb +86 -23
  51. data/lib/karafka/instrumentation/proctitle_listener.rb +5 -1
  52. data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +2 -2
  53. data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
  54. data/lib/karafka/pro/cleaner.rb +8 -0
  55. data/lib/karafka/pro/cli/parallel_segments/base.rb +89 -0
  56. data/lib/karafka/pro/cli/parallel_segments/collapse.rb +164 -0
  57. data/lib/karafka/pro/cli/parallel_segments/distribute.rb +164 -0
  58. data/lib/karafka/pro/cli/parallel_segments.rb +60 -0
  59. data/lib/karafka/pro/connection/manager.rb +5 -8
  60. data/lib/karafka/pro/encryption.rb +8 -0
  61. data/lib/karafka/pro/instrumentation/performance_tracker.rb +1 -1
  62. data/lib/karafka/pro/iterator/expander.rb +5 -3
  63. data/lib/karafka/pro/iterator/tpl_builder.rb +23 -0
  64. data/lib/karafka/pro/loader.rb +10 -0
  65. data/lib/karafka/pro/processing/coordinator.rb +4 -1
  66. data/lib/karafka/pro/processing/coordinators/errors_tracker.rb +27 -3
  67. data/lib/karafka/pro/processing/coordinators/filters_applier.rb +11 -0
  68. data/lib/karafka/pro/processing/filters/base.rb +10 -2
  69. data/lib/karafka/pro/processing/filters/expirer.rb +5 -0
  70. data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb +2 -2
  71. data/lib/karafka/pro/processing/filters/virtual_limiter.rb +5 -0
  72. data/lib/karafka/pro/processing/parallel_segments/filters/base.rb +73 -0
  73. data/lib/karafka/pro/processing/parallel_segments/filters/default.rb +85 -0
  74. data/lib/karafka/pro/processing/parallel_segments/filters/mom.rb +66 -0
  75. data/lib/karafka/pro/processing/partitioner.rb +1 -13
  76. data/lib/karafka/pro/processing/piping/consumer.rb +13 -13
  77. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
  78. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
  79. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
  80. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
  81. data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +1 -1
  82. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
  83. data/lib/karafka/pro/processing/strategies/default.rb +36 -8
  84. data/lib/karafka/pro/processing/strategies/dlq/default.rb +14 -10
  85. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +1 -1
  86. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +1 -1
  87. data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +3 -1
  88. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +1 -1
  89. data/lib/karafka/pro/processing/strategies/ftr/default.rb +1 -1
  90. data/lib/karafka/pro/processing/strategies/lrj/default.rb +4 -1
  91. data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +1 -1
  92. data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +1 -1
  93. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
  94. data/lib/karafka/pro/processing/virtual_partitions/distributors/balanced.rb +50 -0
  95. data/lib/karafka/pro/processing/virtual_partitions/distributors/base.rb +29 -0
  96. data/lib/karafka/pro/processing/virtual_partitions/distributors/consistent.rb +27 -0
  97. data/lib/karafka/pro/recurring_tasks/contracts/config.rb +8 -4
  98. data/lib/karafka/pro/recurring_tasks/dispatcher.rb +3 -3
  99. data/lib/karafka/pro/recurring_tasks/setup/config.rb +7 -2
  100. data/lib/karafka/pro/recurring_tasks.rb +13 -0
  101. data/lib/karafka/pro/routing/features/dead_letter_queue/topic.rb +1 -1
  102. data/lib/karafka/pro/routing/features/multiplexing/config.rb +1 -0
  103. data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +17 -0
  104. data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +5 -2
  105. data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +8 -1
  106. data/lib/karafka/pro/routing/features/parallel_segments/builder.rb +47 -0
  107. data/lib/karafka/pro/routing/features/parallel_segments/config.rb +27 -0
  108. data/lib/karafka/pro/routing/features/parallel_segments/consumer_group.rb +83 -0
  109. data/lib/karafka/pro/routing/features/parallel_segments/contracts/consumer_group.rb +49 -0
  110. data/lib/karafka/pro/routing/features/parallel_segments/topic.rb +43 -0
  111. data/lib/karafka/pro/routing/features/parallel_segments.rb +24 -0
  112. data/lib/karafka/pro/routing/features/patterns/pattern.rb +1 -1
  113. data/lib/karafka/pro/routing/features/recurring_tasks/builder.rb +2 -2
  114. data/lib/karafka/pro/routing/features/scheduled_messages/builder.rb +10 -6
  115. data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +20 -2
  116. data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
  117. data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +8 -2
  118. data/lib/karafka/pro/scheduled_messages/consumer.rb +19 -21
  119. data/lib/karafka/pro/scheduled_messages/daily_buffer.rb +9 -6
  120. data/lib/karafka/pro/scheduled_messages/deserializers/headers.rb +7 -1
  121. data/lib/karafka/pro/scheduled_messages/max_epoch.rb +15 -6
  122. data/lib/karafka/pro/scheduled_messages.rb +13 -0
  123. data/lib/karafka/processing/coordinators_buffer.rb +1 -0
  124. data/lib/karafka/processing/strategies/default.rb +4 -4
  125. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
  126. data/lib/karafka/routing/subscription_group.rb +1 -1
  127. data/lib/karafka/runner.rb +7 -1
  128. data/lib/karafka/server.rb +19 -19
  129. data/lib/karafka/setup/attributes_map.rb +2 -0
  130. data/lib/karafka/setup/config.rb +22 -1
  131. data/lib/karafka/setup/defaults_injector.rb +26 -1
  132. data/lib/karafka/status.rb +6 -1
  133. data/lib/karafka/swarm/node.rb +31 -0
  134. data/lib/karafka/swarm/supervisor.rb +4 -0
  135. data/lib/karafka/templates/karafka.rb.erb +14 -1
  136. data/lib/karafka/version.rb +1 -1
  137. data/lib/karafka.rb +17 -9
  138. data/renovate.json +14 -2
  139. metadata +40 -40
  140. checksums.yaml.gz.sig +0 -0
  141. data/certs/cert.pem +0 -26
  142. data.tar.gz.sig +0 -0
  143. metadata.gz.sig +0 -0
data/lib/karafka/pro/cli/parallel_segments/collapse.rb
@@ -0,0 +1,164 @@
+# frozen_string_literal: true
+
+# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+# See LICENSE for details.
+
+module Karafka
+  module Pro
+    module Cli
+      class ParallelSegments < Karafka::Cli::Base
+        # Takes the committed offset of each parallel segment for each topic and records
+        # them back onto the segment origin consumer group. Without `--force` it will raise an
+        # error on conflicts. With `--force` it will take the lowest possible offset for each
+        # topic partition as the baseline.
+        #
+        # @note Running this can cause you some double processing if the parallel segments final
+        # offsets are not aligned.
+        #
+        # @note This will **not** remove the parallel segments consumer groups. Please use the
+        # Admin API if you want them to be removed.
+        class Collapse < Base
+          # Runs the collapse operation
+          def call
+            puts 'Starting parallel segments collapse...'
+
+            segments_count = applicable_groups.size
+
+            if segments_count.zero?
+              puts "#{red('No')} consumer groups with parallel segments configuration found"
+
+              return
+            end
+
+            puts(
+              "Found #{green(segments_count)} consumer groups with parallel segments configuration"
+            )
+
+            collapses = []
+
+            applicable_groups.each do |segment_origin, segments|
+              puts
+              puts "Collecting group #{yellow(segment_origin)} details..."
+              offsets = collect_offsets(segment_origin, segments)
+
+              unless options.key?(:force)
+                puts
+                puts "Validating offsets positions for #{yellow(segment_origin)} consumer group..."
+                validate!(offsets, segment_origin)
+              end
+
+              puts
+              puts "Computing collapsed offsets for #{yellow(segment_origin)} consumer group..."
+              collapses << collapse(offsets, segments)
+            end
+
+            collapses.each do |collapse|
+              apply(collapse)
+            end
+
+            puts
+            puts "Collapse completed #{green('successfully')}!"
+          end
+
+          private
+
+          # Computes the lowest possible offset available for each topic partition and sets it
+          # on the segment origin consumer group.
+          #
+          # @param offsets [Hash]
+          # @param segments [Array<Karafka::Routing::ConsumerGroup>]
+          # @note This code does **not** apply the offsets, just computes their positions
+          def collapse(offsets, segments)
+            collapse = Hash.new { |h, k| h[k] = {} }
+            segments_names = segments.map(&:name)
+
+            offsets.each do |cg_name, topics|
+              next unless segments_names.include?(cg_name)
+
+              topics.each do |topic_name, partitions|
+                partitions.each do |partition_id, offset|
+                  current_lowest_offset = collapse[topic_name][partition_id]
+
+                  next if current_lowest_offset && current_lowest_offset < offset
+
+                  collapse[topic_name][partition_id] = offset
+                end
+              end
+            end
+
+            {
+              collapse: collapse,
+              segment_origin: segments.first.segment_origin
+            }
+          end
+
+          # In order to collapse the offsets of parallel segments back to one, we need to know
+          # to what offsets to collapse. The issue (that we solve picking lowest when forced)
+          # arises when there are more offsets that are not even in parallel segments for one
+          # topic partition. We should let user know about this if this happens so he does not
+          # end up with double-processing.
+          #
+          # @param offsets [Hash]
+          # @param segment_origin [String]
+          def validate!(offsets, segment_origin)
+            collapse = Hash.new { |h, k| h[k] = {} }
+
+            offsets.each do |cg_name, topics|
+              next if cg_name == segment_origin
+
+              topics.each do |topic_name, partitions|
+                partitions.each do |partition_id, offset|
+                  collapse[topic_name][partition_id] ||= Set.new
+                  collapse[topic_name][partition_id] << offset
+                end
+              end
+            end
+
+            inconclusive = false
+
+            collapse.each do |topic_name, partitions|
+              partitions.each do |partition_id, parallel_offsets|
+                next if parallel_offsets.size <= 1
+
+                inconclusive = true
+
+                puts(
+                  " Inconclusive offsets for #{red(topic_name)}##{red(partition_id)}:" \
+                  " #{parallel_offsets.to_a.join(', ')}"
+                )
+              end
+            end
+
+            return unless inconclusive
+
+            raise(
+              ::Karafka::Errors::CommandValidationError,
+              "Parallel segments for #{red(segment_origin)} have #{red('inconclusive')} offsets"
+            )
+          end
+
+          # Applies the collapsed lowest offsets onto the segment origin consumer group
+          #
+          # @param collapse [Hash]
+          def apply(collapse)
+            segment_origin = collapse[:segment_origin]
+            alignments = collapse[:collapse]
+
+            puts
+            puts "Adjusting offsets of segment origin consumer group: #{green(segment_origin)}"
+
+            alignments.each do |topic_name, partitions|
+              puts " Topic #{green(topic_name)}:"
+
+              partitions.each do |partition_id, offset|
+                puts " Partition #{green(partition_id)}: starting offset #{green(offset)}"
+              end
+            end
+
+            Karafka::Admin.seek_consumer_group(segment_origin, alignments)
+          end
+        end
+      end
+    end
+  end
+end
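For orientation, the collapse flow above reduces the committed offsets of all parallel segment groups to their per-partition minimum and then seeks the segment origin group there via Karafka::Admin.seek_consumer_group. A rough sketch of the data shapes involved (consumer group and topic names are purely illustrative; collect_offsets and applicable_groups come from the shared base command):

```ruby
# Hypothetical result of collect_offsets: consumer group name => topic => partition => offset
offsets = {
  'orders-processing-parallel-0' => { 'orders' => { 0 => 118, 1 => 99 } },
  'orders-processing-parallel-1' => { 'orders' => { 0 => 131, 1 => 95 } }
}

# collapse reduces this to the per-partition minima and apply then seeks the origin
# group to { 'orders' => { 0 => 118, 1 => 95 } }. The spread between segment groups
# (118 vs 131 on partition 0) is exactly the double processing the notes above warn
# about, and what validate! reports when --force is not given.
```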
data/lib/karafka/pro/cli/parallel_segments/distribute.rb
@@ -0,0 +1,164 @@
+# frozen_string_literal: true
+
+# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+# See LICENSE for details.
+
+module Karafka
+  module Pro
+    module Cli
+      class ParallelSegments < Karafka::Cli::Base
+        # Command that makes it easier for users to migrate from regular consumer groups to
+        # the parallel segments consumers groups by automatically distributing offsets based on
+        # the used "normal" consumer group.
+        #
+        # Takes the segments origin consumer group offsets for a given set of topics and
+        # distributes those offsets onto the parallel segments consumer groups, so they can pick
+        # up where the origin group left.
+        #
+        # To make sure users do not accidentally "re-distribute" their offsets from the original
+        # consumer group after the parallel consumer groups had offsets assigned and started to
+        # work, we check if the parallel groups have any offsets, if so unless forced we halt.
+        #
+        # @note This command does not remove the original consumer group from Kafka. We keep it
+        # just as a backup. User can remove it himself.
+        #
+        # @note Kafka has no atomic operations this is why we first collect all the data and run
+        # needed validations before applying offsets.
+        class Distribute < Base
+          # Runs the distribution process
+          def call
+            puts 'Starting parallel segments distribution...'
+
+            segments_count = applicable_groups.size
+
+            if segments_count.zero?
+              puts "#{red('No')} consumer groups with parallel segments configuration found"
+
+              return
+            end
+
+            puts(
+              "Found #{green(segments_count)} consumer groups with parallel segments configuration"
+            )
+
+            distributions = []
+
+            applicable_groups.each do |segment_origin, segments|
+              puts
+              puts "Collecting group #{yellow(segment_origin)} details..."
+              offsets = collect_offsets(segment_origin, segments)
+
+              unless options.key?(:force)
+                puts "Validating group #{yellow(segment_origin)} parallel segments..."
+                validate!(offsets, segments)
+              end
+
+              puts "Distributing group #{yellow(segment_origin)} offsets..."
+              distributions += distribute(offsets, segments)
+            end
+
+            distributions.each do |distribution|
+              apply(distribution)
+            end
+
+            puts
+            puts "Distribution completed #{green('successfully')}!"
+          end
+
+          private
+
+          # Validates the current state of topics offsets assignments.
+          # We want to make sure, that users do not run distribution twice, especially for a
+          # parallel segments consumers group set that was already actively consumed. This is why
+          # we check if there was any offsets already present in the parallel segments consumer
+          # groups and if so, we raise an error. This can be disabled with `--force`.
+          #
+          # It prevents users from overwriting the already set segments distribution.
+          # Adding new topics to the same parallel segments consumer group does not require us to
+          # run this at all and on top of that users can always use `--consumer_groups` flag to
+          # limit the cgs that we will be operating here
+          #
+          # @param offsets [Hash]
+          # @param segments [Array<Karafka::Routing::ConsumerGroup>]
+          def validate!(offsets, segments)
+            segments_names = segments.map(&:name)
+
+            offsets.each do |cg_name, topics|
+              next unless segments_names.include?(cg_name)
+
+              topics.each do |topic_name, partitions|
+                partitions.each do |partition_id, offset|
+                  next unless offset.to_i.positive?
+
+                  raise(
+                    ::Karafka::Errors::CommandValidationError,
+                    "Parallel segment #{red(cg_name)} already has offset #{red(offset)}" \
+                    " set for #{red("#{topic_name}##{partition_id}")}"
+                  )
+                end
+              end
+            end
+          end
+
+          # Computes the offsets distribution for all the segments consumer groups so when user
+          # migrates from one CG to parallel segments, those segments know where to start consuming
+          # the data.
+          #
+          # @param offsets [Hash]
+          # @param segments [Array<Karafka::Routing::ConsumerGroup>]
+          # @note This code does **not** apply the offsets, just computes their positions
+          def distribute(offsets, segments)
+            distributions = []
+            segments_names = segments.map(&:name)
+
+            offsets.each do |cg_name, topics|
+              next if segments_names.include?(cg_name)
+
+              distribution = {}
+
+              topics.each do |topic_name, partitions|
+                partitions.each do |partition_id, offset|
+                  distribution[topic_name] ||= {}
+                  distribution[topic_name][partition_id] = offset
+                end
+              end
+
+              next if distribution.empty?
+
+              segments_names.each do |segment_name|
+                distributions << {
+                  segment_name: segment_name,
+                  distribution: distribution
+                }
+              end
+            end
+
+            distributions
+          end
+
+          # Takes the details of the distribution of offsets for a given segment and adjust the
+          # starting offsets for all the consumer group topics based on the distribution.
+          #
+          # @param distribution [Hash]
+          def apply(distribution)
+            segment_name = distribution[:segment_name]
+            alignments = distribution[:distribution]
+
+            puts
+            puts "Adjusting offsets of parallel segments consumer group: #{green(segment_name)}"
+
+            alignments.each do |topic_name, partitions|
+              puts " Topic #{green(topic_name)}:"
+
+              partitions.each do |partition_id, offset|
+                puts " Partition #{green(partition_id)}: starting offset #{green(offset)}"
+              end
+            end
+
+            Karafka::Admin.seek_consumer_group(segment_name, alignments)
+          end
+        end
+      end
+    end
+  end
+end
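Distribution is the inverse migration step: the committed offsets of the origin ("normal") consumer group are replicated onto every parallel segment group so they resume where the origin left off. A minimal sketch under the same illustrative names:

```ruby
# Hypothetical committed offsets of the segment origin consumer group
origin_offsets = { 'orders' => { 0 => 118, 1 => 95 } }

# distribute builds one such alignment per segment group and apply then seeks them:
#   Karafka::Admin.seek_consumer_group('orders-processing-parallel-0', origin_offsets)
#   Karafka::Admin.seek_consumer_group('orders-processing-parallel-1', origin_offsets)
# Without --force, validate! refuses to run if any segment group already has a
# positive offset for any of the topic partitions.
```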
data/lib/karafka/pro/cli/parallel_segments.rb
@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+
+# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+# See LICENSE for details.
+
+module Karafka
+  module Pro
+    # Pro related CLI commands
+    module Cli
+      # CLI entry-point for parallel segments management commands
+      class ParallelSegments < Karafka::Cli::Base
+        include Helpers::Colorize
+        include Helpers::ConfigImporter.new(
+          kafka_config: %i[kafka]
+        )
+
+        desc 'Allows for parallel segments management'
+
+        option(
+          :groups,
+          'Names of consumer groups on which we want to run the command. All if not provided',
+          Array,
+          %w[
+            --groups
+            --consumer_groups
+          ]
+        )
+
+        # Some operations may not be allowed to run again after data is set in certain ways.
+        # For example if a distribution command is invoked when the parallel group segment
+        # consumer groups already have offsets set, we will fail unless user wants to force it.
+        # This prevents users from accidentally running the command in such ways that would cause
+        # their existing distributed offsets to be reset.
+        option(
+          :force,
+          'Should an operation on the parallel segments consumer group be forced',
+          TrueClass,
+          %w[
+            --force
+          ]
+        )
+
+        # @param action [String] action we want to take
+        def call(action = 'distribute')
+          case action
+          when 'distribute'
+            Distribute.new(options).call
+          when 'collapse'
+            Collapse.new(options).call
+          when 'reset'
+            Collapse.new(options).call
+            Distribute.new(options).call
+          else
+            raise ::ArgumentError, "Invalid topics action: #{action}"
+          end
+        end
+      end
+    end
+  end
+end
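The entry point dispatches to the two commands above, plus a reset action that collapses and then redistributes. Assuming Karafka's usual underscore-based command naming, invocation would look like `karafka parallel_segments distribute` or `karafka parallel_segments collapse --force`, optionally narrowed with `--groups`. Below is a hedged routing sketch of the feature these commands operate on; the parallel_segments option names are assumptions based on the routing feature files listed earlier in this diff:

```ruby
# karafka.rb (fragment, illustrative only)
class OrdersConsumer < Karafka::BaseConsumer
  def consume; end
end

class KarafkaApp < Karafka::App
  # setup omitted for brevity

  routes.draw do
    consumer_group :orders_processing do
      # count and partitioner kwargs are assumptions, not confirmed by this diff alone
      parallel_segments(
        count: 2,
        partitioner: ->(message) { message.headers['user-id'] }
      )

      topic :orders do
        consumer OrdersConsumer
      end
    end
  end
end
```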
data/lib/karafka/pro/connection/manager.rb
@@ -19,14 +19,9 @@ module Karafka
       class Manager < Karafka::Connection::Manager
         include Core::Helpers::Time
 
-        # How long should we wait after a rebalance before doing anything on a consumer group
-        #
-        # @param scale_delay [Integer] How long should we wait before making any changes. Any
-        # change related to this consumer group will postpone the scaling operations. This is
-        # done that way to prevent too many friction in the cluster. It is 1 minute by default
-        def initialize(scale_delay = 60 * 1_000)
+        # Creates new manager instance
+        def initialize
           super()
-          @scale_delay = scale_delay
           @mutex = Mutex.new
           @changes = Hash.new do |h, k|
             h[k] = {
@@ -201,7 +196,7 @@ module Karafka
 
             next unless multiplexing.active?
             next unless multiplexing.dynamic?
-            # If we cannot downscale, do not
+            # If we cannot upscale, do not
             next if sg_listeners.count(&:active?) >= multiplexing.max
 
             sg_listeners.each do |sg_listener|
@@ -234,6 +229,8 @@
         # are also stable. This is a strong indicator that no rebalances or other operations are
         # happening at a given moment.
         def stable?(sg_listeners)
+          @scale_delay ||= sg_listeners.first.subscription_group.multiplexing.scale_delay
+
           sg_listeners.all? do |sg_listener|
             # If a listener is not active, we do not take it into consideration when looking at
             # the stability data
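The scale delay is thus no longer a constructor argument of the manager; it is lazily read from the subscription group's multiplexing settings, which also explains the multiplexing config and contract changes in the file list. A hedged routing sketch, assuming the delay is configured in milliseconds alongside the existing multiplexing bounds:

```ruby
# Fragment of routes.draw; the scale_delay option is an assumption based on the
# multiplexing config/proxy changes in this release, the other kwargs follow the
# existing multiplexing API. EventsConsumer stands in for a real consumer class.
subscription_group :events do
  multiplexing(min: 1, max: 4, boot: 2, scale_delay: 60_000)

  topic :events do
    consumer EventsConsumer
  end
end
```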
data/lib/karafka/pro/encryption.rb
@@ -33,6 +33,14 @@ module Karafka
           # Encryption for WaterDrop
           config.producer.middleware.append(Messages::Middleware.new)
         end
+
+        # This feature does not need any changes post-fork
+        #
+        # @param _config [Karafka::Core::Configurable::Node]
+        # @param _pre_fork_producer [WaterDrop::Producer]
+        def post_fork(_config, _pre_fork_producer)
+          true
+        end
       end
     end
   end
data/lib/karafka/pro/instrumentation/performance_tracker.rb
@@ -50,7 +50,7 @@ module Karafka
         partition = messages.metadata.partition
 
         samples = @processing_times[topic][partition]
-        samples << event[:time] / messages.count
+        samples << event[:time] / messages.size
 
         return unless samples.size > SAMPLES_COUNT
 
data/lib/karafka/pro/iterator/expander.rb
@@ -21,8 +21,10 @@ module Karafka
       # - { 'topic1' => 100 } - means we run all partitions from the offset 100
       # - { 'topic1' => Time.now - 60 } - we run all partitions from the message from 60s ago
       # - { 'topic1' => { 1 => Time.now - 60 } } - partition1 from message 60s ago
-      # - { 'topic1' => { 1 => true } } - will pick first offset not consumed on this CG for p 1
-      # - { 'topic1' => true } - will pick first offset not consumed on this CG for all p
+      # - { 'topic1' => { 1 => true } } - will pick first offset on this CG for partition 1
+      # - { 'topic1' => true } - will pick first offset for all partitions
+      # - { 'topic1' => :earliest } - will pick earliest offset for all partitions
+      # - { 'topic1' => :latest } - will pick latest (high-watermark) for all partitions
       class Expander
         # Expands topics to which we want to subscribe with partitions information in case this
         # info is not provided.
@@ -80,7 +82,7 @@ module Karafka
             .find { |topic| topic.fetch(:topic_name) == name }
             .tap { |topic| topic || raise(Errors::TopicNotFoundError, name) }
             .fetch(:partitions)
-            .count
+            .size
         end
       end
     end
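The two new named positions make ad-hoc scans possible without computing watermark offsets by hand. A small usage sketch of the Pro Iterator with the formats documented above (topic name is illustrative):

```ruby
# Everything from the beginning of all partitions of the topic
Karafka::Pro::Iterator.new({ 'orders' => :earliest }).each do |message|
  puts "#{message.partition}/#{message.offset}: #{message.raw_payload.bytesize} bytes"
end

# Per-partition mix: replay partition 0 from the start, only new data from partition 1
Karafka::Pro::Iterator.new(
  { 'orders' => { 0 => :earliest, 1 => :latest } }
).each { |message| puts message.offset }
```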
data/lib/karafka/pro/iterator/tpl_builder.rb
@@ -14,6 +14,11 @@ module Karafka
       # This builder resolves that and builds a tpl to which we can safely subscribe the way
       # we want it.
       class TplBuilder
+        # Supported named offset positions that we can reference via their name
+        SUPPORTED_NAMED_POSITIONS = %w[earliest latest].freeze
+
+        private_constant :SUPPORTED_NAMED_POSITIONS
+
         # @param consumer [::Rdkafka::Consumer] consumer instance needed to talk with Kafka
         # @param expanded_topics [Hash] hash with expanded and normalized topics data
         def initialize(consumer, expanded_topics)
@@ -28,6 +33,7 @@ module Karafka
           resolve_partitions_with_exact_offsets
           resolve_partitions_with_negative_offsets
           resolve_partitions_with_time_offsets
+          resolve_partitions_with_named_offsets
           resolve_partitions_with_cg_expectations
 
           # Final tpl with all the data
@@ -143,6 +149,23 @@ module Karafka
           end
         end
 
+        # If we get named offsets, we can just remap them to librdkafka special offset positions
+        def resolve_partitions_with_named_offsets
+          @expanded_topics.each do |name, partitions|
+            next unless partitions.is_a?(Hash)
+
+            partitions.each do |partition, offset|
+              # Skip offsets that do not match our named expectations
+              named_offset = offset.to_s
+
+              next unless SUPPORTED_NAMED_POSITIONS.include?(named_offset)
+
+              @mapped_topics[name][partition] = -1 if named_offset == 'latest'
+              @mapped_topics[name][partition] = -2 if named_offset == 'earliest'
+            end
+          end
+        end
+
        # Fetches last used offsets for those partitions for which we want to consume from last
        # moment where given consumer group has finished
        # This is indicated by given partition value being set to `true`.
data/lib/karafka/pro/loader.rb
@@ -60,6 +60,15 @@ module Karafka
           Processing::SubscriptionGroupsCoordinator.instance
         end
 
+        # Runs operations needed after fork in swarm for features that need it
+        #
+        # @param config [Karafka::Core::Configurable::Node]
+        # @param pre_fork_producer [WaterDrop::Producer] pre fork producer instance that may be
+        # needed to be replaced with newly changed one post-fork.
+        def post_fork(config, pre_fork_producer)
+          features.each { |feature| feature.post_fork(config, pre_fork_producer) }
+        end
+
         private
 
         # @return [Array<Module>] extra non-routing related pro features and routing components
@@ -84,6 +93,7 @@
           icfg.connection.manager = Connection::Manager.new
 
           icfg.processing.coordinator_class = Processing::Coordinator
+          icfg.processing.errors_tracker_class = Processing::Coordinators::ErrorsTracker
           icfg.processing.partitioner_class = Processing::Partitioner
           icfg.processing.scheduler_class = Processing::Schedulers::Default
           icfg.processing.jobs_queue_class = Processing::JobsQueue
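Besides the post-fork hook, the loader now routes the errors tracker class through the internal config, which makes it swappable. A hedged sketch of plugging in a subclass; the `(topic, partition, limit:)` signature follows the ErrorsTracker changes further down, and the logging side effect is purely illustrative:

```ruby
class NotifyingErrorsTracker < Karafka::Pro::Processing::Coordinators::ErrorsTracker
  def <<(error)
    super
    # counts already aggregates per error class, so running totals are available
    Karafka.logger.warn(
      "#{topic.name}/#{partition}: #{error.class} seen #{counts[error.class]} times"
    )
  end
end

# During app configuration (an internal setting, so override with care):
Karafka::App.config.internal.processing.errors_tracker_class = NotifyingErrorsTracker
```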
data/lib/karafka/pro/processing/coordinator.rb
@@ -10,6 +10,9 @@ module Karafka
       # within the same partition
       class Coordinator < ::Karafka::Processing::Coordinator
         extend Forwardable
+        include Helpers::ConfigImporter.new(
+          errors_tracker_class: %i[internal processing errors_tracker_class]
+        )
 
         def_delegators :@collapser, :collapsed?, :collapse_until!
 
@@ -20,7 +23,7 @@ module Karafka
           super
 
           @executed = []
-          @errors_tracker = Coordinators::ErrorsTracker.new
+          @errors_tracker = errors_tracker_class.new(topic, partition)
           @flow_mutex = Mutex.new
           # Lock for user code synchronization
           # We do not want to mix coordinator lock with the user lock not to create cases where
data/lib/karafka/pro/processing/coordinators/errors_tracker.rb
@@ -13,25 +13,47 @@ module Karafka
         class ErrorsTracker
           include Enumerable
 
+          # @return [Karafka::Routing::Topic] topic of this error tracker
+          attr_reader :topic
+
+          # @return [Integer] partition of this error tracker
+          attr_reader :partition
+
+          # @return [Hash]
+          attr_reader :counts
+
           # Max errors we keep in memory.
           # We do not want to keep more because for DLQ-less this would cause memory-leaks.
+          # We do however count per class for granular error counting
           STORAGE_LIMIT = 100
 
           private_constant :STORAGE_LIMIT
 
-          def initialize
+          # @param topic [Karafka::Routing::Topic]
+          # @param partition [Integer]
+          # @param limit [Integer] max number of errors we want to keep for reference when
+          # implementing custom error handling.
+          # @note `limit` does not apply to the counts. They will work beyond the number of errors
+          # occurring
+          def initialize(topic, partition, limit: STORAGE_LIMIT)
             @errors = []
+            @counts = Hash.new { |hash, key| hash[key] = 0 }
+            @topic = topic
+            @partition = partition
+            @limit = limit
           end
 
           # Clears all the errors
           def clear
             @errors.clear
+            @counts.clear
           end
 
           # @param error [StandardError] adds the error to the tracker
           def <<(error)
-            @errors.shift if @errors.size >= STORAGE_LIMIT
+            @errors.shift if @errors.size >= @limit
             @errors << error
+            @counts[error.class] += 1
           end
 
           # @return [Boolean] is the error tracker empty
@@ -41,7 +63,9 @@ module Karafka
 
           # @return [Integer] number of elements
           def size
-            count
+            # We use counts reference of all errors and not the `@errors` array because it allows
+            # us to go beyond the whole errors storage limit
+            @counts.values.sum
           end
 
           # @return [StandardError, nil] last error that occurred or nil if no errors
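A minimal usage sketch of the reworked tracker; in real flows the coordinator passes the routing topic and partition, plain stand-ins are used here:

```ruby
tracker = Karafka::Pro::Processing::Coordinators::ErrorsTracker.new('orders', 0, limit: 10)

3.times { tracker << StandardError.new('boom') }
tracker << ArgumentError.new('bad input')

tracker.size   # => 4 (sum of per-class counts, may exceed the storage limit)
tracker.counts # => { StandardError => 3, ArgumentError => 1 }
```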
data/lib/karafka/pro/processing/coordinators/filters_applier.rb
@@ -98,6 +98,17 @@ module Karafka
             :mark_as_consumed
           end
 
+          # The first (lowest) message we want to mark as consumed in marking. By default it uses
+          # same position as cursor in case user wants to mark same message as consumed as the
+          # one on which cursor action is applied.
+          # @return [Karafka::Messages::Message, nil] cursor marking message or nil if none
+          # @note It should not return position in time format, only numerical offset
+          def marking_cursor
+            return nil unless active?
+
+            applied.map(&:marking_cursor).compact.min_by(&:offset)
+          end
+
           private
 
           # @return [Boolean] is filtering active
data/lib/karafka/pro/processing/filters/base.rb
@@ -42,9 +42,11 @@ module Karafka
             @applied
           end
 
-          # @return [Integer] default timeout for pausing (if applicable)
+          # @return [Integer, nil] default timeout for pausing (if applicable) or nil if not
+          # @note Please do not return `0` when your filter is not pausing as it may interact
+          # with other filters that want to pause.
           def timeout
-            0
+            nil
           end
 
           # @return [Boolean] should we use the cursor value to mark as consumed. If any of the
@@ -58,6 +60,12 @@ module Karafka
           def marking_method
             :mark_as_consumed
           end
+
+          # @return [Karafka::Messages::Message, nil] cursor message for marking or nil if no
+          # marking
+          def marking_cursor
+            cursor
+          end
         end
       end
     end
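For custom filters, the practical consequence is: return nil from timeout unless the filter actually wants to pause, and override marking_cursor only when the marking position should differ from the cursor. A sketch of a subclass honoring that contract; the apply!/action/cursor conventions are assumptions based on the Pro filtering API and the Base code above:

```ruby
# Illustrative filter that drops messages with odd offsets
class EvenOffsetsOnly < Karafka::Pro::Processing::Filters::Base
  attr_reader :cursor

  def apply!(messages)
    @applied = false
    @cursor = nil

    messages.delete_if do |message|
      next false if message.offset.even?

      @applied = true
      @cursor = message
      true
    end
  end

  def action
    :skip
  end

  # timeout and marking_cursor are inherited: nil (no pausing) and cursor respectively
end

# Routing usage (factory signature assumed):
#   topic :events do
#     filter ->(*) { EvenOffsetsOnly.new }
#   end
```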
data/lib/karafka/pro/processing/filters/expirer.rb
@@ -36,6 +36,11 @@ module Karafka
               too_old
             end
           end
+
+          # @return [nil] this filter does not deal with timeouts
+          def timeout
+            nil
+          end
         end
       end
     end