karafka 2.5.3 → 2.5.4.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/config/locales/errors.yml +14 -0
- data/karafka.gemspec +13 -2
- data/lib/karafka/admin/contracts/replication.rb +149 -0
- data/lib/karafka/admin/replication.rb +462 -0
- data/lib/karafka/admin.rb +47 -2
- data/lib/karafka/instrumentation/logger_listener.rb +0 -2
- data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +4 -0
- data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +31 -15
- data/lib/karafka/licenser.rb +1 -1
- data/lib/karafka/messages/messages.rb +32 -0
- data/lib/karafka/pro/cleaner/messages/messages.rb +1 -1
- data/lib/karafka/pro/processing/jobs_queue.rb +0 -2
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +1 -1
- data/lib/karafka/pro/processing/strategies/vp/default.rb +1 -1
- data/lib/karafka/processing/strategies/dlq.rb +1 -1
- data/lib/karafka/routing/consumer_group.rb +19 -1
- data/lib/karafka/routing/subscription_group.rb +1 -1
- data/lib/karafka/routing/subscription_groups_builder.rb +17 -2
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +0 -1
- metadata +3 -62
- data/.coditsu/ci.yml +0 -3
- data/.console_irbrc +0 -11
- data/.github/CODEOWNERS +0 -3
- data/.github/FUNDING.yml +0 -1
- data/.github/ISSUE_TEMPLATE/bug_report.md +0 -43
- data/.github/ISSUE_TEMPLATE/feature_request.md +0 -20
- data/.github/workflows/ci_linux_ubuntu_x86_64_gnu.yml +0 -296
- data/.github/workflows/ci_macos_arm64.yml +0 -151
- data/.github/workflows/push.yml +0 -35
- data/.github/workflows/trigger-wiki-refresh.yml +0 -30
- data/.github/workflows/verify-action-pins.yml +0 -16
- data/.gitignore +0 -69
- data/.rspec +0 -7
- data/.ruby-gemset +0 -1
- data/.ruby-version +0 -1
- data/.yard-lint.yml +0 -174
- data/CODE_OF_CONDUCT.md +0 -46
- data/CONTRIBUTING.md +0 -32
- data/Gemfile +0 -29
- data/Gemfile.lock +0 -178
- data/Rakefile +0 -4
- data/SECURITY.md +0 -23
- data/bin/benchmarks +0 -99
- data/bin/clean_kafka +0 -43
- data/bin/create_token +0 -22
- data/bin/integrations +0 -341
- data/bin/record_rss +0 -50
- data/bin/rspecs +0 -26
- data/bin/scenario +0 -29
- data/bin/stress_many +0 -13
- data/bin/stress_one +0 -13
- data/bin/verify_kafka_warnings +0 -36
- data/bin/verify_license_integrity +0 -37
- data/bin/verify_topics_naming +0 -27
- data/bin/wait_for_kafka +0 -24
- data/docker-compose.yml +0 -25
- data/examples/payloads/avro/.gitkeep +0 -0
- data/examples/payloads/json/sample_set_01/enrollment_event.json +0 -579
- data/examples/payloads/json/sample_set_01/ingestion_event.json +0 -30
- data/examples/payloads/json/sample_set_01/transaction_event.json +0 -17
- data/examples/payloads/json/sample_set_01/user_event.json +0 -11
- data/examples/payloads/json/sample_set_02/download.json +0 -191
- data/examples/payloads/json/sample_set_03/event_type_1.json +0 -18
- data/examples/payloads/json/sample_set_03/event_type_2.json +0 -263
- data/examples/payloads/json/sample_set_03/event_type_3.json +0 -41
- data/log/.gitkeep +0 -0
- data/renovate.json +0 -21
data/lib/karafka/admin/replication.rb
ADDED
@@ -0,0 +1,462 @@
+# frozen_string_literal: true
+
+module Karafka
+  class Admin
+    # Replication administration operations helper
+    #
+    # Generates partition reassignment plans for increasing topic replication factor.
+    # Since librdkafka does not support changing replication factors directly, this class
+    # generates the necessary JSON configuration that can be executed using Kafka's Java-based
+    # reassignment tools.
+    #
+    # ## Important Considerations
+    #
+    # Replication factor changes are among the most resource-intensive operations in Kafka.
+    #
+    # ## Prerequisites
+    #
+    # 1. **Sufficient Disk Space**: Ensure target brokers have enough space for new replicas
+    # 2. **Network Capacity**: Verify network can handle additional replication traffic
+    # 3. **Broker Count**: Cannot exceed the number of available brokers
+    # 4. **Java Tools**: Kafka's reassignment tools must be available
+    #
+    # ## Best Practices
+    #
+    # - **Test First**: Always test on small topics or in staging environments
+    # - **Monitor Resources**: Watch disk space, network, and CPU during replication
+    # - **Incremental Changes**: Increase replication factor by 1 at a time for large topics
+    # - **Off-Peak Hours**: Execute during low-traffic periods to minimize impact
+    #
+    # @example Basic usage - increase replication factor
+    #   # Generate plan to increase replication from 2 to 3
+    #   plan = Karafka::Admin::Replication.plan(topic: 'events', to: 3)
+    #
+    #   # Review what will happen
+    #   puts plan.summary
+    #
+    #   # Export for execution
+    #   plan.export_to_file('/tmp/increase_replication.json')
+    #
+    #   # Execute with Kafka tools (outside of Ruby)
+    #   # kafka-reassign-partitions.sh --bootstrap-server localhost:9092 \
+    #   #   --reassignment-json-file /tmp/increase_replication.json --execute
+    #
+    # @example Rebalancing replicas across brokers
+    #   # Rebalance existing replicas without changing replication factor
+    #   plan = Karafka::Admin::Replication.rebalance(topic: 'events')
+    #   plan.export_to_file('/tmp/rebalance.json')
+    #
+    # @note This class only generates plans - actual execution requires Kafka's Java tools
+    # @note Always verify broker capacity before increasing replication
+    class Replication < Admin
+      attr_reader(
+        :topic,
+        :current_replication_factor,
+        :target_replication_factor,
+        :partitions_assignment,
+        :reassignment_json,
+        :execution_commands,
+        :steps
+      )
+
+      # Builds the replication plan
+      #
+      # @param topic [String] topic name
+      # @param current_replication_factor [Integer] current replication factor
+      # @param target_replication_factor [Integer] target replication factor
+      # @param partitions_assignment [Hash] partition to brokers assignment
+      # @param cluster_info [Hash] broker information
+      def initialize(
+        topic:,
+        current_replication_factor:,
+        target_replication_factor:,
+        partitions_assignment:,
+        cluster_info:
+      )
+        super()
+
+        @topic = topic
+        @current_replication_factor = current_replication_factor
+        @target_replication_factor = target_replication_factor
+        @partitions_assignment = partitions_assignment
+        @cluster_info = cluster_info
+
+        generate_reassignment_json
+        generate_execution_commands
+        generate_steps
+
+        freeze
+      end
+
+      # Export the reassignment JSON to a file
+      # @param file_path [String] path where to save the JSON file
+      def export_to_file(file_path)
+        File.write(file_path, @reassignment_json)
+        file_path
+      end
+
+      # @return [String] human-readable summary of the plan
+      def summary
+        broker_count = @cluster_info[:brokers].size
+        change = @target_replication_factor - @current_replication_factor
+        broker_nodes = @cluster_info[:brokers].map do |broker_info|
+          broker_info[:node_id]
+        end.join(', ')
+
+        <<~SUMMARY
+          Replication Increase Plan for Topic: #{@topic}
+          =====================================
+          Current replication factor: #{@current_replication_factor}
+          Target replication factor: #{@target_replication_factor}
+          Total partitions: #{@partitions_assignment.size}
+          Available brokers: #{broker_count} (#{broker_nodes})
+
+          This plan will increase replication by adding #{change} replica(s) to each partition.
+        SUMMARY
+      end
+
+      class << self
+        # Plans replication factor increase for a given topic
+        #
+        # Generates a detailed reassignment plan that preserves existing replica assignments
+        # while adding new replicas to meet the target replication factor. The plan uses
+        # round-robin distribution to balance new replicas across available brokers.
+        #
+        # @param topic [String] name of the topic
+        # @param to [Integer] target replication factor (must be higher than current)
+        # @param brokers [Hash{Integer => Array<Integer>}] optional manual broker assignments
+        #   per partition. Keys are partition IDs, values are arrays of broker IDs. If not provided
+        #   automatic distribution (usually fine) will be used
+        # @return [Replication] plan object containing JSON, commands, and instructions
+        #
+        # @raise [ArgumentError] if target replication factor is not higher than current
+        # @raise [ArgumentError] if target replication factor exceeds available broker count
+        # @raise [Rdkafka::RdkafkaError] if topic metadata cannot be fetched
+        #
+        # @example Increase replication from 1 to 3 with automatic distribution
+        #   plan = Replication.plan(topic: 'events', to: 3)
+        #
+        #   # Inspect the plan
+        #   puts plan.summary
+        #   puts plan.reassignment_json
+        #
+        #   # Check which brokers will get new replicas
+        #   plan.partitions_assignment.each do |partition_id, broker_ids|
+        #     puts "Partition #{partition_id}: #{broker_ids.join(', ')}"
+        #   end
+        #
+        #   # Save and execute
+        #   plan.export_to_file('increase_rf.json')
+        #
+        # @example Increase replication with manual broker placement
+        #   # Specify exactly which brokers should host each partition
+        #   plan = Replication.plan(
+        #     topic: 'events',
+        #     to: 3,
+        #     brokers: {
+        #       0 => [1, 2, 4], # Partition 0 on brokers 1, 2, 4
+        #       1 => [2, 3, 4], # Partition 1 on brokers 2, 3, 4
+        #       2 => [1, 3, 5]  # Partition 2 on brokers 1, 3, 5
+        #     }
+        #   )
+        #
+        #   # The plan will use your exact broker specifications
+        #   puts plan.partitions_assignment
+        #   # => {0=>[1, 2, 4], 1=>[2, 3, 4], 2=>[1, 3, 5]}
+        #
+        # @note When using manual placement, ensure all partitions are specified
+        # @note Manual placement overrides automatic distribution entirely
+        def plan(topic:, to:, brokers: nil)
+          topic_info = fetch_topic_info(topic)
+          first_partition = topic_info[:partitions].first
+          current_rf = first_partition[:replica_count] || first_partition[:replicas]&.size
+          cluster_info = fetch_cluster_info
+
+          # Use contract for validation
+          validation_data = {
+            topic: topic,
+            to: to,
+            brokers: brokers,
+            current_rf: current_rf,
+            broker_count: cluster_info[:brokers].size,
+            topic_info: topic_info,
+            cluster_info: cluster_info
+          }
+
+          Contracts::Replication.new.validate!(validation_data)
+
+          partitions_assignment = brokers || generate_partitions_assignment(
+            topic_info: topic_info,
+            target_replication_factor: to,
+            cluster_info: cluster_info
+          )
+
+          new(
+            topic: topic,
+            current_replication_factor: current_rf,
+            target_replication_factor: to,
+            partitions_assignment: partitions_assignment,
+            cluster_info: cluster_info
+          )
+        end
+
+        # Plans rebalancing of existing replicas across brokers
+        #
+        # Generates a reassignment plan that redistributes existing replicas more evenly
+        # across the cluster without changing the replication factor. Useful for:
+        #
+        # - Balancing load after adding new brokers to the cluster
+        # - Redistributing replicas after broker failures and recovery
+        # - Optimizing replica placement for better resource utilization
+        # - Moving replicas away from overloaded brokers
+        #
+        # @param topic [String] name of the topic to rebalance
+        # @return [Replication] rebalancing plan
+        #
+        # @example Rebalance after adding new brokers
+        #   # After adding brokers 4 and 5 to a 3-broker cluster
+        #   plan = Replication.rebalance(topic: 'events')
+        #
+        #   # Review how replicas will be redistributed
+        #   puts plan.summary
+        #
+        #   # Execute if distribution looks good
+        #   plan.export_to_file('rebalance.json')
+        #   # Then run: kafka-reassign-partitions.sh --execute ...
+        #
+        # @note This maintains the same replication factor
+        # @note All data will be copied to new locations during rebalancing
+        # @note Consider impact on cluster resources during rebalancing
+        def rebalance(topic:)
+          topic_info = fetch_topic_info(topic)
+          first_partition = topic_info[:partitions].first
+          current_rf = first_partition[:replica_count] || first_partition[:replicas]&.size
+          cluster_info = fetch_cluster_info
+
+          partitions_assignment = generate_partitions_assignment(
+            topic_info: topic_info,
+            target_replication_factor: current_rf,
+            cluster_info: cluster_info,
+            rebalance_only: true
+          )
+
+          new(
+            topic: topic,
+            current_replication_factor: current_rf,
+            target_replication_factor: current_rf,
+            partitions_assignment: partitions_assignment,
+            cluster_info: cluster_info
+          )
+        end
+
+        private
+
+        # Fetches topic metadata including partitions and replica information
+        # @param topic [String] name of the topic
+        # @return [Hash] topic information with partitions metadata
+        def fetch_topic_info(topic)
+          Topics.info(topic)
+        end
+
+        # Fetches cluster broker information from Kafka metadata
+        # @return [Hash] cluster information with broker details (node_id, host:port)
+        def fetch_cluster_info
+          cluster_metadata = cluster_info
+          {
+            brokers: cluster_metadata.brokers.map do |broker|
+              # Handle both hash and object formats from metadata
+              # rdkafka returns hashes with broker_id, broker_name, broker_port
+              if broker.is_a?(Hash)
+                node_id = broker[:broker_id] || broker[:node_id]
+                host = broker[:broker_name] || broker[:host]
+                port = broker[:broker_port] || broker[:port]
+                { node_id: node_id, host: "#{host}:#{port}" }
+              else
+                { node_id: broker.node_id, host: "#{broker.host}:#{broker.port}" }
+              end
+            end
+          }
+        end
+
+        # Generates partition-to-broker assignments for replication changes
+        # Handles both replication factor increases and rebalancing scenarios
+        # @param topic_info [Hash] topic metadata with partition information
+        # @param target_replication_factor [Integer] desired replication factor
+        # @param cluster_info [Hash] cluster metadata with broker information
+        # @param rebalance_only [Boolean] true for rebalancing, false for increase
+        # @return [Hash{Integer => Array<Integer>}] assignments (partition_id => broker_ids)
+        def generate_partitions_assignment(
+          topic_info:,
+          target_replication_factor:,
+          cluster_info:,
+          rebalance_only: false
+        )
+          partitions = topic_info[:partitions]
+          brokers = cluster_info[:brokers].map { |broker_info| broker_info[:node_id] }.sort
+          assignments = {}
+
+          partitions.each do |partition_info|
+            partition_id = partition_info[:partition_id]
+
+            # Handle both :replicas (array of objects) and :replica_brokers (array of IDs)
+            replicas = partition_info[:replicas] || partition_info[:replica_brokers] || []
+            current_replicas = if replicas.first.respond_to?(:node_id)
+                                 replicas.map(&:node_id).sort
+                               else
+                                 replicas.sort
+                               end
+
+            if rebalance_only
+              # For rebalancing, redistribute current replicas optimally
+              new_replicas = select_brokers_for_partition(
+                partition_id: partition_id,
+                brokers: brokers,
+                replica_count: target_replication_factor,
+                avoid_brokers: []
+              )
+            else
+              # For replication increase, keep existing replicas and add new ones
+              additional_needed = target_replication_factor - current_replicas.size
+              available_brokers = brokers - current_replicas
+
+              additional_replicas = select_additional_brokers(
+                available_brokers: available_brokers,
+                needed_count: additional_needed,
+                partition_id: partition_id
+              )
+
+              new_replicas = (current_replicas + additional_replicas).sort
+            end
+
+            assignments[partition_id] = new_replicas
+          end
+
+          assignments
+        end
+
+        # Selects brokers for a partition using round-robin distribution
+        # Distributes replicas evenly across available brokers
+        # @param partition_id [Integer] partition identifier for offset calculation
+        # @param brokers [Array<Integer>] available broker node IDs
+        # @param replica_count [Integer] number of replicas needed
+        # @param avoid_brokers [Array<Integer>] broker IDs to exclude from selection
+        # @return [Array<Integer>] sorted array of selected broker node IDs
+        def select_brokers_for_partition(
+          partition_id:,
+          brokers:,
+          replica_count:,
+          avoid_brokers: []
+        )
+          available_brokers = brokers - avoid_brokers
+
+          # Simple round-robin selection starting from a different offset per partition
+          # This helps distribute replicas more evenly across brokers
+          start_index = partition_id % available_brokers.size
+          selected = []
+
+          replica_count.times do |replica_index|
+            broker_index = (start_index + replica_index) % available_brokers.size
+            selected << available_brokers[broker_index]
+          end
+
+          selected.sort
+        end
+
+        # Selects additional brokers for increasing replication factor
+        # Uses round-robin selection for even distribution across available brokers
+        # @param available_brokers [Array<Integer>] broker IDs available for new replicas
+        # @param needed_count [Integer] number of additional brokers needed
+        # @param partition_id [Integer] partition identifier for offset calculation
+        # @return [Array<Integer>] sorted array of selected broker node IDs
+        def select_additional_brokers(available_brokers:, needed_count:, partition_id:)
+          # Use round-robin starting from partition-specific offset
+          start_index = partition_id % available_brokers.size
+          selected = []
+
+          needed_count.times do |additional_replica_index|
+            broker_index = (start_index + additional_replica_index) % available_brokers.size
+            selected << available_brokers[broker_index]
+          end
+
+          selected.sort
+        end
+      end
+
+      private
+
+      # Generates the JSON structure required by kafka-reassign-partitions.sh
+      # Creates Kafka-compatible reassignment plan with version and partitions data
+      # @return [void]
+      def generate_reassignment_json
+        partitions_data = @partitions_assignment.map do |partition_id, replica_broker_ids|
+          {
+            topic: @topic,
+            partition: partition_id,
+            replicas: replica_broker_ids
+          }
+        end
+
+        reassignment_data = {
+          version: 1,
+          partitions: partitions_data
+        }
+
+        @reassignment_json = JSON.pretty_generate(reassignment_data)
+      end
+
+      # Generates command templates for executing the reassignment plan
+      # Builds generate, execute, and verify command templates with placeholders
+      # @return [void]
+      def generate_execution_commands
+        @execution_commands = {
+          generate: build_generate_command,
+          execute: build_execute_command,
+          verify: build_verify_command
+        }
+      end
+
+      # Builds the kafka-reassign-partitions.sh command for generating reassignment plan
+      # @return [String] command template with placeholder for broker addresses
+      def build_generate_command
+        'kafka-reassign-partitions.sh --bootstrap-server <KAFKA_BROKERS> ' \
+          '--reassignment-json-file reassignment.json --generate'
+      end
+
+      # Builds the kafka-reassign-partitions.sh command for executing reassignment
+      # @return [String] command template with placeholder for broker addresses
+      def build_execute_command
+        'kafka-reassign-partitions.sh --bootstrap-server <KAFKA_BROKERS> ' \
+          '--reassignment-json-file reassignment.json --execute'
+      end
+
+      # Builds the kafka-reassign-partitions.sh command for verifying reassignment progress
+      # @return [String] command template with placeholder for broker addresses
+      def build_verify_command
+        'kafka-reassign-partitions.sh --bootstrap-server <KAFKA_BROKERS> ' \
+          '--reassignment-json-file reassignment.json --verify'
+      end
+
+      # Generates detailed step-by-step instructions for executing the reassignment
+      # Creates human-readable guide with commands and important safety notes
+      # @return [void]
+      def generate_steps
+        @steps = [
+          "1. Export the reassignment JSON using: plan.export_to_file('reassignment.json')",
+          "2. Validate the plan (optional): #{@execution_commands[:generate]}",
+          "3. Execute the reassignment: #{@execution_commands[:execute]}",
+          "4. Monitor progress: #{@execution_commands[:verify]}",
+          '5. Verify completion by checking topic metadata',
+          '',
+          'IMPORTANT NOTES:',
+          '- Replace <KAFKA_BROKERS> with your actual Kafka broker addresses',
+          '- The reassignment process may take time depending on data size',
+          '- Monitor disk space and network I/O during reassignment',
+          '- Consider running during low-traffic periods',
+          '- For large topics, consider throttling replica transfer rate',
+          '- Ensure sufficient disk space on target brokers before starting',
+          '- Keep monitoring until all replicas are in-sync (ISR)'
+        ]
+      end
+    end
+  end
+end
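For orientation, the structure that `generate_reassignment_json` writes is the standard `kafka-reassign-partitions.sh` input format: a `version: 1` wrapper around a `partitions` array. Below is a minimal sketch of that shape, using a hypothetical three-partition `events` topic and broker IDs 1-3 that are not taken from this diff:

```ruby
require 'json'

# Hypothetical partition_id => broker_ids assignment, in the same shape the plan
# object keeps internally in #partitions_assignment
assignment = { 0 => [1, 2], 1 => [2, 3], 2 => [1, 3] }

reassignment = {
  version: 1,
  partitions: assignment.map do |partition, replicas|
    { topic: 'events', partition: partition, replicas: replicas }
  end
}

# Same call the class uses; the result is what #export_to_file writes to disk
puts JSON.pretty_generate(reassignment)
```

The first entry of the pretty-printed output corresponds to `{"topic": "events", "partition": 0, "replicas": [1, 2]}`, spread across multiple lines.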
data/lib/karafka/admin.rb
CHANGED
@@ -1,7 +1,5 @@
 # frozen_string_literal: true
 
-require_relative 'admin/consumer_groups'
-
 module Karafka
   # Admin actions that we can perform via Karafka on our Kafka cluster
   #
@@ -139,6 +137,53 @@ module Karafka
      )
    end
 
+    # Plans a replication factor increase for a topic that can be used with Kafka's
+    # reassignment tools. Since librdkafka does not support increasing replication factor
+    # directly, this method generates the necessary JSON and commands for manual execution.
+    #
+    # @param topic [String] name of the topic to plan replication for
+    # @param replication_factor [Integer] target replication factor (must be higher than current)
+    # @param brokers [Hash{Integer => Array<Integer>}] optional manual broker assignments
+    #   per partition. Keys are partition IDs, values are arrays of broker IDs. If not provided,
+    #   assignments distribution will happen automatically.
+    # @return [Replication] plan object with JSON, commands, and instructions
+    #
+    # @example Plan replication increase with automatic broker distribution
+    #   plan = Karafka::Admin.plan_topic_replication(topic: 'events', replication_factor: 3)
+    #
+    #   # Review the plan
+    #   puts plan.summary
+    #
+    #   # Export JSON for Kafka's reassignment tools
+    #   plan.export_to_file('reassignment.json')
+    #
+    #   # Execute the plan (replace <KAFKA_BROKERS> with actual brokers)
+    #   system(plan.execution_commands[:execute].gsub('<KAFKA_BROKERS>', 'localhost:9092'))
+    #
+    # @example Plan replication with manual broker placement - specify brokers
+    #   plan = Karafka::Admin.plan_topic_replication(
+    #     topic: 'events',
+    #     replication_factor: 3,
+    #     brokers: {
+    #       0 => [1, 2, 4], # Partition 0 on brokers 1, 2, 4
+    #       1 => [2, 3, 4], # Partition 1 on brokers 2, 3, 4
+    #       2 => [1, 3, 5]  # Partition 2 on brokers 1, 3, 5
+    #     }
+    #   )
+    #
+    #   # The plan will use your exact broker specifications
+    #   puts plan.partitions_assignment
+    #   # => { 0=>[1, 2, 4], 1=>[2, 3, 4], 2=>[1, 3, 5] }
+    #
+    # @see Replication.plan for more details
+    def plan_topic_replication(topic:, replication_factor:, brokers: nil)
+      Replication.plan(
+        topic: topic,
+        to: replication_factor,
+        brokers: brokers
+      )
+    end
+
    # @return [Rdkafka::Metadata] cluster metadata info
    def cluster_info
      with_admin(&:metadata)
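The automatic placement behind `plan_topic_replication` (via `Replication.plan`) is the partition-offset round-robin shown earlier in `select_additional_brokers`. A standalone sketch of that selection arithmetic with hypothetical broker IDs follows; the method name below is illustrative, not Karafka API:

```ruby
# Mirrors select_additional_brokers: each partition starts the round-robin at a
# different offset so new replicas spread evenly across the free brokers.
def pick_additional_brokers(available_brokers, needed_count, partition_id)
  start_index = partition_id % available_brokers.size

  Array.new(needed_count) do |i|
    available_brokers[(start_index + i) % available_brokers.size]
  end.sort
end

# Partitions 0..2 already live on brokers 1..3; brokers 4 and 5 are free and the
# replication factor is being raised by one.
pick_additional_brokers([4, 5], 1, 0) # => [4]
pick_additional_brokers([4, 5], 1, 1) # => [5]
pick_additional_brokers([4, 5], 1, 2) # => [4]
```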
data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb
CHANGED
@@ -44,11 +44,15 @@ module Karafka
 
      # Types of errors originating from user code in the consumer flow
      USER_CONSUMER_ERROR_TYPES = %w[
+        consumer.initialized.error
+        consumer.wrap.error
        consumer.consume.error
        consumer.revoked.error
+        consumer.idle.error
        consumer.shutdown.error
        consumer.tick.error
        consumer.eofed.error
+        consumer.after_consume.error
      ].freeze
 
      private_constant :USER_CONSUMER_ERROR_TYPES

data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb
CHANGED
@@ -90,42 +90,58 @@ module Karafka
      client.active_span&.set_error(error)
 
      case event[:type]
+      when 'consumer.initialized.error'
+        error "Consumer initialized error: #{error}"
+      when 'consumer.wrap.error'
+        error "Consumer wrap failed due to an error: #{error}"
      when 'consumer.consume.error'
        error "Consumer consuming error: #{error}"
      when 'consumer.revoked.error'
        error "Consumer on revoked failed due to an error: #{error}"
-      when 'consumer.
-        error "Consumer
-      when 'consumer.before_consume.error'
-        error "Consumer before consume failed due to an error: #{error}"
-      when 'consumer.after_consume.error'
-        error "Consumer after consume failed due to an error: #{error}"
+      when 'consumer.idle.error'
+        error "Consumer idle failed due to an error: #{error}"
      when 'consumer.shutdown.error'
        error "Consumer on shutdown failed due to an error: #{error}"
      when 'consumer.tick.error'
-        error "Consumer tick failed due to an error: #{error}"
+        error "Consumer on tick failed due to an error: #{error}"
      when 'consumer.eofed.error'
-        error "Consumer eofed failed due to an error: #{error}"
+        error "Consumer on eofed failed due to an error: #{error}"
+      when 'consumer.after_consume.error'
+        error "Consumer on after_consume failed due to an error: #{error}"
      when 'worker.process.error'
        fatal "Worker processing failed due to an error: #{error}"
      when 'connection.listener.fetch_loop.error'
        error "Listener fetch loop error: #{error}"
+      when 'swarm.supervisor.error'
+        fatal "Swarm supervisor crashed due to an error: #{error}"
      when 'runner.call.error'
        fatal "Runner crashed due to an error: #{error}"
      when 'app.stopping.error'
        error 'Forceful Karafka server stop'
-      when '
-
+      when 'app.forceful_stopping.error'
+        error "Forceful shutdown error occurred: #{error}"
      when 'librdkafka.error'
        error "librdkafka internal error occurred: #{error}"
-
-
+      when 'callbacks.statistics.error'
+        error "callbacks.statistics processing failed due to an error: #{error}"
+      when 'callbacks.error.error'
+        error "callbacks.error processing failed due to an error: #{error}"
+      # Those will only occur when retries in the client fail and when they did not stop
+      # after back-offs
      when 'connection.client.poll.error'
        error "Data polling error occurred: #{error}"
+      when 'connection.client.rebalance_callback.error'
+        error "Rebalance callback error occurred: #{error}"
+      when 'connection.client.unsubscribe.error'
+        error "Client unsubscribe error occurred: #{error}"
+      when 'parallel_segments.reducer.error'
+        error "Parallel segments reducer error occurred: #{error}"
+      when 'parallel_segments.partitioner.error'
+        error "Parallel segments partitioner error occurred: #{error}"
+      when 'virtual_partitions.partitioner.error'
+        error "Virtual partitions partitioner error occurred: #{error}"
      else
-
-        # This should never happen. Please contact the maintainers
-        raise Errors::UnsupportedCaseError, event
+        error "#{event[:type]} error occurred: #{error}"
      end
 
      pop_tags
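As a reminder of how this listener gets used (not part of this diff), the Datadog logger listener is subscribed to Karafka's monitor from the boot file. A sketch along the lines of the Karafka/Datadog integration docs, assuming `dd-trace-rb` is installed and configured separately:

```ruby
# karafka.rb (boot file) - attaching the Datadog logger listener
require 'datadog' # older dd-trace-rb versions use require 'ddtrace'

dd_listener = ::Karafka::Instrumentation::Vendors::Datadog::LoggerListener.new do |config|
  config.client = Datadog::Tracing
end

Karafka.monitor.subscribe(dd_listener)
```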
data/lib/karafka/licenser.rb
CHANGED
@@ -46,7 +46,7 @@ module Karafka
 
      # We gsub and strip in case someone copy-pasted it as a multi line string
      formatted_token = license_config.token.strip.delete("\n").delete(' ')
-      decoded_token =
+      decoded_token = formatted_token.unpack1('m') # decode from base64
 
      begin
        data = public_key.public_decrypt(decoded_token)
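The licenser change decodes the token with `String#unpack1('m')`, which is plain Base64 decoding and avoids depending on the `base64` library (no longer a default gem in recent Rubies). A quick equivalence check:

```ruby
require 'base64' # only needed here to demonstrate the equivalence

token = Base64.strict_encode64('some-license-payload')

# 'm' is the Base64 unpack directive, so no extra require is needed at runtime
token.unpack1('m')                           # => "some-license-payload"
token.unpack1('m') == Base64.decode64(token) # => true
```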
data/lib/karafka/messages/messages.rb
CHANGED
@@ -64,6 +64,38 @@ module Karafka
        @messages_array.dup
      end
 
+      # Returns the underlying messages array directly without duplication.
+      #
+      # This method exists to provide Karafka internals with direct access to the messages array,
+      # bypassing any monkey patches that external libraries may apply to enumerable methods.
+      #
+      # ## Why this method exists
+      #
+      # External instrumentation libraries like DataDog's `dd-trace-rb` patch the `#each` method
+      # on this class to create tracing spans around message iteration. While this is desirable
+      # for user code (to trace message processing), it causes problems when Karafka's internal
+      # infrastructure iterates over messages for housekeeping tasks (offset tracking,
+      # deserialization, etc.) - creating empty/unwanted spans.
+      #
+      # By using `raw.map` or `raw.each` instead of `map` or `each` directly, internal code
+      # bypasses the patched `#each` method since it operates on the raw Array, not this class.
+      #
+      # ## Usage
+      #
+      # This method should ONLY be used by Karafka internals. User-facing code (consumers,
+      # ActiveJob processors, etc.) should use regular `#each`/`#map` so that instrumentation
+      # libraries can properly trace message processing.
+      #
+      # @return [Array<Karafka::Messages::Message>] the underlying messages array (not a copy)
+      #
+      # @note This returns the actual internal array, not a copy. Do not modify it.
+      # @see https://github.com/karafka/karafka/issues/2939
+      #
+      # @private
+      def raw
+        @messages_array
+      end
+
      alias count size
    end
  end
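The split that the new `#raw` accessor enables: user-facing code keeps iterating through `#each` (so tracers can wrap it), while framework internals walk the plain Array. A hedged sketch using names from this diff; the consumer class and the `track_offset` helper below are illustrative only:

```ruby
# User-facing consumer: regular #each, so a patched #each (e.g. from dd-trace-rb)
# can open a span around message processing
class EventsConsumer < Karafka::BaseConsumer
  def consume
    messages.each { |message| puts message.offset }
  end
end

# Framework internals (housekeeping) would instead go through the raw Array and
# bypass any prepended #each:
#
#   messages.raw.each { |message| track_offset(message.offset) }
```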
data/lib/karafka/pro/cleaner/messages/messages.rb
CHANGED
@@ -13,7 +13,7 @@ module Karafka
        # This module is prepended to Karafka::Messages::Messages to add cleaning functionality.
        # The implementation calls super() to maintain compatibility with other libraries that
        # also prepend modules to modify the #each method (e.g., DataDog tracing).
-        #
+        # @see https://github.com/DataDog/dd-trace-rb/issues/4867
        module Messages
          # @param clean [Boolean] do we want to clean each message after we're done working with
          #   it.