karafka 2.5.3 → 2.5.4.rc1

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
Files changed (70)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/config/locales/errors.yml +14 -0
  4. data/karafka.gemspec +13 -2
  5. data/lib/karafka/admin/contracts/replication.rb +149 -0
  6. data/lib/karafka/admin/replication.rb +462 -0
  7. data/lib/karafka/admin.rb +47 -2
  8. data/lib/karafka/instrumentation/logger_listener.rb +0 -2
  9. data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +4 -0
  10. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +31 -15
  11. data/lib/karafka/licenser.rb +1 -1
  12. data/lib/karafka/messages/messages.rb +32 -0
  13. data/lib/karafka/pro/cleaner/messages/messages.rb +1 -1
  14. data/lib/karafka/pro/processing/jobs_queue.rb +0 -2
  15. data/lib/karafka/pro/processing/strategies/dlq/default.rb +1 -1
  16. data/lib/karafka/pro/processing/strategies/vp/default.rb +1 -1
  17. data/lib/karafka/processing/strategies/dlq.rb +1 -1
  18. data/lib/karafka/routing/consumer_group.rb +19 -1
  19. data/lib/karafka/routing/subscription_group.rb +1 -1
  20. data/lib/karafka/routing/subscription_groups_builder.rb +17 -2
  21. data/lib/karafka/version.rb +1 -1
  22. data/lib/karafka.rb +0 -1
  23. metadata +3 -62
  24. data/.coditsu/ci.yml +0 -3
  25. data/.console_irbrc +0 -11
  26. data/.github/CODEOWNERS +0 -3
  27. data/.github/FUNDING.yml +0 -1
  28. data/.github/ISSUE_TEMPLATE/bug_report.md +0 -43
  29. data/.github/ISSUE_TEMPLATE/feature_request.md +0 -20
  30. data/.github/workflows/ci_linux_ubuntu_x86_64_gnu.yml +0 -296
  31. data/.github/workflows/ci_macos_arm64.yml +0 -151
  32. data/.github/workflows/push.yml +0 -35
  33. data/.github/workflows/trigger-wiki-refresh.yml +0 -30
  34. data/.github/workflows/verify-action-pins.yml +0 -16
  35. data/.gitignore +0 -69
  36. data/.rspec +0 -7
  37. data/.ruby-gemset +0 -1
  38. data/.ruby-version +0 -1
  39. data/.yard-lint.yml +0 -174
  40. data/CODE_OF_CONDUCT.md +0 -46
  41. data/CONTRIBUTING.md +0 -32
  42. data/Gemfile +0 -29
  43. data/Gemfile.lock +0 -178
  44. data/Rakefile +0 -4
  45. data/SECURITY.md +0 -23
  46. data/bin/benchmarks +0 -99
  47. data/bin/clean_kafka +0 -43
  48. data/bin/create_token +0 -22
  49. data/bin/integrations +0 -341
  50. data/bin/record_rss +0 -50
  51. data/bin/rspecs +0 -26
  52. data/bin/scenario +0 -29
  53. data/bin/stress_many +0 -13
  54. data/bin/stress_one +0 -13
  55. data/bin/verify_kafka_warnings +0 -36
  56. data/bin/verify_license_integrity +0 -37
  57. data/bin/verify_topics_naming +0 -27
  58. data/bin/wait_for_kafka +0 -24
  59. data/docker-compose.yml +0 -25
  60. data/examples/payloads/avro/.gitkeep +0 -0
  61. data/examples/payloads/json/sample_set_01/enrollment_event.json +0 -579
  62. data/examples/payloads/json/sample_set_01/ingestion_event.json +0 -30
  63. data/examples/payloads/json/sample_set_01/transaction_event.json +0 -17
  64. data/examples/payloads/json/sample_set_01/user_event.json +0 -11
  65. data/examples/payloads/json/sample_set_02/download.json +0 -191
  66. data/examples/payloads/json/sample_set_03/event_type_1.json +0 -18
  67. data/examples/payloads/json/sample_set_03/event_type_2.json +0 -263
  68. data/examples/payloads/json/sample_set_03/event_type_3.json +0 -41
  69. data/log/.gitkeep +0 -0
  70. data/renovate.json +0 -21
data/lib/karafka/admin/replication.rb ADDED
@@ -0,0 +1,462 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ class Admin
+ # Replication administration operations helper
+ #
+ # Generates partition reassignment plans for increasing topic replication factor.
+ # Since librdkafka does not support changing replication factors directly, this class
+ # generates the necessary JSON configuration that can be executed using Kafka's Java-based
+ # reassignment tools.
+ #
+ # ## Important Considerations
+ #
+ # Replication factor changes are among the most resource-intensive operations in Kafka.
+ #
+ # ## Prerequisites
+ #
+ # 1. **Sufficient Disk Space**: Ensure target brokers have enough space for new replicas
+ # 2. **Network Capacity**: Verify network can handle additional replication traffic
+ # 3. **Broker Count**: Cannot exceed the number of available brokers
+ # 4. **Java Tools**: Kafka's reassignment tools must be available
+ #
+ # ## Best Practices
+ #
+ # - **Test First**: Always test on small topics or in staging environments
+ # - **Monitor Resources**: Watch disk space, network, and CPU during replication
+ # - **Incremental Changes**: Increase replication factor by 1 at a time for large topics
+ # - **Off-Peak Hours**: Execute during low-traffic periods to minimize impact
+ #
+ # @example Basic usage - increase replication factor
+ # # Generate plan to increase replication from 2 to 3
+ # plan = Karafka::Admin::Replication.plan(topic: 'events', to: 3)
+ #
+ # # Review what will happen
+ # puts plan.summary
+ #
+ # # Export for execution
+ # plan.export_to_file('/tmp/increase_replication.json')
+ #
+ # # Execute with Kafka tools (outside of Ruby)
+ # # kafka-reassign-partitions.sh --bootstrap-server localhost:9092 \
+ # # --reassignment-json-file /tmp/increase_replication.json --execute
+ #
+ # @example Rebalancing replicas across brokers
+ # # Rebalance existing replicas without changing replication factor
+ # plan = Karafka::Admin::Replication.rebalance(topic: 'events')
+ # plan.export_to_file('/tmp/rebalance.json')
+ #
+ # @note This class only generates plans - actual execution requires Kafka's Java tools
+ # @note Always verify broker capacity before increasing replication
+ class Replication < Admin
+ attr_reader(
+ :topic,
+ :current_replication_factor,
+ :target_replication_factor,
+ :partitions_assignment,
+ :reassignment_json,
+ :execution_commands,
+ :steps
+ )
+
+ # Builds the replication plan
+ #
+ # @param topic [String] topic name
+ # @param current_replication_factor [Integer] current replication factor
+ # @param target_replication_factor [Integer] target replication factor
+ # @param partitions_assignment [Hash] partition to brokers assignment
+ # @param cluster_info [Hash] broker information
+ def initialize(
+ topic:,
+ current_replication_factor:,
+ target_replication_factor:,
+ partitions_assignment:,
+ cluster_info:
+ )
+ super()
+
+ @topic = topic
+ @current_replication_factor = current_replication_factor
+ @target_replication_factor = target_replication_factor
+ @partitions_assignment = partitions_assignment
+ @cluster_info = cluster_info
+
+ generate_reassignment_json
+ generate_execution_commands
+ generate_steps
+
+ freeze
+ end
+
+ # Export the reassignment JSON to a file
+ # @param file_path [String] path where to save the JSON file
+ def export_to_file(file_path)
+ File.write(file_path, @reassignment_json)
+ file_path
+ end
+
+ # @return [String] human-readable summary of the plan
+ def summary
+ broker_count = @cluster_info[:brokers].size
+ change = @target_replication_factor - @current_replication_factor
+ broker_nodes = @cluster_info[:brokers].map do |broker_info|
+ broker_info[:node_id]
+ end.join(', ')
+
+ <<~SUMMARY
+ Replication Increase Plan for Topic: #{@topic}
+ =====================================
+ Current replication factor: #{@current_replication_factor}
+ Target replication factor: #{@target_replication_factor}
+ Total partitions: #{@partitions_assignment.size}
+ Available brokers: #{broker_count} (#{broker_nodes})
+
+ This plan will increase replication by adding #{change} replica(s) to each partition.
+ SUMMARY
+ end
+
+ class << self
+ # Plans replication factor increase for a given topic
+ #
+ # Generates a detailed reassignment plan that preserves existing replica assignments
+ # while adding new replicas to meet the target replication factor. The plan uses
+ # round-robin distribution to balance new replicas across available brokers.
+ #
+ # @param topic [String] name of the topic
+ # @param to [Integer] target replication factor (must be higher than current)
+ # @param brokers [Hash{Integer => Array<Integer>}] optional manual broker assignments
+ # per partition. Keys are partition IDs, values are arrays of broker IDs. If not provided
+ # automatic distribution (usually fine) will be used
+ # @return [Replication] plan object containing JSON, commands, and instructions
+ #
+ # @raise [ArgumentError] if target replication factor is not higher than current
+ # @raise [ArgumentError] if target replication factor exceeds available broker count
+ # @raise [Rdkafka::RdkafkaError] if topic metadata cannot be fetched
+ #
+ # @example Increase replication from 1 to 3 with automatic distribution
+ # plan = Replication.plan(topic: 'events', to: 3)
+ #
+ # # Inspect the plan
+ # puts plan.summary
+ # puts plan.reassignment_json
+ #
+ # # Check which brokers will get new replicas
+ # plan.partitions_assignment.each do |partition_id, broker_ids|
+ # puts "Partition #{partition_id}: #{broker_ids.join(', ')}"
+ # end
+ #
+ # # Save and execute
+ # plan.export_to_file('increase_rf.json')
+ #
+ # @example Increase replication with manual broker placement
+ # # Specify exactly which brokers should host each partition
+ # plan = Replication.plan(
+ # topic: 'events',
+ # to: 3,
+ # brokers: {
+ # 0 => [1, 2, 4], # Partition 0 on brokers 1, 2, 4
+ # 1 => [2, 3, 4], # Partition 1 on brokers 2, 3, 4
+ # 2 => [1, 3, 5] # Partition 2 on brokers 1, 3, 5
+ # }
+ # )
+ #
+ # # The plan will use your exact broker specifications
+ # puts plan.partitions_assignment
+ # # => {0=>[1, 2, 4], 1=>[2, 3, 4], 2=>[1, 3, 5]}
+ #
+ # @note When using manual placement, ensure all partitions are specified
+ # @note Manual placement overrides automatic distribution entirely
+ def plan(topic:, to:, brokers: nil)
+ topic_info = fetch_topic_info(topic)
+ first_partition = topic_info[:partitions].first
+ current_rf = first_partition[:replica_count] || first_partition[:replicas]&.size
+ cluster_info = fetch_cluster_info
+
+ # Use contract for validation
+ validation_data = {
+ topic: topic,
+ to: to,
+ brokers: brokers,
+ current_rf: current_rf,
+ broker_count: cluster_info[:brokers].size,
+ topic_info: topic_info,
+ cluster_info: cluster_info
+ }
+
+ Contracts::Replication.new.validate!(validation_data)
+
+ partitions_assignment = brokers || generate_partitions_assignment(
+ topic_info: topic_info,
+ target_replication_factor: to,
+ cluster_info: cluster_info
+ )
+
+ new(
+ topic: topic,
+ current_replication_factor: current_rf,
+ target_replication_factor: to,
+ partitions_assignment: partitions_assignment,
+ cluster_info: cluster_info
+ )
+ end
+
+ # Plans rebalancing of existing replicas across brokers
+ #
+ # Generates a reassignment plan that redistributes existing replicas more evenly
+ # across the cluster without changing the replication factor. Useful for:
+ #
+ # - Balancing load after adding new brokers to the cluster
+ # - Redistributing replicas after broker failures and recovery
+ # - Optimizing replica placement for better resource utilization
+ # - Moving replicas away from overloaded brokers
+ #
+ # @param topic [String] name of the topic to rebalance
+ # @return [Replication] rebalancing plan
+ #
+ # @example Rebalance after adding new brokers
+ # # After adding brokers 4 and 5 to a 3-broker cluster
+ # plan = Replication.rebalance(topic: 'events')
+ #
+ # # Review how replicas will be redistributed
+ # puts plan.summary
+ #
+ # # Execute if distribution looks good
+ # plan.export_to_file('rebalance.json')
+ # # Then run: kafka-reassign-partitions.sh --execute ...
+ #
+ # @note This maintains the same replication factor
+ # @note All data will be copied to new locations during rebalancing
+ # @note Consider impact on cluster resources during rebalancing
+ def rebalance(topic:)
+ topic_info = fetch_topic_info(topic)
+ first_partition = topic_info[:partitions].first
+ current_rf = first_partition[:replica_count] || first_partition[:replicas]&.size
+ cluster_info = fetch_cluster_info
+
+ partitions_assignment = generate_partitions_assignment(
+ topic_info: topic_info,
+ target_replication_factor: current_rf,
+ cluster_info: cluster_info,
+ rebalance_only: true
+ )
+
+ new(
+ topic: topic,
+ current_replication_factor: current_rf,
+ target_replication_factor: current_rf,
+ partitions_assignment: partitions_assignment,
+ cluster_info: cluster_info
+ )
+ end
+
+ private
+
+ # Fetches topic metadata including partitions and replica information
+ # @param topic [String] name of the topic
+ # @return [Hash] topic information with partitions metadata
+ def fetch_topic_info(topic)
+ Topics.info(topic)
+ end
+
+ # Fetches cluster broker information from Kafka metadata
+ # @return [Hash] cluster information with broker details (node_id, host:port)
+ def fetch_cluster_info
+ cluster_metadata = cluster_info
+ {
+ brokers: cluster_metadata.brokers.map do |broker|
+ # Handle both hash and object formats from metadata
+ # rdkafka returns hashes with broker_id, broker_name, broker_port
+ if broker.is_a?(Hash)
+ node_id = broker[:broker_id] || broker[:node_id]
+ host = broker[:broker_name] || broker[:host]
+ port = broker[:broker_port] || broker[:port]
+ { node_id: node_id, host: "#{host}:#{port}" }
+ else
+ { node_id: broker.node_id, host: "#{broker.host}:#{broker.port}" }
+ end
+ end
+ }
+ end
+
+ # Generates partition-to-broker assignments for replication changes
+ # Handles both replication factor increases and rebalancing scenarios
+ # @param topic_info [Hash] topic metadata with partition information
+ # @param target_replication_factor [Integer] desired replication factor
+ # @param cluster_info [Hash] cluster metadata with broker information
+ # @param rebalance_only [Boolean] true for rebalancing, false for increase
+ # @return [Hash{Integer => Array<Integer>}] assignments (partition_id => broker_ids)
+ def generate_partitions_assignment(
+ topic_info:,
+ target_replication_factor:,
+ cluster_info:,
+ rebalance_only: false
+ )
+ partitions = topic_info[:partitions]
+ brokers = cluster_info[:brokers].map { |broker_info| broker_info[:node_id] }.sort
+ assignments = {}
+
+ partitions.each do |partition_info|
+ partition_id = partition_info[:partition_id]
+
+ # Handle both :replicas (array of objects) and :replica_brokers (array of IDs)
+ replicas = partition_info[:replicas] || partition_info[:replica_brokers] || []
+ current_replicas = if replicas.first.respond_to?(:node_id)
+ replicas.map(&:node_id).sort
+ else
+ replicas.sort
+ end
+
+ if rebalance_only
+ # For rebalancing, redistribute current replicas optimally
+ new_replicas = select_brokers_for_partition(
+ partition_id: partition_id,
+ brokers: brokers,
+ replica_count: target_replication_factor,
+ avoid_brokers: []
+ )
+ else
+ # For replication increase, keep existing replicas and add new ones
+ additional_needed = target_replication_factor - current_replicas.size
+ available_brokers = brokers - current_replicas
+
+ additional_replicas = select_additional_brokers(
+ available_brokers: available_brokers,
+ needed_count: additional_needed,
+ partition_id: partition_id
+ )
+
+ new_replicas = (current_replicas + additional_replicas).sort
+ end
+
+ assignments[partition_id] = new_replicas
+ end
+
+ assignments
+ end
+
+ # Selects brokers for a partition using round-robin distribution
+ # Distributes replicas evenly across available brokers
+ # @param partition_id [Integer] partition identifier for offset calculation
+ # @param brokers [Array<Integer>] available broker node IDs
+ # @param replica_count [Integer] number of replicas needed
+ # @param avoid_brokers [Array<Integer>] broker IDs to exclude from selection
+ # @return [Array<Integer>] sorted array of selected broker node IDs
+ def select_brokers_for_partition(
+ partition_id:,
+ brokers:,
+ replica_count:,
+ avoid_brokers: []
+ )
+ available_brokers = brokers - avoid_brokers
+
+ # Simple round-robin selection starting from a different offset per partition
+ # This helps distribute replicas more evenly across brokers
+ start_index = partition_id % available_brokers.size
+ selected = []
+
+ replica_count.times do |replica_index|
+ broker_index = (start_index + replica_index) % available_brokers.size
+ selected << available_brokers[broker_index]
+ end
+
+ selected.sort
+ end
+
+ # Selects additional brokers for increasing replication factor
+ # Uses round-robin selection for even distribution across available brokers
+ # @param available_brokers [Array<Integer>] broker IDs available for new replicas
+ # @param needed_count [Integer] number of additional brokers needed
+ # @param partition_id [Integer] partition identifier for offset calculation
+ # @return [Array<Integer>] sorted array of selected broker node IDs
+ def select_additional_brokers(available_brokers:, needed_count:, partition_id:)
+ # Use round-robin starting from partition-specific offset
+ start_index = partition_id % available_brokers.size
+ selected = []
+
+ needed_count.times do |additional_replica_index|
+ broker_index = (start_index + additional_replica_index) % available_brokers.size
+ selected << available_brokers[broker_index]
+ end
+
+ selected.sort
+ end
+ end
+
+ private
+
+ # Generates the JSON structure required by kafka-reassign-partitions.sh
+ # Creates Kafka-compatible reassignment plan with version and partitions data
+ # @return [void]
+ def generate_reassignment_json
+ partitions_data = @partitions_assignment.map do |partition_id, replica_broker_ids|
+ {
+ topic: @topic,
+ partition: partition_id,
+ replicas: replica_broker_ids
+ }
+ end
+
+ reassignment_data = {
+ version: 1,
+ partitions: partitions_data
+ }
+
+ @reassignment_json = JSON.pretty_generate(reassignment_data)
+ end
+
+ # Generates command templates for executing the reassignment plan
+ # Builds generate, execute, and verify command templates with placeholders
+ # @return [void]
+ def generate_execution_commands
+ @execution_commands = {
+ generate: build_generate_command,
+ execute: build_execute_command,
+ verify: build_verify_command
+ }
+ end
+
+ # Builds the kafka-reassign-partitions.sh command for generating reassignment plan
+ # @return [String] command template with placeholder for broker addresses
+ def build_generate_command
+ 'kafka-reassign-partitions.sh --bootstrap-server <KAFKA_BROKERS> ' \
+ '--reassignment-json-file reassignment.json --generate'
+ end
+
+ # Builds the kafka-reassign-partitions.sh command for executing reassignment
+ # @return [String] command template with placeholder for broker addresses
+ def build_execute_command
+ 'kafka-reassign-partitions.sh --bootstrap-server <KAFKA_BROKERS> ' \
+ '--reassignment-json-file reassignment.json --execute'
+ end
+
+ # Builds the kafka-reassign-partitions.sh command for verifying reassignment progress
+ # @return [String] command template with placeholder for broker addresses
+ def build_verify_command
+ 'kafka-reassign-partitions.sh --bootstrap-server <KAFKA_BROKERS> ' \
+ '--reassignment-json-file reassignment.json --verify'
+ end
+
+ # Generates detailed step-by-step instructions for executing the reassignment
+ # Creates human-readable guide with commands and important safety notes
+ # @return [void]
+ def generate_steps
+ @steps = [
+ "1. Export the reassignment JSON using: plan.export_to_file('reassignment.json')",
+ "2. Validate the plan (optional): #{@execution_commands[:generate]}",
+ "3. Execute the reassignment: #{@execution_commands[:execute]}",
+ "4. Monitor progress: #{@execution_commands[:verify]}",
+ '5. Verify completion by checking topic metadata',
+ '',
+ 'IMPORTANT NOTES:',
+ '- Replace <KAFKA_BROKERS> with your actual Kafka broker addresses',
+ '- The reassignment process may take time depending on data size',
+ '- Monitor disk space and network I/O during reassignment',
+ '- Consider running during low-traffic periods',
+ '- For large topics, consider throttling replica transfer rate',
+ '- Ensure sufficient disk space on target brokers before starting',
+ '- Keep monitoring until all replicas are in-sync (ISR)'
+ ]
+ end
+ end
+ end
+ end
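
As a quick reference while reviewing, the sketch below mirrors the { version:, partitions: } structure that #generate_reassignment_json builds above. The 'events' topic, partition count, and broker IDs are hypothetical placeholders; real assignments come from the cluster metadata and the round-robin selection shown in the class.

require 'json'

# Hypothetical 3-partition topic currently on brokers 1 and 2, gaining
# broker 3 to reach a replication factor of 3.
assignments = {
  0 => [1, 2, 3],
  1 => [1, 2, 3],
  2 => [1, 2, 3]
}

reassignment = {
  version: 1,
  partitions: assignments.map do |partition_id, replicas|
    { topic: 'events', partition: partition_id, replicas: replicas }
  end
}

puts JSON.pretty_generate(reassignment)
# The resulting file is what kafka-reassign-partitions.sh --execute consumes.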
data/lib/karafka/admin.rb CHANGED
@@ -1,7 +1,5 @@
  # frozen_string_literal: true
 
- require_relative 'admin/consumer_groups'
-
  module Karafka
  # Admin actions that we can perform via Karafka on our Kafka cluster
  #
@@ -139,6 +137,53 @@ module Karafka
  )
  end
 
+ # Plans a replication factor increase for a topic that can be used with Kafka's
+ # reassignment tools. Since librdkafka does not support increasing replication factor
+ # directly, this method generates the necessary JSON and commands for manual execution.
+ #
+ # @param topic [String] name of the topic to plan replication for
+ # @param replication_factor [Integer] target replication factor (must be higher than current)
+ # @param brokers [Hash{Integer => Array<Integer>}] optional manual broker assignments
+ # per partition. Keys are partition IDs, values are arrays of broker IDs. If not provided,
+ # assignments distribution will happen automatically.
+ # @return [Replication] plan object with JSON, commands, and instructions
+ #
+ # @example Plan replication increase with automatic broker distribution
+ # plan = Karafka::Admin.plan_topic_replication(topic: 'events', replication_factor: 3)
+ #
+ # # Review the plan
+ # puts plan.summary
+ #
+ # # Export JSON for Kafka's reassignment tools
+ # plan.export_to_file('reassignment.json')
+ #
+ # # Execute the plan (replace <KAFKA_BROKERS> with actual brokers)
+ # system(plan.execution_commands[:execute].gsub('<KAFKA_BROKERS>', 'localhost:9092'))
+ #
+ # @example Plan replication with manual broker placement - specify brokers
+ # plan = Karafka::Admin.plan_topic_replication(
+ # topic: 'events',
+ # replication_factor: 3,
+ # brokers: {
+ # 0 => [1, 2, 4], # Partition 0 on brokers 1, 2, 4
+ # 1 => [2, 3, 4], # Partition 1 on brokers 2, 3, 4
+ # 2 => [1, 3, 5] # Partition 2 on brokers 1, 3, 5
+ # }
+ # )
+ #
+ # # The plan will use your exact broker specifications
+ # puts plan.partitions_assignment
+ # # => { 0=>[1, 2, 4], 1=>[2, 3, 4], 2=>[1, 3, 5] }
+ #
+ # @see Replication.plan for more details
+ def plan_topic_replication(topic:, replication_factor:, brokers: nil)
+ Replication.plan(
+ topic: topic,
+ to: replication_factor,
+ brokers: brokers
+ )
+ end
+
  # @return [Rdkafka::Metadata] cluster metadata info
  def cluster_info
  with_admin(&:metadata)
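
Put together with the Replication class above, a typical end-to-end flow could look like the following sketch. The topic name, file path, and 'localhost:9092' broker address are placeholders, and the final command is still meant to be run with Kafka's Java tools.

# Hypothetical end-to-end usage of the new Admin API shown above
plan = Karafka::Admin.plan_topic_replication(
  topic: 'events',        # placeholder topic
  replication_factor: 3
)

puts plan.summary         # human-readable overview
puts plan.steps           # step-by-step checklist

path = plan.export_to_file('/tmp/reassignment.json')

# Substitute real broker addresses before running the Kafka CLI
cmd = plan.execution_commands[:execute].gsub('<KAFKA_BROKERS>', 'localhost:9092')
puts "Run manually: #{cmd} (using #{path})"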
data/lib/karafka/instrumentation/logger_listener.rb CHANGED
@@ -24,8 +24,6 @@ module Karafka
  @log_polling = log_polling
  end
 
- #
- #
  # @param event [Karafka::Core::Monitoring::Event] event details including payload
  def on_connection_listener_before_fetch_loop(event)
  listener_id = event[:caller].id
data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb CHANGED
@@ -44,11 +44,15 @@ module Karafka
 
  # Types of errors originating from user code in the consumer flow
  USER_CONSUMER_ERROR_TYPES = %w[
+ consumer.initialized.error
+ consumer.wrap.error
  consumer.consume.error
  consumer.revoked.error
+ consumer.idle.error
  consumer.shutdown.error
  consumer.tick.error
  consumer.eofed.error
+ consumer.after_consume.error
  ].freeze
 
  private_constant :USER_CONSUMER_ERROR_TYPES
@@ -90,42 +90,58 @@ module Karafka
  client.active_span&.set_error(error)
 
  case event[:type]
+ when 'consumer.initialized.error'
+ error "Consumer initialized error: #{error}"
+ when 'consumer.wrap.error'
+ error "Consumer wrap failed due to an error: #{error}"
  when 'consumer.consume.error'
  error "Consumer consuming error: #{error}"
  when 'consumer.revoked.error'
  error "Consumer on revoked failed due to an error: #{error}"
- when 'consumer.before_schedule.error'
- error "Consumer before schedule failed due to an error: #{error}"
- when 'consumer.before_consume.error'
- error "Consumer before consume failed due to an error: #{error}"
- when 'consumer.after_consume.error'
- error "Consumer after consume failed due to an error: #{error}"
+ when 'consumer.idle.error'
+ error "Consumer idle failed due to an error: #{error}"
  when 'consumer.shutdown.error'
  error "Consumer on shutdown failed due to an error: #{error}"
  when 'consumer.tick.error'
- error "Consumer tick failed due to an error: #{error}"
+ error "Consumer on tick failed due to an error: #{error}"
  when 'consumer.eofed.error'
- error "Consumer eofed failed due to an error: #{error}"
+ error "Consumer on eofed failed due to an error: #{error}"
+ when 'consumer.after_consume.error'
+ error "Consumer on after_consume failed due to an error: #{error}"
  when 'worker.process.error'
  fatal "Worker processing failed due to an error: #{error}"
  when 'connection.listener.fetch_loop.error'
  error "Listener fetch loop error: #{error}"
+ when 'swarm.supervisor.error'
+ fatal "Swarm supervisor crashed due to an error: #{error}"
  when 'runner.call.error'
  fatal "Runner crashed due to an error: #{error}"
  when 'app.stopping.error'
  error 'Forceful Karafka server stop'
- when 'swarm.supervisor.error'
- fatal "Swarm supervisor crashed due to an error: #{error}"
+ when 'app.forceful_stopping.error'
+ error "Forceful shutdown error occurred: #{error}"
  when 'librdkafka.error'
  error "librdkafka internal error occurred: #{error}"
- # Those will only occur when retries in the client fail and when they did not stop
- # after back-offs
+ when 'callbacks.statistics.error'
+ error "callbacks.statistics processing failed due to an error: #{error}"
+ when 'callbacks.error.error'
+ error "callbacks.error processing failed due to an error: #{error}"
+ # Those will only occur when retries in the client fail and when they did not stop
+ # after back-offs
  when 'connection.client.poll.error'
  error "Data polling error occurred: #{error}"
+ when 'connection.client.rebalance_callback.error'
+ error "Rebalance callback error occurred: #{error}"
+ when 'connection.client.unsubscribe.error'
+ error "Client unsubscribe error occurred: #{error}"
+ when 'parallel_segments.reducer.error'
+ error "Parallel segments reducer error occurred: #{error}"
+ when 'parallel_segments.partitioner.error'
+ error "Parallel segments partitioner error occurred: #{error}"
+ when 'virtual_partitions.partitioner.error'
+ error "Virtual partitions partitioner error occurred: #{error}"
  else
- pop_tags
- # This should never happen. Please contact the maintainers
- raise Errors::UnsupportedCaseError, event
+ error "#{event[:type]} error occurred: #{error}"
  end
 
  pop_tags
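
The notable behavioral change in this hunk is the else branch: unknown event types are now logged with a generic message instead of raising Errors::UnsupportedCaseError. A condensed, self-contained sketch of that routing pattern, using puts in place of the listener's error/fatal logging helpers:

# Simplified stand-in for the listener's case/when routing. Known error
# types get dedicated messages; anything unrecognized falls back to a
# generic log line rather than raising.
def log_error_event(type, error)
  case type
  when 'consumer.consume.error'
    puts "ERROR Consumer consuming error: #{error}"
  when 'worker.process.error'
    puts "FATAL Worker processing failed due to an error: #{error}"
  else
    # New fallback introduced in this version
    puts "ERROR #{type} error occurred: #{error}"
  end
end

log_error_event('some.future.error', 'boom')
# => ERROR some.future.error error occurred: boom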
data/lib/karafka/licenser.rb CHANGED
@@ -46,7 +46,7 @@ module Karafka
 
  # We gsub and strip in case someone copy-pasted it as a multi line string
  formatted_token = license_config.token.strip.delete("\n").delete(' ')
- decoded_token = Base64.decode64(formatted_token)
+ decoded_token = formatted_token.unpack1('m') # decode from base64
 
  begin
  data = public_key.public_decrypt(decoded_token)
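
The swap from Base64.decode64 to String#unpack1('m') keeps the same lenient Base64 decoding while avoiding a dependency on the base64 library, which newer Rubies no longer ship as a default gem. A quick equivalence check (the sample payload is arbitrary):

require 'base64'

token = Base64.strict_encode64('karafka-license-payload')

legacy  = Base64.decode64(token) # previous approach
current = token.unpack1('m')     # approach used above; 'm' is the Base64 directive

raise 'decoding mismatch' unless legacy == current
puts current # => karafka-license-payload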
data/lib/karafka/messages/messages.rb CHANGED
@@ -64,6 +64,38 @@ module Karafka
  @messages_array.dup
  end
 
+ # Returns the underlying messages array directly without duplication.
+ #
+ # This method exists to provide Karafka internals with direct access to the messages array,
+ # bypassing any monkey patches that external libraries may apply to enumerable methods.
+ #
+ # ## Why this method exists
+ #
+ # External instrumentation libraries like DataDog's `dd-trace-rb` patch the `#each` method
+ # on this class to create tracing spans around message iteration. While this is desirable
+ # for user code (to trace message processing), it causes problems when Karafka's internal
+ # infrastructure iterates over messages for housekeeping tasks (offset tracking,
+ # deserialization, etc.) - creating empty/unwanted spans.
+ #
+ # By using `raw.map` or `raw.each` instead of `map` or `each` directly, internal code
+ # bypasses the patched `#each` method since it operates on the raw Array, not this class.
+ #
+ # ## Usage
+ #
+ # This method should ONLY be used by Karafka internals. User-facing code (consumers,
+ # ActiveJob processors, etc.) should use regular `#each`/`#map` so that instrumentation
+ # libraries can properly trace message processing.
+ #
+ # @return [Array<Karafka::Messages::Message>] the underlying messages array (not a copy)
+ #
+ # @note This returns the actual internal array, not a copy. Do not modify it.
+ # @see https://github.com/karafka/karafka/issues/2939
+ #
+ # @private
+ def raw
+ @messages_array
+ end
+
  alias count size
  end
  end
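
A toy illustration (not Karafka internals) of the pattern the new #raw accessor enables: #each is wrapped by an instrumentation module, while housekeeping code can operate on the bare Array and skip the wrapper. The class and module names here are made up.

class Batch
  include Enumerable

  def initialize(items)
    @items = items
  end

  def each(&block)
    @items.each(&block)
  end

  # Direct access to the underlying array, mirroring Messages#raw
  def raw
    @items
  end
end

module Tracing
  def each(&block)
    puts 'tracing span opened' # a real library would start a span here
    super
  end
end

Batch.prepend(Tracing)

batch = Batch.new([1, 2, 3])
batch.map { |i| i * 2 }     # goes through Tracing#each, opens a span
batch.raw.map { |i| i * 2 } # plain Array#map, no span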
data/lib/karafka/pro/cleaner/messages/messages.rb CHANGED
@@ -13,7 +13,7 @@ module Karafka
  # This module is prepended to Karafka::Messages::Messages to add cleaning functionality.
  # The implementation calls super() to maintain compatibility with other libraries that
  # also prepend modules to modify the #each method (e.g., DataDog tracing).
- # See: https://github.com/DataDog/dd-trace-rb/issues/4867
+ # @see https://github.com/DataDog/dd-trace-rb/issues/4867
  module Messages
  # @param clean [Boolean] do we want to clean each message after we're done working with
  # it.