karafka 2.5.2 → 2.5.4.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/config/locales/errors.yml +14 -0
  4. data/karafka.gemspec +15 -4
  5. data/lib/active_job/queue_adapters/karafka_adapter.rb +2 -2
  6. data/lib/karafka/active_job/consumer.rb +2 -2
  7. data/lib/karafka/active_job/current_attributes.rb +2 -2
  8. data/lib/karafka/active_job/deserializer.rb +1 -1
  9. data/lib/karafka/active_job/dispatcher.rb +2 -2
  10. data/lib/karafka/admin/configs/resource.rb +7 -1
  11. data/lib/karafka/admin/consumer_groups.rb +6 -8
  12. data/lib/karafka/admin/contracts/replication.rb +149 -0
  13. data/lib/karafka/admin/replication.rb +462 -0
  14. data/lib/karafka/admin/topics.rb +5 -4
  15. data/lib/karafka/admin.rb +57 -12
  16. data/lib/karafka/app.rb +3 -3
  17. data/lib/karafka/base_consumer.rb +1 -1
  18. data/lib/karafka/cli/base.rb +1 -1
  19. data/lib/karafka/cli/console.rb +1 -1
  20. data/lib/karafka/cli/contracts/server.rb +1 -1
  21. data/lib/karafka/cli/help.rb +1 -1
  22. data/lib/karafka/cli/install.rb +2 -1
  23. data/lib/karafka/cli/server.rb +1 -1
  24. data/lib/karafka/cli/swarm.rb +1 -1
  25. data/lib/karafka/connection/client.rb +19 -18
  26. data/lib/karafka/connection/manager.rb +1 -0
  27. data/lib/karafka/connection/proxy.rb +1 -1
  28. data/lib/karafka/connection/rebalance_manager.rb +1 -1
  29. data/lib/karafka/connection/status.rb +1 -0
  30. data/lib/karafka/constraints.rb +1 -1
  31. data/lib/karafka/contracts/base.rb +1 -1
  32. data/lib/karafka/deserializers/payload.rb +1 -1
  33. data/lib/karafka/helpers/async.rb +1 -1
  34. data/lib/karafka/helpers/config_importer.rb +3 -3
  35. data/lib/karafka/helpers/multi_delegator.rb +3 -0
  36. data/lib/karafka/instrumentation/assignments_tracker.rb +2 -1
  37. data/lib/karafka/instrumentation/callbacks/error.rb +2 -2
  38. data/lib/karafka/instrumentation/callbacks/statistics.rb +3 -3
  39. data/lib/karafka/instrumentation/logger.rb +6 -6
  40. data/lib/karafka/instrumentation/logger_listener.rb +0 -2
  41. data/lib/karafka/instrumentation/monitor.rb +2 -2
  42. data/lib/karafka/instrumentation/vendors/appsignal/base.rb +1 -1
  43. data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +4 -0
  44. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +32 -16
  45. data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +2 -2
  46. data/lib/karafka/instrumentation/vendors/kubernetes/base_listener.rb +1 -1
  47. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +3 -15
  48. data/lib/karafka/licenser.rb +1 -1
  49. data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
  50. data/lib/karafka/messages/messages.rb +32 -0
  51. data/lib/karafka/pro/active_job/consumer.rb +2 -2
  52. data/lib/karafka/pro/active_job/dispatcher.rb +3 -3
  53. data/lib/karafka/pro/cleaner/messages/messages.rb +1 -1
  54. data/lib/karafka/pro/cleaner.rb +3 -3
  55. data/lib/karafka/pro/cli/contracts/server.rb +1 -1
  56. data/lib/karafka/pro/cli/parallel_segments/base.rb +4 -3
  57. data/lib/karafka/pro/cli/parallel_segments/collapse.rb +1 -1
  58. data/lib/karafka/pro/cli/parallel_segments/distribute.rb +1 -1
  59. data/lib/karafka/pro/cli/parallel_segments.rb +1 -1
  60. data/lib/karafka/pro/connection/manager.rb +1 -2
  61. data/lib/karafka/pro/connection/multiplexing/listener.rb +1 -0
  62. data/lib/karafka/pro/contracts/base.rb +1 -1
  63. data/lib/karafka/pro/encryption/cipher.rb +3 -2
  64. data/lib/karafka/pro/encryption/contracts/config.rb +1 -1
  65. data/lib/karafka/pro/encryption/messages/parser.rb +1 -1
  66. data/lib/karafka/pro/encryption/setup/config.rb +1 -1
  67. data/lib/karafka/pro/iterator/tpl_builder.rb +1 -1
  68. data/lib/karafka/pro/iterator.rb +1 -1
  69. data/lib/karafka/pro/loader.rb +1 -1
  70. data/lib/karafka/pro/processing/coordinator.rb +1 -1
  71. data/lib/karafka/pro/processing/filters/base.rb +1 -0
  72. data/lib/karafka/pro/processing/filters/delayer.rb +1 -1
  73. data/lib/karafka/pro/processing/filters/expirer.rb +1 -1
  74. data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb +1 -1
  75. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +1 -1
  76. data/lib/karafka/pro/processing/jobs/eofed_non_blocking.rb +1 -1
  77. data/lib/karafka/pro/processing/jobs/periodic.rb +1 -1
  78. data/lib/karafka/pro/processing/jobs/revoked_non_blocking.rb +1 -1
  79. data/lib/karafka/pro/processing/jobs_builder.rb +1 -1
  80. data/lib/karafka/pro/processing/jobs_queue.rb +0 -2
  81. data/lib/karafka/pro/processing/offset_metadata/fetcher.rb +1 -0
  82. data/lib/karafka/pro/processing/partitioner.rb +1 -1
  83. data/lib/karafka/pro/processing/strategies/base.rb +1 -1
  84. data/lib/karafka/pro/processing/strategies/default.rb +2 -2
  85. data/lib/karafka/pro/processing/strategies/dlq/default.rb +1 -1
  86. data/lib/karafka/pro/processing/strategies/vp/default.rb +1 -1
  87. data/lib/karafka/pro/processing/strategy_selector.rb +1 -0
  88. data/lib/karafka/pro/processing/virtual_partitions/distributors/balanced.rb +4 -2
  89. data/lib/karafka/pro/processing/virtual_partitions/distributors/consistent.rb +4 -2
  90. data/lib/karafka/pro/recurring_tasks/consumer.rb +3 -2
  91. data/lib/karafka/pro/recurring_tasks/contracts/config.rb +2 -2
  92. data/lib/karafka/pro/recurring_tasks/contracts/task.rb +1 -1
  93. data/lib/karafka/pro/recurring_tasks/deserializer.rb +1 -1
  94. data/lib/karafka/pro/recurring_tasks/dispatcher.rb +1 -1
  95. data/lib/karafka/pro/recurring_tasks/executor.rb +2 -1
  96. data/lib/karafka/pro/recurring_tasks/schedule.rb +5 -2
  97. data/lib/karafka/pro/recurring_tasks/serializer.rb +6 -5
  98. data/lib/karafka/pro/recurring_tasks/setup/config.rb +2 -2
  99. data/lib/karafka/pro/recurring_tasks/task.rb +1 -1
  100. data/lib/karafka/pro/routing/features/dead_letter_queue/topic.rb +3 -0
  101. data/lib/karafka/pro/routing/features/multiplexing/subscription_groups_builder.rb +1 -1
  102. data/lib/karafka/pro/routing/features/multiplexing.rb +5 -5
  103. data/lib/karafka/pro/routing/features/offset_metadata.rb +4 -4
  104. data/lib/karafka/pro/routing/features/parallel_segments/builder.rb +1 -1
  105. data/lib/karafka/pro/routing/features/patterns/patterns.rb +1 -1
  106. data/lib/karafka/pro/routing/features/periodic_job/topic.rb +1 -1
  107. data/lib/karafka/pro/routing/features/recurring_tasks/builder.rb +1 -1
  108. data/lib/karafka/pro/routing/features/swarm.rb +1 -1
  109. data/lib/karafka/pro/routing/features/throttling/topic.rb +3 -1
  110. data/lib/karafka/pro/scheduled_messages/consumer.rb +1 -1
  111. data/lib/karafka/pro/scheduled_messages/contracts/config.rb +2 -2
  112. data/lib/karafka/pro/scheduled_messages/contracts/message.rb +1 -1
  113. data/lib/karafka/pro/scheduled_messages/daily_buffer.rb +3 -2
  114. data/lib/karafka/pro/scheduled_messages/day.rb +1 -0
  115. data/lib/karafka/pro/scheduled_messages/deserializers/headers.rb +1 -1
  116. data/lib/karafka/pro/scheduled_messages/deserializers/payload.rb +1 -1
  117. data/lib/karafka/pro/scheduled_messages/max_epoch.rb +1 -0
  118. data/lib/karafka/pro/scheduled_messages/proxy.rb +1 -1
  119. data/lib/karafka/pro/scheduled_messages/serializer.rb +3 -3
  120. data/lib/karafka/pro/scheduled_messages/setup/config.rb +2 -2
  121. data/lib/karafka/pro/scheduled_messages/state.rb +1 -0
  122. data/lib/karafka/pro/scheduled_messages/tracker.rb +1 -0
  123. data/lib/karafka/process.rb +4 -4
  124. data/lib/karafka/processing/executor.rb +1 -1
  125. data/lib/karafka/processing/inline_insights/tracker.rb +1 -0
  126. data/lib/karafka/processing/jobs_queue.rb +1 -1
  127. data/lib/karafka/processing/result.rb +1 -0
  128. data/lib/karafka/processing/strategies/dlq.rb +1 -1
  129. data/lib/karafka/processing/strategy_selector.rb +1 -0
  130. data/lib/karafka/routing/activity_manager.rb +1 -0
  131. data/lib/karafka/routing/builder.rb +3 -1
  132. data/lib/karafka/routing/consumer_group.rb +19 -1
  133. data/lib/karafka/routing/contracts/consumer_group.rb +3 -2
  134. data/lib/karafka/routing/contracts/topic.rb +5 -2
  135. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -1
  136. data/lib/karafka/routing/features/declaratives/topic.rb +5 -2
  137. data/lib/karafka/routing/features/deserializers/topic.rb +3 -3
  138. data/lib/karafka/routing/features/inline_insights.rb +5 -5
  139. data/lib/karafka/routing/router.rb +1 -1
  140. data/lib/karafka/routing/subscription_group.rb +2 -2
  141. data/lib/karafka/routing/subscription_groups_builder.rb +18 -2
  142. data/lib/karafka/routing/topic.rb +3 -3
  143. data/lib/karafka/server.rb +1 -1
  144. data/lib/karafka/setup/attributes_map.rb +4 -2
  145. data/lib/karafka/setup/config.rb +21 -10
  146. data/lib/karafka/setup/config_proxy.rb +209 -0
  147. data/lib/karafka/setup/contracts/config.rb +1 -1
  148. data/lib/karafka/swarm/liveness_listener.rb +1 -0
  149. data/lib/karafka/swarm/manager.rb +7 -6
  150. data/lib/karafka/swarm/node.rb +1 -1
  151. data/lib/karafka/swarm/supervisor.rb +1 -0
  152. data/lib/karafka/time_trackers/base.rb +1 -1
  153. data/lib/karafka/version.rb +1 -1
  154. data/lib/karafka.rb +2 -3
  155. metadata +8 -65
  156. data/.coditsu/ci.yml +0 -3
  157. data/.console_irbrc +0 -11
  158. data/.github/CODEOWNERS +0 -3
  159. data/.github/FUNDING.yml +0 -1
  160. data/.github/ISSUE_TEMPLATE/bug_report.md +0 -43
  161. data/.github/ISSUE_TEMPLATE/feature_request.md +0 -20
  162. data/.github/workflows/ci_linux_ubuntu_x86_64_gnu.yml +0 -278
  163. data/.github/workflows/ci_macos_arm64.yml +0 -151
  164. data/.github/workflows/push.yml +0 -35
  165. data/.github/workflows/trigger-wiki-refresh.yml +0 -30
  166. data/.github/workflows/verify-action-pins.yml +0 -16
  167. data/.gitignore +0 -69
  168. data/.rspec +0 -7
  169. data/.ruby-gemset +0 -1
  170. data/.ruby-version +0 -1
  171. data/CODE_OF_CONDUCT.md +0 -46
  172. data/CONTRIBUTING.md +0 -32
  173. data/Gemfile +0 -28
  174. data/Gemfile.lock +0 -173
  175. data/Rakefile +0 -4
  176. data/SECURITY.md +0 -23
  177. data/bin/benchmarks +0 -99
  178. data/bin/clean_kafka +0 -43
  179. data/bin/create_token +0 -22
  180. data/bin/integrations +0 -341
  181. data/bin/record_rss +0 -50
  182. data/bin/rspecs +0 -26
  183. data/bin/scenario +0 -29
  184. data/bin/stress_many +0 -13
  185. data/bin/stress_one +0 -13
  186. data/bin/verify_kafka_warnings +0 -36
  187. data/bin/verify_license_integrity +0 -37
  188. data/bin/verify_topics_naming +0 -27
  189. data/bin/wait_for_kafka +0 -24
  190. data/docker-compose.yml +0 -25
  191. data/examples/payloads/avro/.gitkeep +0 -0
  192. data/examples/payloads/json/sample_set_01/enrollment_event.json +0 -579
  193. data/examples/payloads/json/sample_set_01/ingestion_event.json +0 -30
  194. data/examples/payloads/json/sample_set_01/transaction_event.json +0 -17
  195. data/examples/payloads/json/sample_set_01/user_event.json +0 -11
  196. data/examples/payloads/json/sample_set_02/download.json +0 -191
  197. data/examples/payloads/json/sample_set_03/event_type_1.json +0 -18
  198. data/examples/payloads/json/sample_set_03/event_type_2.json +0 -263
  199. data/examples/payloads/json/sample_set_03/event_type_3.json +0 -41
  200. data/log/.gitkeep +0 -0
  201. data/renovate.json +0 -21
data/lib/karafka/admin/replication.rb ADDED
@@ -0,0 +1,462 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   class Admin
+     # Replication administration operations helper
+     #
+     # Generates partition reassignment plans for increasing topic replication factor.
+     # Since librdkafka does not support changing replication factors directly, this class
+     # generates the necessary JSON configuration that can be executed using Kafka's Java-based
+     # reassignment tools.
+     #
+     # ## Important Considerations
+     #
+     # Replication factor changes are among the most resource-intensive operations in Kafka.
+     #
+     # ## Prerequisites
+     #
+     # 1. **Sufficient Disk Space**: Ensure target brokers have enough space for new replicas
+     # 2. **Network Capacity**: Verify network can handle additional replication traffic
+     # 3. **Broker Count**: Cannot exceed the number of available brokers
+     # 4. **Java Tools**: Kafka's reassignment tools must be available
+     #
+     # ## Best Practices
+     #
+     # - **Test First**: Always test on small topics or in staging environments
+     # - **Monitor Resources**: Watch disk space, network, and CPU during replication
+     # - **Incremental Changes**: Increase replication factor by 1 at a time for large topics
+     # - **Off-Peak Hours**: Execute during low-traffic periods to minimize impact
+     #
+     # @example Basic usage - increase replication factor
+     #   # Generate plan to increase replication from 2 to 3
+     #   plan = Karafka::Admin::Replication.plan(topic: 'events', to: 3)
+     #
+     #   # Review what will happen
+     #   puts plan.summary
+     #
+     #   # Export for execution
+     #   plan.export_to_file('/tmp/increase_replication.json')
+     #
+     #   # Execute with Kafka tools (outside of Ruby)
+     #   # kafka-reassign-partitions.sh --bootstrap-server localhost:9092 \
+     #   #   --reassignment-json-file /tmp/increase_replication.json --execute
+     #
+     # @example Rebalancing replicas across brokers
+     #   # Rebalance existing replicas without changing replication factor
+     #   plan = Karafka::Admin::Replication.rebalance(topic: 'events')
+     #   plan.export_to_file('/tmp/rebalance.json')
+     #
+     # @note This class only generates plans - actual execution requires Kafka's Java tools
+     # @note Always verify broker capacity before increasing replication
+     class Replication < Admin
+       attr_reader(
+         :topic,
+         :current_replication_factor,
+         :target_replication_factor,
+         :partitions_assignment,
+         :reassignment_json,
+         :execution_commands,
+         :steps
+       )
+
+       # Builds the replication plan
+       #
+       # @param topic [String] topic name
+       # @param current_replication_factor [Integer] current replication factor
+       # @param target_replication_factor [Integer] target replication factor
+       # @param partitions_assignment [Hash] partition to brokers assignment
+       # @param cluster_info [Hash] broker information
+       def initialize(
+         topic:,
+         current_replication_factor:,
+         target_replication_factor:,
+         partitions_assignment:,
+         cluster_info:
+       )
+         super()
+
+         @topic = topic
+         @current_replication_factor = current_replication_factor
+         @target_replication_factor = target_replication_factor
+         @partitions_assignment = partitions_assignment
+         @cluster_info = cluster_info
+
+         generate_reassignment_json
+         generate_execution_commands
+         generate_steps
+
+         freeze
+       end
+
+       # Export the reassignment JSON to a file
+       # @param file_path [String] path where to save the JSON file
+       def export_to_file(file_path)
+         File.write(file_path, @reassignment_json)
+         file_path
+       end
+
+       # @return [String] human-readable summary of the plan
+       def summary
+         broker_count = @cluster_info[:brokers].size
+         change = @target_replication_factor - @current_replication_factor
+         broker_nodes = @cluster_info[:brokers].map do |broker_info|
+           broker_info[:node_id]
+         end.join(', ')
+
+         <<~SUMMARY
+           Replication Increase Plan for Topic: #{@topic}
+           =====================================
+           Current replication factor: #{@current_replication_factor}
+           Target replication factor: #{@target_replication_factor}
+           Total partitions: #{@partitions_assignment.size}
+           Available brokers: #{broker_count} (#{broker_nodes})
+
+           This plan will increase replication by adding #{change} replica(s) to each partition.
+         SUMMARY
+       end
+
+       class << self
+         # Plans replication factor increase for a given topic
+         #
+         # Generates a detailed reassignment plan that preserves existing replica assignments
+         # while adding new replicas to meet the target replication factor. The plan uses
+         # round-robin distribution to balance new replicas across available brokers.
+         #
+         # @param topic [String] name of the topic
+         # @param to [Integer] target replication factor (must be higher than current)
+         # @param brokers [Hash{Integer => Array<Integer>}] optional manual broker assignments
+         #   per partition. Keys are partition IDs, values are arrays of broker IDs. If not
+         #   provided, automatic distribution (usually fine) will be used
+         # @return [Replication] plan object containing JSON, commands, and instructions
+         #
+         # @raise [ArgumentError] if target replication factor is not higher than current
+         # @raise [ArgumentError] if target replication factor exceeds available broker count
+         # @raise [Rdkafka::RdkafkaError] if topic metadata cannot be fetched
+         #
+         # @example Increase replication from 1 to 3 with automatic distribution
+         #   plan = Replication.plan(topic: 'events', to: 3)
+         #
+         #   # Inspect the plan
+         #   puts plan.summary
+         #   puts plan.reassignment_json
+         #
+         #   # Check which brokers will get new replicas
+         #   plan.partitions_assignment.each do |partition_id, broker_ids|
+         #     puts "Partition #{partition_id}: #{broker_ids.join(', ')}"
+         #   end
+         #
+         #   # Save and execute
+         #   plan.export_to_file('increase_rf.json')
+         #
+         # @example Increase replication with manual broker placement
+         #   # Specify exactly which brokers should host each partition
+         #   plan = Replication.plan(
+         #     topic: 'events',
+         #     to: 3,
+         #     brokers: {
+         #       0 => [1, 2, 4], # Partition 0 on brokers 1, 2, 4
+         #       1 => [2, 3, 4], # Partition 1 on brokers 2, 3, 4
+         #       2 => [1, 3, 5]  # Partition 2 on brokers 1, 3, 5
+         #     }
+         #   )
+         #
+         #   # The plan will use your exact broker specifications
+         #   puts plan.partitions_assignment
+         #   # => {0=>[1, 2, 4], 1=>[2, 3, 4], 2=>[1, 3, 5]}
+         #
+         # @note When using manual placement, ensure all partitions are specified
+         # @note Manual placement overrides automatic distribution entirely
+         def plan(topic:, to:, brokers: nil)
+           topic_info = fetch_topic_info(topic)
+           first_partition = topic_info[:partitions].first
+           current_rf = first_partition[:replica_count] || first_partition[:replicas]&.size
+           cluster_info = fetch_cluster_info
+
+           # Use contract for validation
+           validation_data = {
+             topic: topic,
+             to: to,
+             brokers: brokers,
+             current_rf: current_rf,
+             broker_count: cluster_info[:brokers].size,
+             topic_info: topic_info,
+             cluster_info: cluster_info
+           }
+
+           Contracts::Replication.new.validate!(validation_data)
+
+           partitions_assignment = brokers || generate_partitions_assignment(
+             topic_info: topic_info,
+             target_replication_factor: to,
+             cluster_info: cluster_info
+           )
+
+           new(
+             topic: topic,
+             current_replication_factor: current_rf,
+             target_replication_factor: to,
+             partitions_assignment: partitions_assignment,
+             cluster_info: cluster_info
+           )
+         end
+
+         # Plans rebalancing of existing replicas across brokers
+         #
+         # Generates a reassignment plan that redistributes existing replicas more evenly
+         # across the cluster without changing the replication factor. Useful for:
+         #
+         # - Balancing load after adding new brokers to the cluster
+         # - Redistributing replicas after broker failures and recovery
+         # - Optimizing replica placement for better resource utilization
+         # - Moving replicas away from overloaded brokers
+         #
+         # @param topic [String] name of the topic to rebalance
+         # @return [Replication] rebalancing plan
+         #
+         # @example Rebalance after adding new brokers
+         #   # After adding brokers 4 and 5 to a 3-broker cluster
+         #   plan = Replication.rebalance(topic: 'events')
+         #
+         #   # Review how replicas will be redistributed
+         #   puts plan.summary
+         #
+         #   # Execute if distribution looks good
+         #   plan.export_to_file('rebalance.json')
+         #   # Then run: kafka-reassign-partitions.sh --execute ...
+         #
+         # @note This maintains the same replication factor
+         # @note All data will be copied to new locations during rebalancing
+         # @note Consider impact on cluster resources during rebalancing
+         def rebalance(topic:)
+           topic_info = fetch_topic_info(topic)
+           first_partition = topic_info[:partitions].first
+           current_rf = first_partition[:replica_count] || first_partition[:replicas]&.size
+           cluster_info = fetch_cluster_info
+
+           partitions_assignment = generate_partitions_assignment(
+             topic_info: topic_info,
+             target_replication_factor: current_rf,
+             cluster_info: cluster_info,
+             rebalance_only: true
+           )
+
+           new(
+             topic: topic,
+             current_replication_factor: current_rf,
+             target_replication_factor: current_rf,
+             partitions_assignment: partitions_assignment,
+             cluster_info: cluster_info
+           )
+         end
+
+         private
+
+         # Fetches topic metadata including partitions and replica information
+         # @param topic [String] name of the topic
+         # @return [Hash] topic information with partitions metadata
+         def fetch_topic_info(topic)
+           Topics.info(topic)
+         end
+
+         # Fetches cluster broker information from Kafka metadata
+         # @return [Hash] cluster information with broker details (node_id, host:port)
+         def fetch_cluster_info
+           cluster_metadata = cluster_info
+           {
+             brokers: cluster_metadata.brokers.map do |broker|
+               # Handle both hash and object formats from metadata
+               # rdkafka returns hashes with broker_id, broker_name, broker_port
+               if broker.is_a?(Hash)
+                 node_id = broker[:broker_id] || broker[:node_id]
+                 host = broker[:broker_name] || broker[:host]
+                 port = broker[:broker_port] || broker[:port]
+                 { node_id: node_id, host: "#{host}:#{port}" }
+               else
+                 { node_id: broker.node_id, host: "#{broker.host}:#{broker.port}" }
+               end
+             end
+           }
+         end
+
+         # Generates partition-to-broker assignments for replication changes
+         # Handles both replication factor increases and rebalancing scenarios
+         # @param topic_info [Hash] topic metadata with partition information
+         # @param target_replication_factor [Integer] desired replication factor
+         # @param cluster_info [Hash] cluster metadata with broker information
+         # @param rebalance_only [Boolean] true for rebalancing, false for increase
+         # @return [Hash{Integer => Array<Integer>}] assignments (partition_id => broker_ids)
+         def generate_partitions_assignment(
+           topic_info:,
+           target_replication_factor:,
+           cluster_info:,
+           rebalance_only: false
+         )
+           partitions = topic_info[:partitions]
+           brokers = cluster_info[:brokers].map { |broker_info| broker_info[:node_id] }.sort
+           assignments = {}
+
+           partitions.each do |partition_info|
+             partition_id = partition_info[:partition_id]
+
+             # Handle both :replicas (array of objects) and :replica_brokers (array of IDs)
+             replicas = partition_info[:replicas] || partition_info[:replica_brokers] || []
+             current_replicas = if replicas.first.respond_to?(:node_id)
+                                  replicas.map(&:node_id).sort
+                                else
+                                  replicas.sort
+                                end
+
+             if rebalance_only
+               # For rebalancing, redistribute current replicas optimally
+               new_replicas = select_brokers_for_partition(
+                 partition_id: partition_id,
+                 brokers: brokers,
+                 replica_count: target_replication_factor,
+                 avoid_brokers: []
+               )
+             else
+               # For replication increase, keep existing replicas and add new ones
+               additional_needed = target_replication_factor - current_replicas.size
+               available_brokers = brokers - current_replicas
+
+               additional_replicas = select_additional_brokers(
+                 available_brokers: available_brokers,
+                 needed_count: additional_needed,
+                 partition_id: partition_id
+               )
+
+               new_replicas = (current_replicas + additional_replicas).sort
+             end
+
+             assignments[partition_id] = new_replicas
+           end
+
+           assignments
+         end
+
+         # Selects brokers for a partition using round-robin distribution
+         # Distributes replicas evenly across available brokers
+         # @param partition_id [Integer] partition identifier for offset calculation
+         # @param brokers [Array<Integer>] available broker node IDs
+         # @param replica_count [Integer] number of replicas needed
+         # @param avoid_brokers [Array<Integer>] broker IDs to exclude from selection
+         # @return [Array<Integer>] sorted array of selected broker node IDs
+         def select_brokers_for_partition(
+           partition_id:,
+           brokers:,
+           replica_count:,
+           avoid_brokers: []
+         )
+           available_brokers = brokers - avoid_brokers
+
+           # Simple round-robin selection starting from a different offset per partition
+           # This helps distribute replicas more evenly across brokers
+           start_index = partition_id % available_brokers.size
+           selected = []
+
+           replica_count.times do |replica_index|
+             broker_index = (start_index + replica_index) % available_brokers.size
+             selected << available_brokers[broker_index]
+           end
+
+           selected.sort
+         end
+
+         # Selects additional brokers for increasing replication factor
+         # Uses round-robin selection for even distribution across available brokers
+         # @param available_brokers [Array<Integer>] broker IDs available for new replicas
+         # @param needed_count [Integer] number of additional brokers needed
+         # @param partition_id [Integer] partition identifier for offset calculation
+         # @return [Array<Integer>] sorted array of selected broker node IDs
+         def select_additional_brokers(available_brokers:, needed_count:, partition_id:)
+           # Use round-robin starting from partition-specific offset
+           start_index = partition_id % available_brokers.size
+           selected = []
+
+           needed_count.times do |additional_replica_index|
+             broker_index = (start_index + additional_replica_index) % available_brokers.size
+             selected << available_brokers[broker_index]
+           end
+
+           selected.sort
+         end
+       end
+
+       private
+
+       # Generates the JSON structure required by kafka-reassign-partitions.sh
+       # Creates Kafka-compatible reassignment plan with version and partitions data
+       # @return [void]
+       def generate_reassignment_json
+         partitions_data = @partitions_assignment.map do |partition_id, replica_broker_ids|
+           {
+             topic: @topic,
+             partition: partition_id,
+             replicas: replica_broker_ids
+           }
+         end
+
+         reassignment_data = {
+           version: 1,
+           partitions: partitions_data
+         }
+
+         @reassignment_json = JSON.pretty_generate(reassignment_data)
+       end
+
+       # Generates command templates for executing the reassignment plan
+       # Builds generate, execute, and verify command templates with placeholders
+       # @return [void]
+       def generate_execution_commands
+         @execution_commands = {
+           generate: build_generate_command,
+           execute: build_execute_command,
+           verify: build_verify_command
+         }
+       end
+
+       # Builds the kafka-reassign-partitions.sh command for generating reassignment plan
+       # @return [String] command template with placeholder for broker addresses
+       def build_generate_command
+         'kafka-reassign-partitions.sh --bootstrap-server <KAFKA_BROKERS> ' \
+           '--reassignment-json-file reassignment.json --generate'
+       end
+
+       # Builds the kafka-reassign-partitions.sh command for executing reassignment
+       # @return [String] command template with placeholder for broker addresses
+       def build_execute_command
+         'kafka-reassign-partitions.sh --bootstrap-server <KAFKA_BROKERS> ' \
+           '--reassignment-json-file reassignment.json --execute'
+       end
+
+       # Builds the kafka-reassign-partitions.sh command for verifying reassignment progress
+       # @return [String] command template with placeholder for broker addresses
+       def build_verify_command
+         'kafka-reassign-partitions.sh --bootstrap-server <KAFKA_BROKERS> ' \
+           '--reassignment-json-file reassignment.json --verify'
+       end
+
+       # Generates detailed step-by-step instructions for executing the reassignment
+       # Creates human-readable guide with commands and important safety notes
+       # @return [void]
+       def generate_steps
+         @steps = [
+           "1. Export the reassignment JSON using: plan.export_to_file('reassignment.json')",
+           "2. Validate the plan (optional): #{@execution_commands[:generate]}",
+           "3. Execute the reassignment: #{@execution_commands[:execute]}",
+           "4. Monitor progress: #{@execution_commands[:verify]}",
+           '5. Verify completion by checking topic metadata',
+           '',
+           'IMPORTANT NOTES:',
+           '- Replace <KAFKA_BROKERS> with your actual Kafka broker addresses',
+           '- The reassignment process may take time depending on data size',
+           '- Monitor disk space and network I/O during reassignment',
+           '- Consider running during low-traffic periods',
+           '- For large topics, consider throttling replica transfer rate',
+           '- Ensure sufficient disk space on target brokers before starting',
+           '- Keep monitoring until all replicas are in-sync (ISR)'
+         ]
+       end
+     end
+   end
+ end
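Taken together, the new class can be driven end-to-end from a console session. The sketch below uses only methods visible in the file above; the topic name, target factor, and output path are illustrative:

    # Plan a replication increase for a hypothetical 'events' topic
    plan = Karafka::Admin::Replication.plan(topic: 'events', to: 3)

    # Human-readable overview of what will change
    puts plan.summary

    # Partition => broker ids that the generated JSON will request
    plan.partitions_assignment.each do |partition_id, broker_ids|
      puts "Partition #{partition_id} -> brokers #{broker_ids.join(', ')}"
    end

    # Persist the JSON consumed by kafka-reassign-partitions.sh and print the
    # step-by-step instructions, including the command templates
    plan.export_to_file('/tmp/reassignment.json')
    puts plan.steps.join("\n")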
data/lib/karafka/admin/topics.rb CHANGED
@@ -36,7 +36,7 @@ module Karafka
 
  # Build the requested range - since first element is on the start offset we need to
  # subtract one from requested count to end up with expected number of elements
- requested_range = (start_offset..(start_offset + (count - 1)))
+ requested_range = (start_offset..(start_offset + count - 1))
  # Establish theoretical available range. Note, that this does not handle cases related
  # to log retention or compaction
  available_range = (low_offset..(high_offset - 1))
@@ -75,7 +75,7 @@ module Karafka
  # Use topic from routes if we can match it or create a dummy one
  # Dummy one is used in case we cannot match the topic with routes. This can happen
  # when admin API is used to read topics that are not part of the routing
- topic = ::Karafka::Routing::Router.find_or_initialize_by_name(name)
+ topic = Karafka::Routing::Router.find_or_initialize_by_name(name)
 
  messages.map! do |message|
  Messages::Builders::Message.call(
@@ -143,7 +143,8 @@ module Karafka
  # partitions
  #
  # @param name_or_hash [String, Symbol, Hash] topic name or hash with topics and partitions
- # @param partition [Integer, nil] partition number (required when first param is topic name)
+ # @param partition [Integer, nil] partition number
+ #   (required when first param is topic name)
  #
  # @return [Array<Integer, Integer>, Hash] when querying single partition returns array with
  #   low and high watermark offsets, when querying multiple returns nested hash
@@ -217,7 +218,7 @@ module Karafka
  # @return [Integer] expected offset
  def resolve_offset(consumer, name, partition, offset)
  if offset.is_a?(Time)
- tpl = ::Rdkafka::Consumer::TopicPartitionList.new
+ tpl = Rdkafka::Consumer::TopicPartitionList.new
  tpl.add_topic_and_partitions_with_offsets(
  name, partition => offset
  )
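The reworked docs above describe a dual single/multi query mode for watermark offsets. Assuming the documented parameters belong to the watermark offsets reader exposed as Karafka::Admin.read_watermark_offsets (as in earlier releases), usage would look roughly like this; names and values are illustrative:

    # Single partition: array with low and high watermark offsets
    low, high = Karafka::Admin.read_watermark_offsets('events', 0)

    # Multiple topics and partitions via the hash form: nested hash keyed
    # by topic and partition (example values)
    offsets = Karafka::Admin.read_watermark_offsets('events' => [0, 1])
    # => { 'events' => { 0 => [0, 340], 1 => [0, 512] } }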
data/lib/karafka/admin.rb CHANGED
@@ -1,7 +1,5 @@
  # frozen_string_literal: true
 
- require_relative 'admin/consumer_groups'
-
  module Karafka
  # Admin actions that we can perform via Karafka on our Kafka cluster
  #
@@ -124,13 +122,13 @@ module Karafka
 
  # Reads lags and offsets for given topics in the context of consumer groups defined in the
  # routing
- # @param consumer_groups_with_topics [Hash<String, Array<String>>] hash with consumer groups
- #   names with array of topics to query per consumer group inside
+ # @param consumer_groups_with_topics [Hash{String => Array<String>}] hash with consumer
+ #   groups names with array of topics to query per consumer group inside
  # @param active_topics_only [Boolean] if set to false, when we use routing topics, will
  #   select also topics that are marked as inactive in routing
- # @return [Hash<String, Hash<Integer, <Hash<Integer>>>>] hash where the top level keys are
- #   the consumer groups and values are hashes with topics and inside partitions with lags
- #   and offsets
+ # @return [Hash{String => Hash{Integer => Hash{Integer => Object}}}] hash where the top
+ #   level keys are the consumer groups and values are hashes with topics and inside
+ #   partitions with lags and offsets
  # @see ConsumerGroups.read_lags_with_offsets
  def read_lags_with_offsets(consumer_groups_with_topics = {}, active_topics_only: true)
  ConsumerGroups.read_lags_with_offsets(
@@ -139,6 +137,53 @@ module Karafka
  )
  end
 
+ # Plans a replication factor increase for a topic that can be used with Kafka's
+ # reassignment tools. Since librdkafka does not support increasing replication factor
+ # directly, this method generates the necessary JSON and commands for manual execution.
+ #
+ # @param topic [String] name of the topic to plan replication for
+ # @param replication_factor [Integer] target replication factor (must be higher than current)
+ # @param brokers [Hash{Integer => Array<Integer>}] optional manual broker assignments
+ #   per partition. Keys are partition IDs, values are arrays of broker IDs. If not provided,
+ #   assignments distribution will happen automatically.
+ # @return [Replication] plan object with JSON, commands, and instructions
+ #
+ # @example Plan replication increase with automatic broker distribution
+ #   plan = Karafka::Admin.plan_topic_replication(topic: 'events', replication_factor: 3)
+ #
+ #   # Review the plan
+ #   puts plan.summary
+ #
+ #   # Export JSON for Kafka's reassignment tools
+ #   plan.export_to_file('reassignment.json')
+ #
+ #   # Execute the plan (replace <KAFKA_BROKERS> with actual brokers)
+ #   system(plan.execution_commands[:execute].gsub('<KAFKA_BROKERS>', 'localhost:9092'))
+ #
+ # @example Plan replication with manual broker placement - specify brokers
+ #   plan = Karafka::Admin.plan_topic_replication(
+ #     topic: 'events',
+ #     replication_factor: 3,
+ #     brokers: {
+ #       0 => [1, 2, 4], # Partition 0 on brokers 1, 2, 4
+ #       1 => [2, 3, 4], # Partition 1 on brokers 2, 3, 4
+ #       2 => [1, 3, 5]  # Partition 2 on brokers 1, 3, 5
+ #     }
+ #   )
+ #
+ #   # The plan will use your exact broker specifications
+ #   puts plan.partitions_assignment
+ #   # => { 0=>[1, 2, 4], 1=>[2, 3, 4], 2=>[1, 3, 5] }
+ #
+ # @see Replication.plan for more details
+ def plan_topic_replication(topic:, replication_factor:, brokers: nil)
+   Replication.plan(
+     topic: topic,
+     to: replication_factor,
+     brokers: brokers
+   )
+ end
+
  # @return [Rdkafka::Metadata] cluster metadata info
  def cluster_info
  with_admin(&:metadata)
@@ -158,7 +203,7 @@ module Karafka
  bind_oauth(bind_id, consumer)
 
  consumer.start
- proxy = ::Karafka::Connection::Proxy.new(consumer)
+ proxy = Karafka::Connection::Proxy.new(consumer)
  yield(proxy)
  ensure
  # Always unsubscribe consumer just to be sure, that no metadata requests are running
@@ -188,7 +233,7 @@ module Karafka
  bind_oauth(bind_id, admin)
 
  admin.start
- proxy = ::Karafka::Connection::Proxy.new(admin)
+ proxy = Karafka::Connection::Proxy.new(admin)
  yield(proxy)
  ensure
  admin&.close
@@ -211,7 +256,7 @@ module Karafka
  # @param instance [Rdkafka::Consumer, Rdkafka::Admin] rdkafka instance to be used to set
  #   appropriate oauth token when needed
  def bind_oauth(id, instance)
- ::Karafka::Core::Instrumentation.oauthbearer_token_refresh_callbacks.add(
+ Karafka::Core::Instrumentation.oauthbearer_token_refresh_callbacks.add(
  id,
  Instrumentation::Callbacks::OauthbearerTokenRefresh.new(
  instance
@@ -224,7 +269,7 @@ module Karafka
  # @param id [String, Symbol] unique (for the lifetime of instance) id that we use for
  #   callback referencing
  def unbind_oauth(id)
- ::Karafka::Core::Instrumentation.oauthbearer_token_refresh_callbacks.delete(id)
+ Karafka::Core::Instrumentation.oauthbearer_token_refresh_callbacks.delete(id)
  end
 
  # There are some cases where rdkafka admin operations finish successfully but without the
@@ -269,7 +314,7 @@ module Karafka
  # consumer group or do something similar
  .merge!(settings)
  .then { |config| Karafka::Setup::AttributesMap.public_send(type, config) }
- .then { |config| ::Rdkafka::Config.new(config) }
+ .then { |config| Rdkafka::Config.new(config) }
  end
  end
  end
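The retyped @return of read_lags_with_offsets above corresponds to a nested structure like the one below (a sketch with placeholder group and topic names and illustrative numeric values):

    lags = Karafka::Admin.read_lags_with_offsets
    # => {
    #      'example_app_group' => {
    #        'events' => {
    #          0 => { offset: 340, lag: 12 },
    #          1 => { offset: 512, lag: 0 }
    #        }
    #      }
    #    }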
data/lib/karafka/app.rb CHANGED
@@ -52,7 +52,7 @@ module Karafka
 
  # Returns current assignments of this process. Both topics and partitions
  #
- # @return [Hash<Karafka::Routing::Topic, Array<Integer>>]
+ # @return [Hash{Karafka::Routing::Topic => Array<Integer>}]
  def assignments
  Instrumentation::AssignmentsTracker.instance.current
  end
@@ -102,8 +102,8 @@ module Karafka
  #
  # @param contexts [String] librdkafka low level debug contexts for granular debugging
  def debug!(contexts = 'all')
- logger.level = ::Logger::DEBUG
- producer.config.logger.level = ::Logger::DEBUG
+ logger.level = Logger::DEBUG
+ producer.config.logger.level = Logger::DEBUG
 
  config.kafka[:debug] = contexts
  producer.config.kafka[:debug] = contexts
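Since debug! forwards the contexts string straight to librdkafka's debug setting, it can be narrowed to selected contexts instead of the 'all' default, for example:

    # Everything (the default)
    Karafka::App.debug!

    # Only broker and protocol level debug logs
    Karafka::App.debug!('broker,protocol')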
data/lib/karafka/base_consumer.rb CHANGED
@@ -5,7 +5,7 @@ module Karafka
  # Base consumer from which all Karafka consumers should inherit
  class BaseConsumer
  # Allow for consumer instance tagging for instrumentation
- include ::Karafka::Core::Taggable
+ include Karafka::Core::Taggable
  include Helpers::ConfigImporter.new(
  monitor: %i[monitor]
  )
data/lib/karafka/cli/base.rb CHANGED
@@ -72,7 +72,7 @@ module Karafka
 
  # All other commands except help and install do require an existing boot file if it was
  # declared
- raise ::Karafka::Errors::MissingBootFileError, ::Karafka.boot_file
+ raise Karafka::Errors::MissingBootFileError, Karafka.boot_file
  end
 
  # Allows to set options for Thor cli
data/lib/karafka/cli/console.rb CHANGED
@@ -28,7 +28,7 @@ module Karafka
  def call
  Info.new.call
 
- command = ::Karafka.rails? ? self.class.rails_console : self.class.console
+ command = Karafka.rails? ? self.class.rails_console : self.class.console
 
  exec "KARAFKA_CONSOLE=true #{command}"
  end
data/lib/karafka/cli/contracts/server.rb CHANGED
@@ -5,7 +5,7 @@ module Karafka
  # CLI related contracts
  module Contracts
  # Contract for validating correctness of the server cli command options.
- class Server < ::Karafka::Contracts::Base
+ class Server < Karafka::Contracts::Base
  configure do |config|
  config.error_messages = YAML.safe_load_file(
  File.join(Karafka.gem_root, 'config', 'locales', 'errors.yml')