karafka 2.5.0.rc2 → 2.5.1.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/{ci.yml → ci_linux_ubuntu_x86_64_gnu.yml} +54 -30
  3. data/.github/workflows/ci_macos_arm64.yml +148 -0
  4. data/.github/workflows/push.yml +2 -2
  5. data/.github/workflows/trigger-wiki-refresh.yml +30 -0
  6. data/.github/workflows/verify-action-pins.yml +1 -1
  7. data/.ruby-version +1 -1
  8. data/CHANGELOG.md +29 -2
  9. data/Gemfile +2 -1
  10. data/Gemfile.lock +56 -27
  11. data/README.md +2 -2
  12. data/bin/integrations +3 -1
  13. data/bin/verify_kafka_warnings +2 -1
  14. data/config/locales/errors.yml +153 -152
  15. data/config/locales/pro_errors.yml +135 -134
  16. data/karafka.gemspec +3 -3
  17. data/lib/active_job/queue_adapters/karafka_adapter.rb +30 -1
  18. data/lib/karafka/active_job/dispatcher.rb +19 -9
  19. data/lib/karafka/admin/acl.rb +7 -8
  20. data/lib/karafka/admin/configs/config.rb +2 -2
  21. data/lib/karafka/admin/configs/resource.rb +2 -2
  22. data/lib/karafka/admin/configs.rb +3 -7
  23. data/lib/karafka/admin/consumer_groups.rb +351 -0
  24. data/lib/karafka/admin/topics.rb +206 -0
  25. data/lib/karafka/admin.rb +42 -451
  26. data/lib/karafka/base_consumer.rb +22 -0
  27. data/lib/karafka/{pro/contracts/server_cli_options.rb → cli/contracts/server.rb} +4 -12
  28. data/lib/karafka/cli/info.rb +1 -1
  29. data/lib/karafka/cli/install.rb +0 -2
  30. data/lib/karafka/connection/client.rb +8 -0
  31. data/lib/karafka/connection/listener.rb +5 -1
  32. data/lib/karafka/connection/status.rb +12 -9
  33. data/lib/karafka/errors.rb +0 -8
  34. data/lib/karafka/instrumentation/assignments_tracker.rb +16 -0
  35. data/lib/karafka/instrumentation/logger_listener.rb +109 -50
  36. data/lib/karafka/pro/active_job/dispatcher.rb +5 -0
  37. data/lib/karafka/pro/cleaner/messages/messages.rb +18 -8
  38. data/lib/karafka/pro/cli/contracts/server.rb +106 -0
  39. data/lib/karafka/pro/encryption/contracts/config.rb +1 -1
  40. data/lib/karafka/pro/loader.rb +1 -1
  41. data/lib/karafka/pro/recurring_tasks/contracts/config.rb +1 -1
  42. data/lib/karafka/pro/routing/features/adaptive_iterator/contracts/topic.rb +1 -1
  43. data/lib/karafka/pro/routing/features/adaptive_iterator/topic.rb +9 -0
  44. data/lib/karafka/pro/routing/features/dead_letter_queue/contracts/topic.rb +1 -1
  45. data/lib/karafka/pro/routing/features/dead_letter_queue/topic.rb +9 -0
  46. data/lib/karafka/pro/routing/features/delaying/contracts/topic.rb +1 -1
  47. data/lib/karafka/pro/routing/features/delaying/topic.rb +9 -0
  48. data/lib/karafka/pro/routing/features/direct_assignments/contracts/consumer_group.rb +1 -1
  49. data/lib/karafka/pro/routing/features/direct_assignments/contracts/topic.rb +1 -1
  50. data/lib/karafka/pro/routing/features/direct_assignments/topic.rb +9 -0
  51. data/lib/karafka/pro/routing/features/expiring/contracts/topic.rb +1 -1
  52. data/lib/karafka/pro/routing/features/expiring/topic.rb +9 -0
  53. data/lib/karafka/pro/routing/features/filtering/contracts/topic.rb +1 -1
  54. data/lib/karafka/pro/routing/features/filtering/topic.rb +9 -0
  55. data/lib/karafka/pro/routing/features/inline_insights/contracts/topic.rb +1 -1
  56. data/lib/karafka/pro/routing/features/inline_insights/topic.rb +9 -0
  57. data/lib/karafka/pro/routing/features/long_running_job/contracts/topic.rb +1 -1
  58. data/lib/karafka/pro/routing/features/long_running_job/topic.rb +9 -0
  59. data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +1 -1
  60. data/lib/karafka/pro/routing/features/multiplexing.rb +1 -1
  61. data/lib/karafka/pro/routing/features/offset_metadata/contracts/topic.rb +1 -1
  62. data/lib/karafka/pro/routing/features/offset_metadata/topic.rb +9 -0
  63. data/lib/karafka/pro/routing/features/parallel_segments/contracts/consumer_group.rb +1 -1
  64. data/lib/karafka/pro/routing/features/patterns/contracts/consumer_group.rb +1 -1
  65. data/lib/karafka/pro/routing/features/patterns/contracts/topic.rb +1 -1
  66. data/lib/karafka/pro/routing/features/patterns/topic.rb +9 -0
  67. data/lib/karafka/pro/routing/features/pausing/contracts/topic.rb +1 -1
  68. data/lib/karafka/pro/routing/features/periodic_job/contracts/topic.rb +1 -1
  69. data/lib/karafka/pro/routing/features/periodic_job/topic.rb +9 -0
  70. data/lib/karafka/pro/routing/features/recurring_tasks/contracts/topic.rb +1 -1
  71. data/lib/karafka/pro/routing/features/recurring_tasks/topic.rb +9 -0
  72. data/lib/karafka/pro/routing/features/scheduled_messages/contracts/topic.rb +1 -1
  73. data/lib/karafka/pro/routing/features/scheduled_messages/topic.rb +9 -0
  74. data/lib/karafka/pro/routing/features/swarm/contracts/topic.rb +1 -1
  75. data/lib/karafka/pro/routing/features/swarm/topic.rb +9 -0
  76. data/lib/karafka/pro/routing/features/throttling/contracts/topic.rb +1 -1
  77. data/lib/karafka/pro/routing/features/throttling/topic.rb +9 -0
  78. data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -1
  79. data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +9 -0
  80. data/lib/karafka/pro/scheduled_messages/contracts/config.rb +1 -1
  81. data/lib/karafka/pro/scheduled_messages/daily_buffer.rb +9 -3
  82. data/lib/karafka/pro/swarm/liveness_listener.rb +17 -2
  83. data/lib/karafka/processing/executor.rb +1 -1
  84. data/lib/karafka/routing/builder.rb +0 -3
  85. data/lib/karafka/routing/consumer_group.rb +1 -4
  86. data/lib/karafka/routing/contracts/consumer_group.rb +84 -0
  87. data/lib/karafka/routing/contracts/routing.rb +61 -0
  88. data/lib/karafka/routing/contracts/topic.rb +83 -0
  89. data/lib/karafka/routing/features/active_job/contracts/topic.rb +1 -1
  90. data/lib/karafka/routing/features/active_job/topic.rb +9 -0
  91. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -1
  92. data/lib/karafka/routing/features/dead_letter_queue/topic.rb +9 -0
  93. data/lib/karafka/routing/features/declaratives/contracts/topic.rb +1 -1
  94. data/lib/karafka/routing/features/declaratives/topic.rb +9 -0
  95. data/lib/karafka/routing/features/deserializers/contracts/topic.rb +1 -1
  96. data/lib/karafka/routing/features/deserializers/topic.rb +9 -0
  97. data/lib/karafka/routing/features/eofed/contracts/topic.rb +1 -1
  98. data/lib/karafka/routing/features/eofed/topic.rb +9 -0
  99. data/lib/karafka/routing/features/inline_insights/contracts/topic.rb +1 -1
  100. data/lib/karafka/routing/features/inline_insights/topic.rb +9 -0
  101. data/lib/karafka/routing/features/manual_offset_management/contracts/topic.rb +1 -1
  102. data/lib/karafka/routing/features/manual_offset_management/topic.rb +9 -0
  103. data/lib/karafka/routing/subscription_group.rb +1 -10
  104. data/lib/karafka/routing/topic.rb +9 -1
  105. data/lib/karafka/server.rb +2 -7
  106. data/lib/karafka/setup/attributes_map.rb +36 -0
  107. data/lib/karafka/setup/config.rb +6 -7
  108. data/lib/karafka/setup/contracts/config.rb +217 -0
  109. data/lib/karafka/setup/defaults_injector.rb +3 -1
  110. data/lib/karafka/swarm/node.rb +66 -6
  111. data/lib/karafka/swarm.rb +2 -2
  112. data/lib/karafka/templates/karafka.rb.erb +2 -7
  113. data/lib/karafka/version.rb +1 -1
  114. data/lib/karafka.rb +17 -18
  115. metadata +18 -15
  116. data/lib/karafka/contracts/config.rb +0 -210
  117. data/lib/karafka/contracts/consumer_group.rb +0 -81
  118. data/lib/karafka/contracts/routing.rb +0 -59
  119. data/lib/karafka/contracts/server_cli_options.rb +0 -92
  120. data/lib/karafka/contracts/topic.rb +0 -81
  121. data/lib/karafka/swarm/pidfd.rb +0 -147
data/lib/karafka/admin/consumer_groups.rb (new file)
@@ -0,0 +1,351 @@
+# frozen_string_literal: true
+
+module Karafka
+  class Admin
+    # Consumer group administration operations
+    # Provides methods to manage Kafka consumer groups including offset management, migration, and
+    # introspection
+    class ConsumerGroups < Admin
+      # 2010-01-01 00:00:00 - way before Kafka was released so no messages should exist prior to
+      # this date
+      # We do not use the explicit -2 librdkafka value here because we resolve this offset without
+      # consuming data
+      LONG_TIME_AGO = Time.at(1_262_300_400)
+
+      # one day in seconds for future time reference
+      DAY_IN_SECONDS = 60 * 60 * 24
+
+      private_constant :LONG_TIME_AGO, :DAY_IN_SECONDS
+
+      class << self
+        # Moves the offset on a given consumer group and provided topic to the requested location
+        #
+        # @param consumer_group_id [String] id of the consumer group for which we want to move the
+        #   existing offset
+        # @param topics_with_partitions_and_offsets [Hash] Hash with list of topics and settings to
+        #   where to move given consumer. It allows us to move particular partitions or whole
+        #   topics if we want to reset all partitions to for example a point in time.
+        #
+        # @return [void]
+        #
+        # @note This method should **not** be executed on a running consumer group as it creates a
+        #   "fake" consumer and uses it to move offsets.
+        #
+        # @example Move a single topic partition nr 1 offset to 100
+        #   Karafka::Admin::ConsumerGroups.seek('group-id', { 'topic' => { 1 => 100 } })
+        #
+        # @example Move offsets on all partitions of a topic to 100
+        #   Karafka::Admin::ConsumerGroups.seek('group-id', { 'topic' => 100 })
+        #
+        # @example Move offset to 5 seconds ago on partition 2
+        #   Karafka::Admin::ConsumerGroups.seek('group-id', { 'topic' => { 2 => 5.seconds.ago } })
+        #
+        # @example Move to the earliest offset on all the partitions of a topic
+        #   Karafka::Admin::ConsumerGroups.seek('group-id', { 'topic' => 'earliest' })
+        #
+        # @example Move to the latest (high-watermark) offset on all the partitions of a topic
+        #   Karafka::Admin::ConsumerGroups.seek('group-id', { 'topic' => 'latest' })
+        #
+        # @example Move offset of a single partition to earliest
+        #   Karafka::Admin::ConsumerGroups.seek('group-id', { 'topic' => { 1 => 'earliest' } })
+        #
+        # @example Move offset of a single partition to latest
+        #   Karafka::Admin::ConsumerGroups.seek('group-id', { 'topic' => { 1 => 'latest' } })
+        def seek(consumer_group_id, topics_with_partitions_and_offsets)
+          tpl_base = {}
+
+          # Normalize the data so we always have all partitions and topics in the same format
+          # That is in a format where we have topics and all partitions with their per partition
+          # assigned offsets
+          topics_with_partitions_and_offsets.each do |topic, partitions_with_offsets|
+            tpl_base[topic] = {}
+
+            if partitions_with_offsets.is_a?(Hash)
+              tpl_base[topic] = partitions_with_offsets
+            else
+              topic_info = Topics.info(topic)
+              topic_info[:partition_count].times do |partition|
+                tpl_base[topic][partition] = partitions_with_offsets
+              end
+            end
+          end
+
+          tpl_base.each_value do |partitions|
+            partitions.transform_values! do |position|
+              # Support both symbol and string based references
+              casted_position = position.is_a?(Symbol) ? position.to_s : position
+
+              # This remap allows us to transform some special cases in a reference that can be
+              # understood by Kafka
+              case casted_position
+              # Earliest is not always 0. When compacting/deleting it can be much later, that's why
+              # we fetch the oldest possible offset
+              when 'earliest'
+                LONG_TIME_AGO
+              # Latest will always be the high-watermark offset and we can get it just by getting
+              # a future position
+              when 'latest'
+                Time.now + DAY_IN_SECONDS
+              # Same as `'earliest'`
+              when false
+                LONG_TIME_AGO
+              # Regular offset case
+              else
+                position
+              end
+            end
+          end
+
+          tpl = Rdkafka::Consumer::TopicPartitionList.new
+          # In case of time based location, we need to do a pre-resolution, that's why we keep it
+          # separately
+          time_tpl = Rdkafka::Consumer::TopicPartitionList.new
+
+          # Distribute properly the offset type
+          tpl_base.each do |topic, partitions_with_offsets|
+            partitions_with_offsets.each do |partition, offset|
+              target = offset.is_a?(Time) ? time_tpl : tpl
+              # We reverse and uniq to make sure that potentially duplicated references are removed
+              # in such a way that the newest stays
+              target.to_h[topic] ||= []
+              target.to_h[topic] << Rdkafka::Consumer::Partition.new(partition, offset)
+              target.to_h[topic].reverse!
+              target.to_h[topic].uniq!(&:partition)
+              target.to_h[topic].reverse!
+            end
+          end
+
+          settings = { 'group.id': consumer_group_id }
+
+          with_consumer(settings) do |consumer|
+            # If we have any time based stuff to resolve, we need to do it prior to commits
+            unless time_tpl.empty?
+              real_offsets = consumer.offsets_for_times(time_tpl)
+
+              real_offsets.to_h.each do |name, results|
+                results.each do |result|
+                  raise(Errors::InvalidTimeBasedOffsetError) unless result
+
+                  partition = result.partition
+
+                  # Negative offset means we're beyond last message and we need to query for the
+                  # high watermark offset to get the most recent offset and move there
+                  if result.offset.negative?
+                    _, offset = consumer.query_watermark_offsets(name, result.partition)
+                  else
+                    # If we get an offset, it means there existed a message close to this time
+                    # location
+                    offset = result.offset
+                  end
+
+                  # Since now we have proper offsets, we can add this to the final tpl for commit
+                  tpl.to_h[name] ||= []
+                  tpl.to_h[name] << Rdkafka::Consumer::Partition.new(partition, offset)
+                  tpl.to_h[name].reverse!
+                  tpl.to_h[name].uniq!(&:partition)
+                  tpl.to_h[name].reverse!
+                end
+              end
+            end
+
+            consumer.commit_offsets(tpl, async: false)
+          end
+        end
+
+        # Takes consumer group and its topics and copies all the offsets to a new named group
+        #
+        # @param previous_name [String] old consumer group name
+        # @param new_name [String] new consumer group name
+        # @param topics [Array<String>] topics for which we want to migrate offsets during rename
+        #
+        # @return [Boolean] true if anything was migrated, otherwise false
+        #
+        # @note This method should **not** be executed on a running consumer group as it creates a
+        #   "fake" consumer and uses it to move offsets.
+        #
+        # @note If new consumer group exists, old offsets will be added to it.
+        def copy(previous_name, new_name, topics)
+          remap = Hash.new { |h, k| h[k] = {} }
+
+          old_lags = read_lags_with_offsets({ previous_name => topics })
+
+          return false if old_lags.empty?
+          return false if old_lags.values.all? { |topic_data| topic_data.values.all?(&:empty?) }
+
+          read_lags_with_offsets({ previous_name => topics })
+            .fetch(previous_name)
+            .each do |topic, partitions|
+              partitions.each do |partition_id, details|
+                offset = details[:offset]
+
+                # No offset on this partition
+                next if offset.negative?
+
+                remap[topic][partition_id] = offset
+              end
+            end
+
+          seek(new_name, remap)
+
+          true
+        end
+
+        # Takes consumer group and its topics and migrates all the offsets to a new named group
+        #
+        # @param previous_name [String] old consumer group name
+        # @param new_name [String] new consumer group name
+        # @param topics [Array<String>] topics for which we want to migrate offsets during rename
+        # @param delete_previous [Boolean] should we delete previous consumer group after rename.
+        #   Defaults to true.
+        #
+        # @return [Boolean] true if rename (and optionally removal) was ok or false if there was
+        #   nothing really to rename
+        #
+        # @note This method should **not** be executed on a running consumer group as it creates a
+        #   "fake" consumer and uses it to move offsets.
+        #
+        # @note After migration unless `delete_previous` is set to `false`, old group will be
+        #   removed.
+        #
+        # @note If new consumer group exists, old offsets will be added to it.
+        def rename(previous_name, new_name, topics, delete_previous: true)
+          copy_result = copy(previous_name, new_name, topics)
+
+          return false unless copy_result
+          return copy_result unless delete_previous
+
+          delete(previous_name)
+
+          true
+        end
+
+        # Removes given consumer group (if exists)
+        #
+        # @param consumer_group_id [String] consumer group name
+        #
+        # @return [void]
+        #
+        # @note This method should not be used on a running consumer group as it will not yield any
+        #   results.
+        def delete(consumer_group_id)
+          with_admin do |admin|
+            handler = admin.delete_group(consumer_group_id)
+            handler.wait(max_wait_timeout: max_wait_time_seconds)
+          end
+        end
+
+        # Reads lags and offsets for given topics in the context of consumer groups defined in the
+        # routing
+        #
+        # @param consumer_groups_with_topics [Hash<String, Array<String>>] hash with consumer
+        #   groups names with array of topics to query per consumer group inside
+        # @param active_topics_only [Boolean] if set to false, when we use routing topics, will
+        #   select also topics that are marked as inactive in routing
+        #
+        # @return [Hash<String, Hash<Integer, <Hash<Integer>>>>] hash where the top level keys are
+        #   the consumer groups and values are hashes with topics and inside partitions with lags
+        #   and offsets
+        #
+        # @note For topics that do not exist, topic details will be set to an empty hash
+        #
+        # @note For topics that exist but were never consumed by a given CG we set `-1` as lag and
+        #   the offset on each of the partitions that were not consumed.
+        #
+        # @note This lag reporting is for committed lags and is "Kafka-centric", meaning that this
+        #   represents lags from Kafka perspective and not the consumer. They may differ.
+        def read_lags_with_offsets(consumer_groups_with_topics = {}, active_topics_only: true)
+          # We first fetch all the topics with partitions count that exist in the cluster so we
+          # do not query for topics that do not exist and so we can get partitions count for all
+          # the topics we may need. The non-existent and not consumed will be filled at the end
+          existing_topics = cluster_info.topics.map do |topic|
+            [topic[:topic_name], topic[:partition_count]]
+          end.to_h.freeze
+
+          # If no expected CGs, we use all from routing that have active topics
+          if consumer_groups_with_topics.empty?
+            consumer_groups_with_topics = Karafka::App.routes.map do |cg|
+              cg_topics = cg.topics.select do |cg_topic|
+                active_topics_only ? cg_topic.active? : true
+              end
+
+              [cg.id, cg_topics.map(&:name)]
+            end.to_h
+          end
+
+          # We make a copy because we will remove entries with non-existing topics
+          # We keep original requested consumer groups with topics for later backfilling
+          cgs_with_topics = consumer_groups_with_topics.dup
+          cgs_with_topics.transform_values!(&:dup)
+
+          # We can query only topics that do exist, this is why we are cleaning those that do not
+          # exist
+          cgs_with_topics.each_value do |requested_topics|
+            requested_topics.delete_if { |topic| !existing_topics.include?(topic) }
+          end
+
+          groups_lags = Hash.new { |h, k| h[k] = {} }
+          groups_offs = Hash.new { |h, k| h[k] = {} }
+
+          cgs_with_topics.each do |cg, topics|
+            # Do not add to tpl topics that do not exist
+            next if topics.empty?
+
+            tpl = Rdkafka::Consumer::TopicPartitionList.new
+
+            with_consumer('group.id': cg) do |consumer|
+              topics.each { |topic| tpl.add_topic(topic, existing_topics[topic]) }
+
+              commit_offsets = consumer.committed(tpl)
+
+              commit_offsets.to_h.each do |topic, partitions|
+                groups_offs[cg][topic] = {}
+
+                partitions.each do |partition|
+                  # -1 when no offset is stored
+                  groups_offs[cg][topic][partition.partition] = partition.offset || -1
+                end
+              end
+
+              consumer.lag(commit_offsets).each do |topic, partitions_lags|
+                groups_lags[cg][topic] = partitions_lags
+              end
+            end
+          end
+
+          consumer_groups_with_topics.each do |cg, topics|
+            groups_lags[cg]
+
+            topics.each do |topic|
+              groups_lags[cg][topic] ||= {}
+
+              next unless existing_topics.key?(topic)
+
+              # We backfill because there is a case where our consumer group would consume for
+              # example only one partition out of 20, rest needs to get -1
+              existing_topics[topic].times do |partition_id|
+                groups_lags[cg][topic][partition_id] ||= -1
+              end
+            end
+          end
+
+          merged = Hash.new { |h, k| h[k] = {} }
+
+          groups_lags.each do |cg, topics|
+            topics.each do |topic, partitions|
+              merged[cg][topic] = {}
+
+              partitions.each do |partition, lag|
+                merged[cg][topic][partition] = {
+                  offset: groups_offs.fetch(cg).fetch(topic).fetch(partition),
+                  lag: lag
+                }
+              end
+            end
+          end
+
+          merged
+        end
+      end
+    end
+  end
+end
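
For orientation, a brief usage sketch of the new `Karafka::Admin::ConsumerGroups` API introduced above, assembled from the method signatures and `@example` tags in this diff. The group and topic names are placeholders, and it assumes an already configured Karafka application.

```ruby
# Placeholder group/topic names; assumes a configured Karafka application.

# Move partition 0 of a topic to offset 100 and a whole topic to its earliest offsets
Karafka::Admin::ConsumerGroups.seek(
  'example-group',
  { 'example-topic' => { 0 => 100 }, 'other-topic' => 'earliest' }
)

# Copy committed offsets to a new group without removing the old one
Karafka::Admin::ConsumerGroups.copy('example-group', 'example-group-v2', %w[example-topic])

# Or rename: copy and then delete the previous group (unless delete_previous: false)
Karafka::Admin::ConsumerGroups.rename('example-group', 'example-group-v2', %w[example-topic])

# Inspect committed offsets and lags per partition (illustrative return value)
Karafka::Admin::ConsumerGroups.read_lags_with_offsets({ 'example-group' => %w[example-topic] })
#=> { 'example-group' => { 'example-topic' => { 0 => { offset: 100, lag: 5 } } } }
```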
data/lib/karafka/admin/topics.rb (new file)
@@ -0,0 +1,206 @@
+# frozen_string_literal: true
+
+module Karafka
+  class Admin
+    # Topic administration operations
+    # Provides methods to manage Kafka topics including creation, deletion, reading, and
+    # introspection
+    class Topics < Admin
+      class << self
+        # Allows us to read messages from the topic
+        #
+        # @param name [String, Symbol] topic name
+        # @param partition [Integer] partition
+        # @param count [Integer] how many messages we want to get at most
+        # @param start_offset [Integer, Time] offset from which we should start. If -1 is provided
+        #   (default) we will start from the latest offset. If time is provided, the appropriate
+        #   offset will be resolved. If negative beyond -1 is provided, we move backwards more.
+        # @param settings [Hash] kafka extra settings (optional)
+        #
+        # @return [Array<Karafka::Messages::Message>] array with messages
+        def read(name, partition, count, start_offset = -1, settings = {})
+          messages = []
+          tpl = Rdkafka::Consumer::TopicPartitionList.new
+          low_offset, high_offset = nil
+
+          with_consumer(settings) do |consumer|
+            # Convert the time offset (if needed)
+            start_offset = resolve_offset(consumer, name.to_s, partition, start_offset)
+
+            low_offset, high_offset = consumer.query_watermark_offsets(name, partition)
+
+            # Select offset dynamically if -1 or less and move backwards with the negative
+            # offset, allowing to start from N messages back from high-watermark
+            start_offset = high_offset - count - start_offset.abs + 1 if start_offset.negative?
+            start_offset = low_offset if start_offset.negative?
+
+            # Build the requested range - since first element is on the start offset we need to
+            # subtract one from requested count to end up with expected number of elements
+            requested_range = (start_offset..start_offset + (count - 1))
+            # Establish theoretical available range. Note, that this does not handle cases related
+            # to log retention or compaction
+            available_range = (low_offset..(high_offset - 1))
+            # Select only offsets that we can select. This will remove all the potential offsets
+            # that are below the low watermark offset
+            possible_range = requested_range.select { |offset| available_range.include?(offset) }
+
+            start_offset = possible_range.first
+            count = possible_range.size
+
+            tpl.add_topic_and_partitions_with_offsets(name, partition => start_offset)
+            consumer.assign(tpl)
+
+            # We should poll as long as we don't have all the messages that we need or as long as
+            # we do not read all the messages from the topic
+            loop do
+              # If we've got as many messages as we've wanted stop
+              break if messages.size >= count
+
+              message = consumer.poll(200)
+
+              next unless message
+
+              # If the message we've got is beyond the requested range, stop
+              break unless possible_range.include?(message.offset)
+
+              messages << message
+            rescue Rdkafka::RdkafkaError => e
+              # End of partition
+              break if e.code == :partition_eof
+
+              raise e
+            end
+          end
+
+          # Use topic from routes if we can match it or create a dummy one
+          # Dummy one is used in case we cannot match the topic with routes. This can happen
+          # when admin API is used to read topics that are not part of the routing
+          topic = ::Karafka::Routing::Router.find_or_initialize_by_name(name)
+
+          messages.map! do |message|
+            Messages::Builders::Message.call(
+              message,
+              topic,
+              Time.now
+            )
+          end
+        end
+
+        # Creates Kafka topic with given settings
+        #
+        # @param name [String] topic name
+        # @param partitions [Integer] number of partitions we expect
+        # @param replication_factor [Integer] number of replicas
+        # @param topic_config [Hash] topic config details as described here:
+        #   https://kafka.apache.org/documentation/#topicconfigs
+        #
+        # @return [void]
+        def create(name, partitions, replication_factor, topic_config = {})
+          with_admin do |admin|
+            handler = admin.create_topic(name, partitions, replication_factor, topic_config)
+
+            with_re_wait(
+              -> { handler.wait(max_wait_timeout: max_wait_time_seconds) },
+              -> { names.include?(name) }
+            )
+          end
+        end
+
+        # Deletes a given topic
+        #
+        # @param name [String] topic name
+        #
+        # @return [void]
+        def delete(name)
+          with_admin do |admin|
+            handler = admin.delete_topic(name)
+
+            with_re_wait(
+              -> { handler.wait(max_wait_timeout: max_wait_time_seconds) },
+              -> { !names.include?(name) }
+            )
+          end
+        end
+
+        # Creates more partitions for a given topic
+        #
+        # @param name [String] topic name
+        # @param partitions [Integer] total number of partitions we expect to end up with
+        #
+        # @return [void]
+        def create_partitions(name, partitions)
+          with_admin do |admin|
+            handler = admin.create_partitions(name, partitions)
+
+            with_re_wait(
+              -> { handler.wait(max_wait_timeout: max_wait_time_seconds) },
+              -> { info(name).fetch(:partition_count) >= partitions }
+            )
+          end
+        end
+
+        # Fetches the watermark offsets for a given topic partition
+        #
+        # @param name [String, Symbol] topic name
+        # @param partition [Integer] partition
+        # @return [Array<Integer, Integer>] low watermark offset and high watermark offset
+        def read_watermark_offsets(name, partition)
+          with_consumer do |consumer|
+            consumer.query_watermark_offsets(name, partition)
+          end
+        end
+
+        # Returns basic topic metadata
+        #
+        # @param topic_name [String] name of the topic we're interested in
+        # @return [Hash] topic metadata info hash
+        # @raise [Rdkafka::RdkafkaError] `unknown_topic_or_part` if requested topic is not found
+        #
+        # @note This query is much more efficient than doing a full `#cluster_info` + topic lookup
+        #   because it does not have to query for all the topics data but just the topic we're
+        #   interested in
+        def info(topic_name)
+          with_admin do |admin|
+            admin
+              .metadata(topic_name)
+              .topics
+              .find { |topic| topic[:topic_name] == topic_name }
+          end
+        end
+
+        private
+
+        # @return [Array<String>] topics names
+        def names
+          cluster_info.topics.map { |topic| topic.fetch(:topic_name) }
+        end
+
+        # Resolves the offset if offset is in a time format. Otherwise returns the offset without
+        # resolving.
+        # @param consumer [::Rdkafka::Consumer]
+        # @param name [String, Symbol] expected topic name
+        # @param partition [Integer]
+        # @param offset [Integer, Time]
+        # @return [Integer] expected offset
+        def resolve_offset(consumer, name, partition, offset)
+          if offset.is_a?(Time)
+            tpl = ::Rdkafka::Consumer::TopicPartitionList.new
+            tpl.add_topic_and_partitions_with_offsets(
+              name, partition => offset
+            )
+
+            real_offsets = consumer.offsets_for_times(tpl)
+            detected_offset = real_offsets
+                              .to_h
+                              .fetch(name)
+                              .find { |p_data| p_data.partition == partition }
+
+            detected_offset&.offset || raise(Errors::InvalidTimeBasedOffsetError)
+          else
+            offset
+          end
+        end
+      end
+    end
+  end
+end
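
Similarly, a hedged sketch of how the extracted `Karafka::Admin::Topics` API above can be used, based on the method signatures in this diff. The topic name and counts are placeholders, and a configured Karafka application is assumed.

```ruby
# Placeholder topic name; assumes a configured Karafka application.

# Create a topic with 5 partitions and a replication factor of 1
Karafka::Admin::Topics.create('example-topic', 5, 1)

# Read up to 10 of the most recent messages from partition 0
messages = Karafka::Admin::Topics.read('example-topic', 0, 10)

# Read up to 10 messages starting from a point in time
Karafka::Admin::Topics.read('example-topic', 0, 10, Time.now - 3600)

# Low/high watermark offsets and basic metadata
Karafka::Admin::Topics.read_watermark_offsets('example-topic', 0)
Karafka::Admin::Topics.info('example-topic')

# Grow the topic to 10 partitions, then remove it
Karafka::Admin::Topics.create_partitions('example-topic', 10)
Karafka::Admin::Topics.delete('example-topic')
```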