karafka 2.4.18 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/CODEOWNERS +3 -0
- data/.github/workflows/ci.yml +59 -15
- data/.github/workflows/push.yml +35 -0
- data/.github/workflows/verify-action-pins.yml +16 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +75 -0
- data/Gemfile +2 -2
- data/Gemfile.lock +72 -53
- data/LICENSE-COMM +2 -2
- data/README.md +1 -1
- data/Rakefile +4 -0
- data/bin/clean_kafka +43 -0
- data/bin/integrations +20 -6
- data/bin/rspecs +15 -3
- data/bin/verify_kafka_warnings +35 -0
- data/bin/verify_topics_naming +27 -0
- data/config/locales/errors.yml +5 -1
- data/config/locales/pro_errors.yml +13 -2
- data/docker-compose.yml +1 -1
- data/examples/payloads/avro/.gitkeep +0 -0
- data/examples/payloads/json/sample_set_01/enrollment_event.json +579 -0
- data/examples/payloads/json/sample_set_01/ingestion_event.json +30 -0
- data/examples/payloads/json/sample_set_01/transaction_event.json +17 -0
- data/examples/payloads/json/sample_set_01/user_event.json +11 -0
- data/karafka.gemspec +3 -8
- data/lib/karafka/active_job/current_attributes.rb +1 -1
- data/lib/karafka/active_job/job_extensions.rb +4 -1
- data/lib/karafka/admin/acl.rb +5 -1
- data/lib/karafka/admin/configs.rb +5 -1
- data/lib/karafka/admin.rb +89 -42
- data/lib/karafka/base_consumer.rb +17 -8
- data/lib/karafka/cli/base.rb +8 -2
- data/lib/karafka/cli/topics/align.rb +7 -4
- data/lib/karafka/cli/topics/base.rb +17 -0
- data/lib/karafka/cli/topics/create.rb +9 -7
- data/lib/karafka/cli/topics/delete.rb +4 -2
- data/lib/karafka/cli/topics/help.rb +39 -0
- data/lib/karafka/cli/topics/repartition.rb +4 -2
- data/lib/karafka/cli/topics.rb +10 -3
- data/lib/karafka/cli.rb +2 -0
- data/lib/karafka/connection/client.rb +39 -9
- data/lib/karafka/connection/listener.rb +24 -12
- data/lib/karafka/connection/messages_buffer.rb +1 -1
- data/lib/karafka/connection/proxy.rb +4 -1
- data/lib/karafka/constraints.rb +3 -3
- data/lib/karafka/contracts/base.rb +3 -2
- data/lib/karafka/contracts/config.rb +5 -1
- data/lib/karafka/contracts/topic.rb +1 -1
- data/lib/karafka/errors.rb +46 -2
- data/lib/karafka/helpers/async.rb +3 -1
- data/lib/karafka/helpers/interval_runner.rb +8 -0
- data/lib/karafka/instrumentation/callbacks/rebalance.rb +5 -1
- data/lib/karafka/instrumentation/logger_listener.rb +95 -32
- data/lib/karafka/instrumentation/proctitle_listener.rb +5 -1
- data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +2 -2
- data/lib/karafka/instrumentation/vendors/kubernetes/base_listener.rb +17 -2
- data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +29 -6
- data/lib/karafka/instrumentation/vendors/kubernetes/swarm_liveness_listener.rb +9 -0
- data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
- data/lib/karafka/pro/cleaner.rb +8 -0
- data/lib/karafka/pro/cli/parallel_segments/base.rb +89 -0
- data/lib/karafka/pro/cli/parallel_segments/collapse.rb +164 -0
- data/lib/karafka/pro/cli/parallel_segments/distribute.rb +164 -0
- data/lib/karafka/pro/cli/parallel_segments.rb +60 -0
- data/lib/karafka/pro/connection/manager.rb +5 -8
- data/lib/karafka/pro/encryption.rb +12 -1
- data/lib/karafka/pro/instrumentation/performance_tracker.rb +1 -1
- data/lib/karafka/pro/iterator/expander.rb +5 -3
- data/lib/karafka/pro/iterator/tpl_builder.rb +23 -0
- data/lib/karafka/pro/loader.rb +10 -0
- data/lib/karafka/pro/processing/coordinator.rb +4 -1
- data/lib/karafka/pro/processing/coordinators/errors_tracker.rb +32 -3
- data/lib/karafka/pro/processing/coordinators/filters_applier.rb +11 -0
- data/lib/karafka/pro/processing/filters/base.rb +10 -2
- data/lib/karafka/pro/processing/filters/expirer.rb +5 -0
- data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb +2 -2
- data/lib/karafka/pro/processing/filters/virtual_limiter.rb +5 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/base.rb +73 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/default.rb +85 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/mom.rb +66 -0
- data/lib/karafka/pro/processing/partitioner.rb +1 -13
- data/lib/karafka/pro/processing/piping/consumer.rb +13 -13
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/default.rb +36 -8
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +15 -10
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +3 -1
- data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/ftr/default.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +4 -1
- data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
- data/lib/karafka/pro/processing/virtual_partitions/distributors/balanced.rb +50 -0
- data/lib/karafka/pro/processing/virtual_partitions/distributors/base.rb +29 -0
- data/lib/karafka/pro/processing/virtual_partitions/distributors/consistent.rb +27 -0
- data/lib/karafka/pro/recurring_tasks/contracts/config.rb +8 -4
- data/lib/karafka/pro/recurring_tasks/dispatcher.rb +3 -3
- data/lib/karafka/pro/recurring_tasks/setup/config.rb +7 -2
- data/lib/karafka/pro/recurring_tasks.rb +21 -2
- data/lib/karafka/pro/routing/features/dead_letter_queue/topic.rb +1 -1
- data/lib/karafka/pro/routing/features/multiplexing/config.rb +1 -0
- data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +17 -0
- data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +5 -2
- data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +8 -1
- data/lib/karafka/pro/routing/features/parallel_segments/builder.rb +47 -0
- data/lib/karafka/pro/routing/features/parallel_segments/config.rb +27 -0
- data/lib/karafka/pro/routing/features/parallel_segments/consumer_group.rb +83 -0
- data/lib/karafka/pro/routing/features/parallel_segments/contracts/consumer_group.rb +49 -0
- data/lib/karafka/pro/routing/features/parallel_segments/topic.rb +43 -0
- data/lib/karafka/pro/routing/features/parallel_segments.rb +24 -0
- data/lib/karafka/pro/routing/features/patterns/pattern.rb +1 -1
- data/lib/karafka/pro/routing/features/recurring_tasks/builder.rb +2 -2
- data/lib/karafka/pro/routing/features/scheduled_messages/builder.rb +10 -6
- data/lib/karafka/pro/routing/features/swarm/contracts/routing.rb +3 -2
- data/lib/karafka/pro/routing/features/swarm.rb +4 -1
- data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +20 -2
- data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +8 -2
- data/lib/karafka/pro/scheduled_messages/consumer.rb +61 -26
- data/lib/karafka/pro/scheduled_messages/daily_buffer.rb +9 -6
- data/lib/karafka/pro/scheduled_messages/deserializers/headers.rb +7 -1
- data/lib/karafka/pro/scheduled_messages/dispatcher.rb +2 -1
- data/lib/karafka/pro/scheduled_messages/max_epoch.rb +15 -6
- data/lib/karafka/pro/scheduled_messages/proxy.rb +15 -3
- data/lib/karafka/pro/scheduled_messages/serializer.rb +2 -4
- data/lib/karafka/pro/scheduled_messages/state.rb +20 -23
- data/lib/karafka/pro/scheduled_messages/tracker.rb +34 -8
- data/lib/karafka/pro/scheduled_messages.rb +17 -1
- data/lib/karafka/processing/coordinators_buffer.rb +1 -0
- data/lib/karafka/processing/strategies/default.rb +4 -4
- data/lib/karafka/routing/builder.rb +12 -3
- data/lib/karafka/routing/features/base/expander.rb +8 -2
- data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
- data/lib/karafka/routing/subscription_group.rb +1 -1
- data/lib/karafka/runner.rb +7 -1
- data/lib/karafka/server.rb +21 -18
- data/lib/karafka/setup/attributes_map.rb +2 -0
- data/lib/karafka/setup/config.rb +40 -7
- data/lib/karafka/setup/defaults_injector.rb +26 -1
- data/lib/karafka/status.rb +6 -1
- data/lib/karafka/swarm/node.rb +31 -0
- data/lib/karafka/swarm/supervisor.rb +9 -2
- data/lib/karafka/templates/karafka.rb.erb +14 -1
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +17 -9
- data/renovate.json +14 -2
- metadata +41 -40
- checksums.yaml.gz.sig +0 -0
- data/certs/cert.pem +0 -26
- data.tar.gz.sig +0 -0
- metadata.gz.sig +0 -0
data/lib/karafka/admin/acl.rb
CHANGED

```diff
@@ -11,6 +11,10 @@ module Karafka
     # This API works based on ability to create a `Karafka:Admin::Acl` object that can be then used
     # using `#create`, `#delete` and `#describe` class API.
     class Acl
+      extend Helpers::ConfigImporter.new(
+        max_wait_time: %i[admin max_wait_time]
+      )
+
       # Types of resources for which we can assign permissions.
       #
       # Resource refers to any entity within the Kafka ecosystem for which access control can be
@@ -162,7 +166,7 @@ module Karafka
       # Makes sure that admin is closed afterwards.
       def with_admin_wait
         Admin.with_admin do |admin|
-          yield(admin).wait(max_wait_timeout:
+          yield(admin).wait(max_wait_timeout: max_wait_time)
         end
       end
 
```
data/lib/karafka/admin/configs.rb
CHANGED

```diff
@@ -10,6 +10,10 @@ module Karafka
     #
     # Altering is done in the incremental way.
     module Configs
+      extend Helpers::ConfigImporter.new(
+        max_wait_time: %i[admin max_wait_time]
+      )
+
       class << self
         # Fetches given resources configurations from Kafka
         #
@@ -94,7 +98,7 @@ module Karafka
         # Makes sure that admin is closed afterwards.
         def with_admin_wait
           Admin.with_admin do |admin|
-            yield(admin).wait(max_wait_timeout:
+            yield(admin).wait(max_wait_timeout: max_wait_time)
           end
         end
       end
```
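Both admin namespaces above switch from inline `::Karafka::App.config` lookups to readers generated by `Helpers::ConfigImporter`. As a rough mental model (a minimal sketch, not Karafka's actual implementation), the importer turns each `name: %i[path to setting]` pair into a reader that digs through the config tree:

```ruby
# Minimal sketch of the ConfigImporter idea; the module and method bodies
# below are illustrative - only the `extend ... .new(name: %i[path])` usage
# mirrors the diff above.
module ConfigImporterSketch
  # @param attributes [Hash{Symbol => Array<Symbol>}] reader name => config path
  # @return [Module] module with one reader method per attribute
  def self.new(attributes)
    Module.new do
      attributes.each do |name, path|
        # %i[admin max_wait_time] resolves to Karafka::App.config.admin.max_wait_time
        define_method(name) do
          path.reduce(::Karafka::App.config) { |node, key| node.public_send(key) }
        end
      end
    end
  end
end
```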
data/lib/karafka/admin.rb
CHANGED

```diff
@@ -10,11 +10,28 @@ module Karafka
   # Cluster on which operations are performed can be changed via `admin.kafka` config, however
   # there is no multi-cluster runtime support.
   module Admin
-
-    # Used for time referencing that does not have to be accurate but needs to be big
-    HUNDRED_YEARS = 100 * 365.25 * 24 * 60 * 60
+    extend Core::Helpers::Time
 
-
+    extend Helpers::ConfigImporter.new(
+      max_wait_time: %i[admin max_wait_time],
+      poll_timeout: %i[admin poll_timeout],
+      max_retries_duration: %i[admin max_retries_duration],
+      retry_backoff: %i[admin retry_backoff],
+      group_id: %i[admin group_id],
+      app_kafka: %i[kafka],
+      admin_kafka: %i[admin kafka]
+    )
+
+    # 2010-01-01 00:00:00 - way before Kafka was released so no messages should exist prior to
+    # this date
+    # We do not use the explicit -2 librdkafka value here because we resolve this offset without
+    # consuming data
+    LONG_TIME_AGO = Time.at(1_262_300_400)
+
+    # one day in seconds for future time reference
+    DAY_IN_SECONDS = 60 * 60 * 24
+
+    private_constant :LONG_TIME_AGO, :DAY_IN_SECONDS
 
     class << self
       # Allows us to read messages from the topic
@@ -55,7 +72,7 @@ module Karafka
         possible_range = requested_range.select { |offset| available_range.include?(offset) }
 
         start_offset = possible_range.first
-        count = possible_range.
+        count = possible_range.size
 
         tpl.add_topic_and_partitions_with_offsets(name, partition => start_offset)
         consumer.assign(tpl)
@@ -108,7 +125,7 @@ module Karafka
        handler = admin.create_topic(name, partitions, replication_factor, topic_config)
 
        with_re_wait(
-          -> { handler.wait(max_wait_timeout:
+          -> { handler.wait(max_wait_timeout: max_wait_time_seconds) },
          -> { topics_names.include?(name) }
        )
      end
@@ -122,7 +139,7 @@ module Karafka
        handler = admin.delete_topic(name)
 
        with_re_wait(
-          -> { handler.wait(max_wait_timeout:
+          -> { handler.wait(max_wait_timeout: max_wait_time_seconds) },
          -> { !topics_names.include?(name) }
        )
      end
@@ -137,7 +154,7 @@ module Karafka
        handler = admin.create_partitions(name, partitions)
 
        with_re_wait(
-          -> { handler.wait(max_wait_timeout:
+          -> { handler.wait(max_wait_timeout: max_wait_time_seconds) },
          -> { topic_info(name).fetch(:partition_count) >= partitions }
        )
      end
@@ -203,14 +220,14 @@ module Karafka
        # Earliest is not always 0. When compacting/deleting it can be much later, that's why
        # we fetch the oldest possible offset
        when 'earliest'
-
+          LONG_TIME_AGO
        # Latest will always be the high-watermark offset and we can get it just by getting
        # a future position
        when 'latest'
-          Time.now +
-          # Same as `'
+          Time.now + DAY_IN_SECONDS
+        # Same as `'earliest'`
        when false
-
+          LONG_TIME_AGO
        # Regular offset case
        else
          position
@@ -274,27 +291,24 @@ module Karafka
        end
      end
 
-      # Takes consumer group and its topics and
+      # Takes consumer group and its topics and copies all the offsets to a new named group
      #
      # @param previous_name [String] old consumer group name
      # @param new_name [String] new consumer group name
      # @param topics [Array<String>] topics for which we want to migrate offsets during rename
-      # @
-      #   Defaults to true.
+      # @return [Boolean] true if anything was migrated, otherwise false
      #
      # @note This method should **not** be executed on a running consumer group as it creates a
      #   "fake" consumer and uses it to move offsets.
      #
-      # @note After migration unless `delete_previous` is set to `false`, old group will be
-      #   removed.
-      #
      # @note If new consumer group exists, old offsets will be added to it.
-      def
+      def copy_consumer_group(previous_name, new_name, topics)
        remap = Hash.new { |h, k| h[k] = {} }
 
        old_lags = read_lags_with_offsets({ previous_name => topics })
 
-        return if old_lags.empty?
+        return false if old_lags.empty?
+        return false if old_lags.values.all? { |topic_data| topic_data.values.all?(&:empty?) }
 
        read_lags_with_offsets({ previous_name => topics })
          .fetch(previous_name)
@@ -311,9 +325,35 @@ module Karafka
 
        seek_consumer_group(new_name, remap)
 
-
+        true
+      end
+
+      # Takes consumer group and its topics and migrates all the offsets to a new named group
+      #
+      # @param previous_name [String] old consumer group name
+      # @param new_name [String] new consumer group name
+      # @param topics [Array<String>] topics for which we want to migrate offsets during rename
+      # @param delete_previous [Boolean] should we delete previous consumer group after rename.
+      #   Defaults to true.
+      # @return [Boolean] true if rename (and optionally removal) was ok or false if there was
+      #   nothing really to rename
+      #
+      # @note This method should **not** be executed on a running consumer group as it creates a
+      #   "fake" consumer and uses it to move offsets.
+      #
+      # @note After migration unless `delete_previous` is set to `false`, old group will be
+      #   removed.
+      #
+      # @note If new consumer group exists, old offsets will be added to it.
+      def rename_consumer_group(previous_name, new_name, topics, delete_previous: true)
+        copy_result = copy_consumer_group(previous_name, new_name, topics)
+
+        return false unless copy_result
+        return copy_result unless delete_previous
 
        delete_consumer_group(previous_name)
+
+        true
      end
 
      # Removes given consumer group (if exists)
@@ -325,7 +365,7 @@ module Karafka
      def delete_consumer_group(consumer_group_id)
        with_admin do |admin|
          handler = admin.delete_group(consumer_group_id)
-          handler.wait(max_wait_timeout:
+          handler.wait(max_wait_timeout: max_wait_time_seconds)
        end
      end
 
@@ -509,7 +549,11 @@ module Karafka
      def with_admin
        bind_id = SecureRandom.uuid
 
-        admin = config(:producer, {}).admin(
+        admin = config(:producer, {}).admin(
+          native_kafka_auto_start: false,
+          native_kafka_poll_timeout_ms: poll_timeout
+        )
+
        bind_oauth(bind_id, admin)
 
        admin.start
@@ -523,6 +567,12 @@ module Karafka
 
      private
 
+      # @return [Integer] number of seconds to wait. `rdkafka` requires this value
+      #   (`max_wait_time`) to be provided in seconds while we define it in ms hence the conversion
+      def max_wait_time_seconds
+        max_wait_time / 1_000.0
+      end
+
      # Adds a new callback for given rdkafka instance for oauth token refresh (if needed)
      #
      # @param id [String, Symbol] unique (for the lifetime of instance) id that we use for
@@ -561,31 +611,33 @@ module Karafka
      # @param handler [Proc] the wait handler operation
      # @param breaker [Proc] extra condition upon timeout that indicates things were finished ok
      def with_re_wait(handler, breaker)
-
-
+        start_time = monotonic_now
+        # Convert milliseconds to seconds for sleep
+        sleep_time = retry_backoff / 1000.0
 
-
+        loop do
+          handler.call
 
-
-        # not visible and we need to wait
-        raise(Errors::ResultNotVisibleError) unless breaker.call
-      rescue Rdkafka::AbstractHandle::WaitTimeoutError, Errors::ResultNotVisibleError
-        return if breaker.call
+          sleep(sleep_time)
 
-
+          return if breaker.call
+        rescue Rdkafka::AbstractHandle::WaitTimeoutError
+          return if breaker.call
 
-
+          next if monotonic_now - start_time < max_retries_duration
+
+          raise(Errors::ResultNotVisibleError)
+        end
      end
 
      # @param type [Symbol] type of config we want
      # @param settings [Hash] extra settings for config (if needed)
      # @return [::Rdkafka::Config] rdkafka config
      def config(type, settings)
-
-          .kafka
+        app_kafka
          .then(&:dup)
-          .merge(
-          .tap { |config| config[:'group.id'] =
+          .merge(admin_kafka)
+          .tap { |config| config[:'group.id'] = group_id }
          # We merge after setting the group id so it can be altered if needed
          # In general in admin we only should alter it when we need to impersonate a given
          # consumer group or do something similar
@@ -619,11 +671,6 @@ module Karafka
          offset
        end
      end
-
-      # @return [Karafka::Core::Configurable::Node] root node config
-      def app_config
-        ::Karafka::App.config
-      end
    end
  end
end
```
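The split above separates copying from renaming: `copy_consumer_group` moves offsets and reports via its boolean whether anything was migrated, while `rename_consumer_group` builds on it and optionally removes the old group. A usage sketch with illustrative group and topic names:

```ruby
require 'karafka'

# Copy offsets only; both consumer groups keep existing afterwards
Karafka::Admin.copy_consumer_group('orders-app', 'orders-app-v2', %w[orders])

# Rename: copy offsets and, by default, delete the previous group
Karafka::Admin.rename_consumer_group('orders-app', 'orders-app-v2', %w[orders])

# Rename but keep the previous group around
Karafka::Admin.rename_consumer_group(
  'orders-app',
  'orders-app-v2',
  %w[orders],
  delete_previous: false
)
```

Both methods return `false` when there is nothing to migrate, so callers can branch on the result instead of rescuing.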
data/lib/karafka/base_consumer.rb
CHANGED

```diff
@@ -21,7 +21,7 @@ module Karafka
 
    # @return [String] id of the current consumer
    attr_reader :id
-    # @return [Karafka::
+    # @return [Karafka::Messages::Messages] current messages batch
    attr_accessor :messages
    # @return [Karafka::Connection::Client] kafka connection client
    attr_accessor :client
@@ -304,7 +304,12 @@ module Karafka
 
      offset = nil if offset == :consecutive
 
-      client.pause(
+      client.pause(
+        topic.name,
+        partition,
+        offset,
+        coordinator.pause_tracker.current_timeout
+      )
 
      # Indicate, that user took a manual action of pausing
      coordinator.manual_pause if manual_pause
@@ -333,17 +338,21 @@ module Karafka
 
    # Seeks in the context of current topic and partition
    #
-    # @param offset [Integer, Time
-    #   want to seek
+    # @param offset [Integer, Time, Symbol, String] one of:
+    #   - offset where we want to seek
+    #   - time of the offset where we want to seek
+    #   - :earliest (or as a string) to move to earliest message
+    #   - :latest (or as a string) to move to latest (high-watermark)
+    #
    # @param manual_seek [Boolean] Flag to differentiate between user seek and system/strategy
    #   based seek. User seek operations should take precedence over system actions, hence we need
    #   to know who invoked it.
-    # @param reset_offset [Boolean] should we reset offset when seeking backwards. It is false
-    #
-    #   for given consumer group. It
+    # @param reset_offset [Boolean] should we reset offset when seeking backwards. It is false
+    #   it prevents marking in the offset that was earlier than the highest marked offset
+    #   for given consumer group. It is set to true by default to reprocess data once again and
    #   want to make sure that the marking starts from where we moved to.
    # @note Please note, that if you are seeking to a time offset, getting the offset is blocking
-    def seek(offset, manual_seek = true, reset_offset:
+    def seek(offset, manual_seek = true, reset_offset: true)
      coordinator.manual_seek if manual_seek
      self.seek_offset = nil if reset_offset
 
```
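With the widened `#seek` signature above, a consumer can jump to the extremes of a partition without computing offsets first. A sketch (only the `seek(:earliest)` / `seek(:latest)` calls come from the diff; the consumer class and the replay trigger are hypothetical):

```ruby
class ReplayingConsumer < Karafka::BaseConsumer
  def consume
    messages.each { |message| puts message.raw_payload }

    # On an app-specific signal, reprocess this partition from the beginning.
    # Strings work the same way: seek('earliest') / seek('latest')
    seek(:earliest) if replay_requested?
  end

  private

  # Hypothetical app-level trigger
  def replay_requested?
    false
  end
end
```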
data/lib/karafka/cli/base.rb
CHANGED

```diff
@@ -112,7 +112,7 @@ module Karafka
            *[names, option[2], option[1]].flatten
          ) { |value| options[option[0]] = value }
        end
-      end.parse(ARGV)
+      end.parse(ARGV.dup)
 
      options
    end
@@ -130,8 +130,14 @@ module Karafka
    # given Cli command
    # @example for Karafka::Cli::Install
    #   name #=> 'install'
+    # @example for Karafka::Cli::TestMe
+    #   name => 'test_me'
    def name
-      to_s
+      to_s
+        .split('::')
+        .last
+        .gsub(/([a-z\d])([A-Z])/, '\1_\2')
+        .downcase
    end
 
    # @return [Array<String>] names and aliases for command matching
```
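The new `#name` demodulizes and underscores multi-word command classes instead of returning the raw class string. The transformation in isolation (plain Ruby, illustrative inputs):

```ruby
# Demodulize, then insert an underscore at every lower/upper boundary
%w[Karafka::Cli::Install Karafka::Cli::TestMe].each do |klass_name|
  puts klass_name
    .split('::')
    .last
    .gsub(/([a-z\d])([A-Z])/, '\1_\2')
    .downcase
end
# install
# test_me
```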
data/lib/karafka/cli/topics/align.rb
CHANGED

```diff
@@ -30,10 +30,13 @@ module Karafka
            return false
          end
 
-
-
-
-
+          resources_to_migrate.each do |resource|
+            supervised("Updating topic: #{resource.name} configuration") do
+              Karafka::Admin::Configs.alter(resource)
+            end
+
+            puts "#{green('Updated')} topic #{resource.name} configuration."
+          end
 
          true
        end
```
data/lib/karafka/cli/topics/base.rb
CHANGED

```diff
@@ -12,6 +12,23 @@ module Karafka
 
        private
 
+        # Used to run Karafka Admin commands that talk with Kafka and that can fail due to broker
+        # errors and other issues. We catch errors and provide nicer printed output prior to
+        # re-raising the mapped error for proper exit code status handling
+        #
+        # @param operation_message [String] message that we use to print that it is going to run
+        #   and if case if failed with a failure indication.
+        def supervised(operation_message)
+          puts "#{operation_message}..."
+
+          yield
+        rescue Rdkafka::RdkafkaError => e
+          puts "#{operation_message} #{red('failed')}:"
+          puts e
+
+          raise Errors::CommandValidationError, cause: e
+        end
+
        # @return [Array<Karafka::Routing::Topic>] all available topics that can be managed
        # @note If topic is defined in multiple consumer groups, first config will be used. This
        #   means, that this CLI will not work for simultaneous management of multiple clusters
```
data/lib/karafka/cli/topics/create.rb
CHANGED

```diff
@@ -15,13 +15,15 @@ module Karafka
          if existing_topics_names.include?(name)
            puts "#{yellow('Skipping')} because topic #{name} already exists."
          else
-
-
-
-
-
-
-
+            supervised("Creating topic #{name}") do
+              Admin.create_topic(
+                name,
+                topic.declaratives.partitions,
+                topic.declaratives.replication_factor,
+                topic.declaratives.details
+              )
+            end
+
            puts "#{green('Created')} topic #{name}."
            any_created = true
          end
```
data/lib/karafka/cli/topics/delete.rb
CHANGED

```diff
@@ -13,8 +13,10 @@ module Karafka
          name = topic.name
 
          if existing_topics_names.include?(name)
-
-
+            supervised("Deleting topic #{name}") do
+              Admin.delete_topic(name)
+            end
+
            puts "#{green('Deleted')} topic #{name}."
            any_deleted = true
          else
```
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Karafka
|
4
|
+
class Cli
|
5
|
+
class Topics < Cli::Base
|
6
|
+
# Declarative topics CLI sub-help
|
7
|
+
class Help < Base
|
8
|
+
# Displays help information for all available topics management commands
|
9
|
+
def call
|
10
|
+
puts <<~HELP
|
11
|
+
Karafka topics commands:
|
12
|
+
align # Aligns configuration of all declarative topics based on definitions
|
13
|
+
create # Creates topics with appropriate settings
|
14
|
+
delete # Deletes all topics defined in the routes
|
15
|
+
help # Describes available topics management commands
|
16
|
+
migrate # Creates missing topics, repartitions existing and aligns configuration
|
17
|
+
plan # Plans migration process and prints changes to be applied
|
18
|
+
repartition # Adds additional partitions to topics with fewer partitions than expected
|
19
|
+
reset # Deletes and re-creates all topics
|
20
|
+
|
21
|
+
Options:
|
22
|
+
--detailed-exitcode # Provides detailed exit codes (0=no changes, 1=error, 2=changes applied)
|
23
|
+
|
24
|
+
Examples:
|
25
|
+
karafka topics create
|
26
|
+
karafka topics plan --detailed-exitcode
|
27
|
+
karafka topics migrate
|
28
|
+
karafka topics align
|
29
|
+
|
30
|
+
Note: All admin operations run on the default cluster only.
|
31
|
+
HELP
|
32
|
+
|
33
|
+
# We return false to indicate with exit code 0 that no changes were applied
|
34
|
+
false
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
data/lib/karafka/cli/topics/repartition.rb
CHANGED

```diff
@@ -21,8 +21,10 @@ module Karafka
          existing_count = existing_partitions.fetch(name, false)
 
          if existing_count && existing_count < desired_count
-
-
+            supervised("Increasing number of partitions to #{desired_count} on topic #{name}") do
+              Admin.create_partitions(name, desired_count)
+            end
+
            change = desired_count - existing_count
            puts "#{green('Created')} #{change} additional partitions on topic #{name}."
            any_repartitioned = true
```
data/lib/karafka/cli/topics.rb
CHANGED

```diff
@@ -27,10 +27,13 @@ module Karafka
      #   crashes
      CHANGES_EXIT_CODE = 2
 
-
+      # Used when there was an error during execution.
+      ERROR_EXIT_CODE = 1
+
+      private_constant :NO_CHANGES_EXIT_CODE, :CHANGES_EXIT_CODE, :ERROR_EXIT_CODE
 
      # @param action [String] action we want to take
-      def call(action = '
+      def call(action = 'help')
        detailed_exit_code = options.fetch(:detailed_exitcode, false)
 
        command = case action
@@ -48,8 +51,10 @@ module Karafka
                    Topics::Align
                  when 'plan'
                    Topics::Plan
+                  when 'help'
+                    Topics::Help
                  else
-                    raise ::
+                    raise Errors::UnrecognizedCommandError, "Unrecognized topics action: #{action}"
                  end
 
        changes = command.new.call
@@ -57,6 +62,8 @@ module Karafka
        return unless detailed_exit_code
 
        changes ? exit(CHANGES_EXIT_CODE) : exit(NO_CHANGES_EXIT_CODE)
+      rescue Errors::CommandValidationError
+        exit(ERROR_EXIT_CODE)
      end
    end
  end
```
data/lib/karafka/cli.rb
CHANGED

data/lib/karafka/connection/client.rb
CHANGED

```diff
@@ -9,6 +9,11 @@ module Karafka
    # closed consumer instance as it causes Ruby VM process to crash.
    class Client
      include ::Karafka::Core::Helpers::Time
+      include Helpers::ConfigImporter.new(
+        logger: %i[logger],
+        tick_interval: %i[internal tick_interval],
+        shutdown_timeout: %i[shutdown_timeout]
+      )
 
      attr_reader :rebalance_manager
 
@@ -65,9 +70,8 @@ module Karafka
        @closed = false
        @subscription_group = subscription_group
        @buffer = RawMessagesBuffer.new
-        @tick_interval = ::Karafka::App.config.internal.tick_interval
        @rebalance_manager = RebalanceManager.new(@subscription_group.id, @buffer)
-        @rebalance_callback = Instrumentation::Callbacks::Rebalance.new(@subscription_group)
+        @rebalance_callback = Instrumentation::Callbacks::Rebalance.new(@subscription_group, id)
 
        @interval_runner = Helpers::IntervalRunner.new do
          events_poll
@@ -221,10 +225,14 @@ module Karafka
      # @param offset [Integer, nil] offset of the message on which we want to pause (this message
      #   will be reprocessed after getting back to processing) or nil if we want to pause and
      #   resume from the consecutive offset (+1 from the last message passed to us by librdkafka)
+      # @param timeout [Integer] number of ms timeout of pause. It is used only for
+      #   instrumentation and not in the pause itself as pausing on this level is infinite always.
      # @note This will pause indefinitely and requires manual `#resume`
      # @note When `#internal_seek` is not involved (when offset is `nil`) we will not purge the
      #   librdkafka buffers and continue from the last cursor offset
-
+      # @note We accept the timeout value on this layer to have a cohesive pause/resume
+      #   instrumentation, where all the details are available. It is especially needed, when
+      def pause(topic, partition, offset = nil, timeout = 0)
        @mutex.synchronize do
          # Do not pause if the client got closed, would not change anything
          return if @closed
@@ -243,7 +251,8 @@ module Karafka
            subscription_group: @subscription_group,
            topic: topic,
            partition: partition,
-            offset: offset
+            offset: offset,
+            timeout: timeout
          )
 
          @paused_tpls[topic][partition] = tpl
@@ -313,7 +322,7 @@ module Karafka
        @unsubscribing = true
 
        # Give 50% of time for the final close before we reach the forceful
-        max_wait =
+        max_wait = shutdown_timeout * COOP_UNSUBSCRIBE_FACTOR
        used = 0
        stopped_at = monotonic_now
 
@@ -418,6 +427,15 @@ module Karafka
        @wrapped_kafka.committed(tpl)
      end
 
+      # Reads watermark offsets for given topic
+      #
+      # @param topic [String] topic name
+      # @param partition [Integer] partition number
+      # @return [Array<Integer, Integer>] watermark offsets (low, high)
+      def query_watermark_offsets(topic, partition)
+        @wrapped_kafka.query_watermark_offsets(topic, partition)
+      end
+
      private
 
      # When we cannot store an offset, it means we no longer own the partition
@@ -472,15 +490,27 @@ module Karafka
          message.offset = detected_partition&.offset || raise(Errors::InvalidTimeBasedOffsetError)
        end
 
+        # Those two are librdkafka hardcoded values
+        message.offset = -1 if message.offset.to_s == 'latest'
+        message.offset = -2 if message.offset.to_s == 'earliest'
+
        # Never seek if we would get the same location as we would get without seeking
        # This prevents us from the expensive buffer purges that can lead to increased network
        # traffic and can cost a lot of money
        #
        # This code adds around 0.01 ms per seek but saves from many user unexpected behaviours in
        # seeking and pausing
-
+        position = topic_partition_position(message.topic, message.partition)
+
+        # Always seek if current position cannot be fetched or is negative. Offset seek can also
+        # be negative (-1 or -2) and we should not compare it with the position because they are
+        # special (earliest or latest)
+        return kafka.seek(message) if position.negative?
+        # If offset is the same as the next position, we don't have to seek to get there, hence
+        # only in such case we can do nothing.
+        return kafka.seek(message) if message.offset != position
 
-
+        nil
      end
 
      # Commits the stored offsets in a sync way and closes the consumer.
@@ -573,7 +603,7 @@ module Karafka
        # We should not run a single poll longer than the tick frequency. Otherwise during a single
        # `#batch_poll` we would not be able to run `#events_poll` often enough effectively
        # blocking events from being handled.
-        poll_tick = timeout >
+        poll_tick = timeout > tick_interval ? tick_interval : timeout
 
        result = kafka.poll(poll_tick)
 
@@ -655,7 +685,7 @@ module Karafka
      # Builds a new rdkafka consumer instance based on the subscription group configuration
      # @return [Rdkafka::Consumer]
      def build_consumer
-        ::Rdkafka::Config.logger =
+        ::Rdkafka::Config.logger = logger
 
        # We need to refresh the setup of this subscription group in case we started running in a
        # swarm. The initial configuration for validation comes from the parent node, but it needs
```
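The newly exposed `#query_watermark_offsets` delegates to the wrapped rdkafka consumer, which makes a rough lag estimate possible from inside a consumer. A sketch (the `client` and `messages` accessors appear earlier in this diff; the consumer class and the lag arithmetic are illustrative):

```ruby
class LagReportingConsumer < Karafka::BaseConsumer
  def consume
    partition = messages.metadata.partition

    # Low/high watermarks straight from the underlying librdkafka consumer
    low, high = client.query_watermark_offsets(topic.name, partition)

    # High watermark minus the next offset we would process approximates lag
    lag = high - (messages.last.offset + 1)

    Karafka.logger.info(
      "#{topic.name}/#{partition} watermarks low=#{low} high=#{high} lag=#{lag}"
    )
  end
end
```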
|