karafka 2.4.18 → 2.5.0.beta1
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/CODEOWNERS +3 -0
- data/.github/workflows/ci.yml +58 -14
- data/.github/workflows/verify-action-pins.yml +16 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +53 -0
- data/Gemfile +3 -3
- data/Gemfile.lock +55 -58
- data/LICENSE-COMM +2 -2
- data/bin/clean_kafka +43 -0
- data/bin/integrations +17 -5
- data/bin/rspecs +15 -3
- data/bin/verify_kafka_warnings +35 -0
- data/bin/verify_topics_naming +27 -0
- data/config/locales/errors.yml +3 -0
- data/config/locales/pro_errors.yml +13 -2
- data/docker-compose.yml +1 -1
- data/examples/payloads/json/enrollment_event.json +579 -0
- data/examples/payloads/json/ingestion_event.json +30 -0
- data/examples/payloads/json/transaction_event.json +17 -0
- data/examples/payloads/json/user_event.json +11 -0
- data/karafka.gemspec +3 -3
- data/lib/karafka/active_job/current_attributes.rb +1 -1
- data/lib/karafka/admin/acl.rb +5 -1
- data/lib/karafka/admin.rb +51 -19
- data/lib/karafka/base_consumer.rb +17 -8
- data/lib/karafka/cli/base.rb +8 -2
- data/lib/karafka/connection/client.rb +20 -7
- data/lib/karafka/connection/listener.rb +24 -12
- data/lib/karafka/connection/messages_buffer.rb +1 -1
- data/lib/karafka/connection/proxy.rb +3 -0
- data/lib/karafka/contracts/config.rb +3 -0
- data/lib/karafka/contracts/topic.rb +1 -1
- data/lib/karafka/errors.rb +11 -0
- data/lib/karafka/helpers/async.rb +3 -1
- data/lib/karafka/instrumentation/callbacks/rebalance.rb +5 -1
- data/lib/karafka/instrumentation/logger_listener.rb +86 -23
- data/lib/karafka/instrumentation/proctitle_listener.rb +5 -1
- data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +2 -2
- data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
- data/lib/karafka/pro/cleaner.rb +8 -0
- data/lib/karafka/pro/cli/parallel_segments/base.rb +89 -0
- data/lib/karafka/pro/cli/parallel_segments/collapse.rb +164 -0
- data/lib/karafka/pro/cli/parallel_segments/distribute.rb +164 -0
- data/lib/karafka/pro/cli/parallel_segments.rb +60 -0
- data/lib/karafka/pro/connection/manager.rb +5 -8
- data/lib/karafka/pro/encryption.rb +8 -0
- data/lib/karafka/pro/instrumentation/performance_tracker.rb +1 -1
- data/lib/karafka/pro/iterator/expander.rb +5 -3
- data/lib/karafka/pro/iterator/tpl_builder.rb +23 -0
- data/lib/karafka/pro/loader.rb +10 -0
- data/lib/karafka/pro/processing/coordinator.rb +4 -1
- data/lib/karafka/pro/processing/coordinators/errors_tracker.rb +27 -3
- data/lib/karafka/pro/processing/coordinators/filters_applier.rb +11 -0
- data/lib/karafka/pro/processing/filters/base.rb +10 -2
- data/lib/karafka/pro/processing/filters/expirer.rb +5 -0
- data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb +2 -2
- data/lib/karafka/pro/processing/filters/virtual_limiter.rb +5 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/base.rb +73 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/default.rb +85 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/mom.rb +66 -0
- data/lib/karafka/pro/processing/partitioner.rb +1 -13
- data/lib/karafka/pro/processing/piping/consumer.rb +13 -13
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/default.rb +36 -8
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +14 -10
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +3 -1
- data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/ftr/default.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +4 -1
- data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
- data/lib/karafka/pro/processing/virtual_partitions/distributors/balanced.rb +50 -0
- data/lib/karafka/pro/processing/virtual_partitions/distributors/base.rb +29 -0
- data/lib/karafka/pro/processing/virtual_partitions/distributors/consistent.rb +27 -0
- data/lib/karafka/pro/recurring_tasks/contracts/config.rb +8 -4
- data/lib/karafka/pro/recurring_tasks/dispatcher.rb +3 -3
- data/lib/karafka/pro/recurring_tasks/setup/config.rb +7 -2
- data/lib/karafka/pro/recurring_tasks.rb +13 -0
- data/lib/karafka/pro/routing/features/dead_letter_queue/topic.rb +1 -1
- data/lib/karafka/pro/routing/features/multiplexing/config.rb +1 -0
- data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +17 -0
- data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +5 -2
- data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +8 -1
- data/lib/karafka/pro/routing/features/parallel_segments/builder.rb +47 -0
- data/lib/karafka/pro/routing/features/parallel_segments/config.rb +27 -0
- data/lib/karafka/pro/routing/features/parallel_segments/consumer_group.rb +83 -0
- data/lib/karafka/pro/routing/features/parallel_segments/contracts/consumer_group.rb +49 -0
- data/lib/karafka/pro/routing/features/parallel_segments/topic.rb +43 -0
- data/lib/karafka/pro/routing/features/parallel_segments.rb +24 -0
- data/lib/karafka/pro/routing/features/patterns/pattern.rb +1 -1
- data/lib/karafka/pro/routing/features/recurring_tasks/builder.rb +2 -2
- data/lib/karafka/pro/routing/features/scheduled_messages/builder.rb +10 -6
- data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +20 -2
- data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +8 -2
- data/lib/karafka/pro/scheduled_messages/consumer.rb +14 -15
- data/lib/karafka/pro/scheduled_messages/daily_buffer.rb +9 -6
- data/lib/karafka/pro/scheduled_messages/deserializers/headers.rb +7 -1
- data/lib/karafka/pro/scheduled_messages/max_epoch.rb +15 -6
- data/lib/karafka/pro/scheduled_messages.rb +13 -0
- data/lib/karafka/processing/coordinators_buffer.rb +1 -0
- data/lib/karafka/processing/strategies/default.rb +4 -4
- data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
- data/lib/karafka/routing/subscription_group.rb +1 -1
- data/lib/karafka/runner.rb +7 -1
- data/lib/karafka/server.rb +5 -0
- data/lib/karafka/setup/attributes_map.rb +2 -0
- data/lib/karafka/setup/config.rb +22 -1
- data/lib/karafka/setup/defaults_injector.rb +26 -1
- data/lib/karafka/status.rb +6 -1
- data/lib/karafka/swarm/node.rb +31 -0
- data/lib/karafka/swarm/supervisor.rb +4 -0
- data/lib/karafka/templates/karafka.rb.erb +14 -1
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +17 -9
- data/renovate.json +14 -2
- data.tar.gz.sig +0 -0
- metadata +36 -11
- metadata.gz.sig +0 -0
data/examples/payloads/json/ingestion_event.json
ADDED
@@ -0,0 +1,30 @@
+{
+  "metadata": {
+    "request_id": "0bb22210-4a7b-4df3-a0a2-4fed464f4296",
+    "tenant_id": "tenant_2",
+    "event": "service_2.file_ingested",
+    "raw_tenant_file_id": "64eef122-39a4-4ba4-a0eb-2115bb3be545",
+    "row_number": 4,
+    "batch_item_run_id": "61eb1d69-88ad-421a-a57d-543b1d674a86"
+  },
+  "data": {
+    "user": {
+      "partner_user_id": "111111111166666666_555555522222222222",
+      "email": "email@example.com",
+      "status": "active",
+      "first_name": "first name",
+      "last_name": "last name",
+      "birthdate": "1999-09-22",
+      "country_code": "US"
+    },
+    "product": {
+      "custom_fields": {
+        "membership_id": "tttttttttt"
+      },
+      "reference_id": "yyyyyyyyyyy",
+      "code": "404",
+      "start_time": "2024-05-12 0:00:00",
+      "partner_status": "active"
+    }
+  }
+}
data/examples/payloads/json/transaction_event.json
ADDED
@@ -0,0 +1,17 @@
+{
+  "event_type": "points_transaction",
+  "source": "service_4",
+  "data": {
+    "points_transaction_id": "6d853550-86c0-4878-a234-32ad3b80e7d7",
+    "user_id": "435092f6-a1e3-4c3b-8f22-bc809d04d9f5",
+    "tenant_id": "tenant_1",
+    "category": "redemption",
+    "product_type": "shopping_cart",
+    "product_sub_type": "cashback",
+    "points": 1000,
+    "transaction_time": "2025-05-05T13:13:57Z",
+    "product_name": "Tenant 1 Cashback",
+    "order_item_type": "CashRedemptionOrderItem",
+    "reward_amount": 10.0
+  }
+}
data/examples/payloads/json/user_event.json
ADDED
@@ -0,0 +1,11 @@
+{
+  "data": {
+    "user_id": "686eeceb-c763-46fc-b946-17663bb827b4",
+    "tenant_id": "tenant_1",
+    "points_account_id": "95fa8ca3-f89e-4d9b-8aa7-a9ba3b460b49"
+  },
+  "metadata": {
+    "event": "service_1.user_activated",
+    "request_id": "ee3b35f4-68c0-42ab-aa8c-6ca146d3ca89"
+  }
+}
data/karafka.gemspec
CHANGED
@@ -22,9 +22,9 @@ Gem::Specification.new do |spec|
   DESC

   spec.add_dependency 'base64', '~> 0.2'
-  spec.add_dependency 'karafka-core', '>= 2.
-  spec.add_dependency 'karafka-rdkafka', '>= 0.
-  spec.add_dependency 'waterdrop', '>= 2.
+  spec.add_dependency 'karafka-core', '>= 2.5.0', '< 2.6.0'
+  spec.add_dependency 'karafka-rdkafka', '>= 0.19.2'
+  spec.add_dependency 'waterdrop', '>= 2.8.3', '< 3.0.0'
   spec.add_dependency 'zeitwerk', '~> 2.3'

   spec.required_ruby_version = '>= 3.0.0'
data/lib/karafka/active_job/current_attributes.rb
CHANGED
@@ -29,7 +29,7 @@ module Karafka
       # Prevent registering same klass multiple times
       next if Dispatcher._cattr_klasses.value?(stringified_klass)

-      key = "cattr_#{Dispatcher._cattr_klasses.
+      key = "cattr_#{Dispatcher._cattr_klasses.size}"

      Dispatcher._cattr_klasses[key] = stringified_klass
      Consumer._cattr_klasses[key] = stringified_klass
data/lib/karafka/admin/acl.rb
CHANGED
@@ -11,6 +11,10 @@ module Karafka
   # This API works based on ability to create a `Karafka:Admin::Acl` object that can be then used
   # using `#create`, `#delete` and `#describe` class API.
   class Acl
+    extend Helpers::ConfigImporter.new(
+      max_wait_time: %i[admin max_wait_time]
+    )
+
     # Types of resources for which we can assign permissions.
     #
     # Resource refers to any entity within the Kafka ecosystem for which access control can be
@@ -162,7 +166,7 @@ module Karafka
     # Makes sure that admin is closed afterwards.
     def with_admin_wait
       Admin.with_admin do |admin|
-        yield(admin).wait(max_wait_timeout:
+        yield(admin).wait(max_wait_timeout: max_wait_time)
       end
     end

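Several changes in this release swap direct `::Karafka::App.config` lookups for `Helpers::ConfigImporter` includes/extends, as in the `Acl` hunk above. As a rough mental model only (a minimal sketch; the class name and internals below are illustrative, not Karafka's actual code), such a mixin builder could look like:

    # Hypothetical sketch of a config-importing mixin builder
    class ConfigImporterSketch < Module
      def initialize(mappings)
        super()

        mappings.each do |name, path|
          # %i[admin max_wait_time] resolves to Karafka::App.config.admin.max_wait_time
          define_method(name) do
            path.reduce(Karafka::App.config) { |node, key| node.public_send(key) }
          end

          private(name)
        end
      end
    end

With that shape, `extend Helpers::ConfigImporter.new(max_wait_time: %i[admin max_wait_time])` gives `Acl` a class-level `max_wait_time` reader, which is exactly what the updated `with_admin_wait` uses.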
data/lib/karafka/admin.rb
CHANGED
@@ -10,11 +10,16 @@ module Karafka
   # Cluster on which operations are performed can be changed via `admin.kafka` config, however
   # there is no multi-cluster runtime support.
   module Admin
-    #
-    #
-
+    # 2010-01-01 00:00:00 - way before Kafka was released so no messages should exist prior to
+    # this date
+    # We do not use the explicit -2 librdkafka value here because we resolve this offset without
+    # consuming data
+    LONG_TIME_AGO = Time.at(1_262_300_400)

-
+    # one day in seconds for future time reference
+    DAY_IN_SECONDS = 60 * 60 * 24
+
+    private_constant :LONG_TIME_AGO, :DAY_IN_SECONDS

     class << self
       # Allows us to read messages from the topic
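For reference, the two constants introduced above resolve as follows (a quick sanity check, not part of the diff):

    Time.at(1_262_300_400).utc #=> 2009-12-31 23:00:00 UTC, i.e. 2010-01-01 00:00:00 CET
    60 * 60 * 24               #=> 86400, one day in seconds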
@@ -55,7 +60,7 @@ module Karafka
       possible_range = requested_range.select { |offset| available_range.include?(offset) }

       start_offset = possible_range.first
-      count = possible_range.
+      count = possible_range.size

       tpl.add_topic_and_partitions_with_offsets(name, partition => start_offset)
       consumer.assign(tpl)
@@ -203,14 +208,14 @@ module Karafka
       # Earliest is not always 0. When compacting/deleting it can be much later, that's why
       # we fetch the oldest possible offset
       when 'earliest'
-
+        LONG_TIME_AGO
       # Latest will always be the high-watermark offset and we can get it just by getting
       # a future position
       when 'latest'
-        Time.now +
-      # Same as `'
+        Time.now + DAY_IN_SECONDS
+      # Same as `'earliest'`
       when false
-
+        LONG_TIME_AGO
       # Regular offset case
       else
         position
@@ -274,27 +279,24 @@ module Karafka
       end
     end

-    # Takes consumer group and its topics and
+    # Takes consumer group and its topics and copies all the offsets to a new named group
     #
     # @param previous_name [String] old consumer group name
     # @param new_name [String] new consumer group name
     # @param topics [Array<String>] topics for which we want to migrate offsets during rename
-    # @
-    #   Defaults to true.
+    # @return [Boolean] true if anything was migrated, otherwise false
     #
     # @note This method should **not** be executed on a running consumer group as it creates a
     #   "fake" consumer and uses it to move offsets.
     #
-    # @note After migration unless `delete_previous` is set to `false`, old group will be
-    #   removed.
-    #
     # @note If new consumer group exists, old offsets will be added to it.
-    def
+    def copy_consumer_group(previous_name, new_name, topics)
      remap = Hash.new { |h, k| h[k] = {} }

      old_lags = read_lags_with_offsets({ previous_name => topics })

-      return if old_lags.empty?
+      return false if old_lags.empty?
+      return false if old_lags.values.all? { |topic_data| topic_data.values.all?(&:empty?) }

      read_lags_with_offsets({ previous_name => topics })
        .fetch(previous_name)
@@ -311,9 +313,35 @@ module Karafka

      seek_consumer_group(new_name, remap)

-
+      true
+    end
+
+    # Takes consumer group and its topics and migrates all the offsets to a new named group
+    #
+    # @param previous_name [String] old consumer group name
+    # @param new_name [String] new consumer group name
+    # @param topics [Array<String>] topics for which we want to migrate offsets during rename
+    # @param delete_previous [Boolean] should we delete previous consumer group after rename.
+    #   Defaults to true.
+    # @return [Boolean] true if rename (and optionally removal) was ok or false if there was
+    #   nothing really to rename
+    #
+    # @note This method should **not** be executed on a running consumer group as it creates a
+    #   "fake" consumer and uses it to move offsets.
+    #
+    # @note After migration unless `delete_previous` is set to `false`, old group will be
+    #   removed.
+    #
+    # @note If new consumer group exists, old offsets will be added to it.
+    def rename_consumer_group(previous_name, new_name, topics, delete_previous: true)
+      copy_result = copy_consumer_group(previous_name, new_name, topics)
+
+      return false unless copy_result
+      return copy_result unless delete_previous

      delete_consumer_group(previous_name)
+
+      true
    end

    # Removes given consumer group (if exists)
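Based on the signatures above, the new copy/rename admin APIs would be used roughly like this (group and topic names are illustrative):

    # Copy offsets to a new group, leaving the old group in place
    Karafka::Admin.copy_consumer_group('old_group', 'new_group', %w[orders payments])

    # Rename: copy offsets and (by default) remove the old group afterwards
    Karafka::Admin.rename_consumer_group('old_group', 'new_group', %w[orders payments])

    # Rename but keep the old group around
    Karafka::Admin.rename_consumer_group(
      'old_group',
      'new_group',
      %w[orders payments],
      delete_previous: false
    )

Both return `false` when there is nothing to migrate, so callers can branch on the result.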
@@ -509,7 +537,11 @@ module Karafka
     def with_admin
       bind_id = SecureRandom.uuid

-      admin = config(:producer, {}).admin(
+      admin = config(:producer, {}).admin(
+        native_kafka_auto_start: false,
+        native_kafka_poll_timeout_ms: app_config.admin.poll_timeout
+      )
+
      bind_oauth(bind_id, admin)

      admin.start
data/lib/karafka/base_consumer.rb
CHANGED
@@ -21,7 +21,7 @@ module Karafka

     # @return [String] id of the current consumer
     attr_reader :id
-    # @return [Karafka::
+    # @return [Karafka::Messages::Messages] current messages batch
     attr_accessor :messages
     # @return [Karafka::Connection::Client] kafka connection client
     attr_accessor :client
@@ -304,7 +304,12 @@ module Karafka

       offset = nil if offset == :consecutive

-      client.pause(
+      client.pause(
+        topic.name,
+        partition,
+        offset,
+        coordinator.pause_tracker.current_timeout
+      )

       # Indicate, that user took a manual action of pausing
       coordinator.manual_pause if manual_pause
@@ -333,17 +338,21 @@ module Karafka

     # Seeks in the context of current topic and partition
     #
-    # @param offset [Integer, Time
-    #   want to seek
+    # @param offset [Integer, Time, Symbol, String] one of:
+    #   - offset where we want to seek
+    #   - time of the offset where we want to seek
+    #   - :earliest (or as a string) to move to earliest message
+    #   - :latest (or as a string) to move to latest (high-watermark)
+    #
     # @param manual_seek [Boolean] Flag to differentiate between user seek and system/strategy
     #   based seek. User seek operations should take precedence over system actions, hence we need
     #   to know who invoked it.
-    # @param reset_offset [Boolean] should we reset offset when seeking backwards. It is false
-    #
-    #   for given consumer group. It
+    # @param reset_offset [Boolean] should we reset offset when seeking backwards. It is false
+    #   it prevents marking in the offset that was earlier than the highest marked offset
+    #   for given consumer group. It is set to true by default to reprocess data once again and
     #   want to make sure that the marking starts from where we moved to.
     # @note Please note, that if you are seeking to a time offset, getting the offset is blocking
-    def seek(offset, manual_seek = true, reset_offset:
+    def seek(offset, manual_seek = true, reset_offset: true)
       coordinator.manual_seek if manual_seek
       self.seek_offset = nil if reset_offset

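Given the widened `#seek` signature, a consumer can now seek symbolically as well as by offset or time. A minimal sketch showing each form (the consumer and flow are illustrative):

    class EventsConsumer < Karafka::BaseConsumer
      def consume
        # Numeric offset seek, as before
        seek(100)
        # Time-based seek (resolving the offset is blocking)
        seek(Time.now - 3600)
        # New symbolic positions
        seek(:earliest) # first available message
        seek(:latest)   # high-watermark position
      end
    end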
data/lib/karafka/cli/base.rb
CHANGED
@@ -112,7 +112,7 @@ module Karafka
           *[names, option[2], option[1]].flatten
         ) { |value| options[option[0]] = value }
       end
-    end.parse(ARGV)
+    end.parse(ARGV.dup)

     options
   end
@@ -130,8 +130,14 @@ module Karafka
     # given Cli command
     # @example for Karafka::Cli::Install
     #   name #=> 'install'
+    # @example for Karafka::Cli::TestMe
+    #   name => 'test_me'
     def name
-      to_s
+      to_s
+        .split('::')
+        .last
+        .gsub(/([a-z\d])([A-Z])/, '\1_\2')
+        .downcase
     end

     # @return [Array<String>] names and aliases for command matching
data/lib/karafka/connection/client.rb
CHANGED
@@ -9,6 +9,11 @@ module Karafka
     # closed consumer instance as it causes Ruby VM process to crash.
     class Client
       include ::Karafka::Core::Helpers::Time
+      include Helpers::ConfigImporter.new(
+        logger: %i[logger],
+        tick_interval: %i[internal tick_interval],
+        shutdown_timeout: %i[shutdown_timeout]
+      )

       attr_reader :rebalance_manager

@@ -65,9 +70,8 @@ module Karafka
       @closed = false
       @subscription_group = subscription_group
       @buffer = RawMessagesBuffer.new
-      @tick_interval = ::Karafka::App.config.internal.tick_interval
       @rebalance_manager = RebalanceManager.new(@subscription_group.id, @buffer)
-      @rebalance_callback = Instrumentation::Callbacks::Rebalance.new(@subscription_group)
+      @rebalance_callback = Instrumentation::Callbacks::Rebalance.new(@subscription_group, id)

       @interval_runner = Helpers::IntervalRunner.new do
         events_poll
@@ -221,10 +225,14 @@ module Karafka
     # @param offset [Integer, nil] offset of the message on which we want to pause (this message
     #   will be reprocessed after getting back to processing) or nil if we want to pause and
     #   resume from the consecutive offset (+1 from the last message passed to us by librdkafka)
+    # @param timeout [Integer] number of ms timeout of pause. It is used only for
+    #   instrumentation and not in the pause itself as pausing on this level is infinite always.
     # @note This will pause indefinitely and requires manual `#resume`
     # @note When `#internal_seek` is not involved (when offset is `nil`) we will not purge the
     #   librdkafka buffers and continue from the last cursor offset
-
+    # @note We accept the timeout value on this layer to have a cohesive pause/resume
+    #   instrumentation, where all the details are available. It is especially needed, when
+    def pause(topic, partition, offset = nil, timeout = 0)
       @mutex.synchronize do
         # Do not pause if the client got closed, would not change anything
         return if @closed
@@ -243,7 +251,8 @@ module Karafka
         subscription_group: @subscription_group,
         topic: topic,
         partition: partition,
-        offset: offset
+        offset: offset,
+        timeout: timeout
       )

       @paused_tpls[topic][partition] = tpl
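Because the pause notification now carries `timeout`, subscribers can log the full pause context. A minimal sketch, assuming the event is published as `client.pause` on the default monitor:

    Karafka.monitor.subscribe('client.pause') do |event|
      Karafka.logger.info(
        "Paused #{event[:topic]}##{event[:partition]} " \
        "at offset #{event[:offset].inspect} (timeout: #{event[:timeout]}ms)"
      )
    end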
@@ -313,7 +322,7 @@ module Karafka
       @unsubscribing = true

       # Give 50% of time for the final close before we reach the forceful
-      max_wait =
+      max_wait = shutdown_timeout * COOP_UNSUBSCRIBE_FACTOR
       used = 0
       stopped_at = monotonic_now

@@ -472,6 +481,10 @@ module Karafka
         message.offset = detected_partition&.offset || raise(Errors::InvalidTimeBasedOffsetError)
       end

+      # Those two are librdkafka hardcoded values
+      message.offset = -1 if message.offset.to_s == 'latest'
+      message.offset = -2 if message.offset.to_s == 'earliest'
+
       # Never seek if we would get the same location as we would get without seeking
       # This prevents us from the expensive buffer purges that can lead to increased network
       # traffic and can cost a lot of money
@@ -573,7 +586,7 @@ module Karafka
       # We should not run a single poll longer than the tick frequency. Otherwise during a single
       # `#batch_poll` we would not be able to run `#events_poll` often enough effectively
       # blocking events from being handled.
-      poll_tick = timeout >
+      poll_tick = timeout > tick_interval ? tick_interval : timeout

       result = kafka.poll(poll_tick)

@@ -655,7 +668,7 @@ module Karafka
     # Builds a new rdkafka consumer instance based on the subscription group configuration
     # @return [Rdkafka::Consumer]
     def build_consumer
-      ::Rdkafka::Config.logger =
+      ::Rdkafka::Config.logger = logger

       # We need to refresh the setup of this subscription group in case we started running in a
       # swarm. The initial configuration for validation comes from the parent node, but it needs
data/lib/karafka/connection/listener.rb
CHANGED
@@ -12,6 +12,13 @@ module Karafka
     class Listener
       include Helpers::Async

+      include Helpers::ConfigImporter.new(
+        jobs_builder: %i[internal processing jobs_builder],
+        partitioner_class: %i[internal processing partitioner_class],
+        reset_backoff: %i[internal connection reset_backoff],
+        listener_thread_priority: %i[internal connection listener_thread_priority]
+      )
+
       # Can be useful for logging
       # @return [String] id of this listener
       attr_reader :id
@@ -19,6 +26,11 @@ module Karafka
       # @return [Karafka::Routing::SubscriptionGroup] subscription group that this listener handles
       attr_reader :subscription_group

+      # @return [Processing::CoordinatorsBuffer] coordinator buffers that can be used directly in
+      #   advanced cases of changes to the polling flow (like triggered seek back without messages
+      #   ahead in the topic)
+      attr_reader :coordinators
+
       # How long to wait in the initial events poll. Increases chances of having the initial events
       # immediately available
       INITIAL_EVENTS_POLL_TIMEOUT = 100
@@ -30,16 +42,13 @@ module Karafka
       # @param scheduler [Karafka::Processing::Scheduler] scheduler we want to use
       # @return [Karafka::Connection::Listener] listener instance
       def initialize(subscription_group, jobs_queue, scheduler)
-        proc_config = ::Karafka::App.config.internal.processing
-
         @id = SecureRandom.hex(6)
         @subscription_group = subscription_group
         @jobs_queue = jobs_queue
         @coordinators = Processing::CoordinatorsBuffer.new(subscription_group.topics)
         @client = Client.new(@subscription_group, -> { running? })
         @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
-        @
-        @partitioner = proc_config.partitioner_class.new(subscription_group)
+        @partitioner = partitioner_class.new(subscription_group)
         @scheduler = scheduler
         @events_poller = Helpers::IntervalRunner.new { @client.events_poll }
         # We keep one buffer for messages to preserve memory and not allocate extra objects
@@ -111,7 +120,10 @@ module Karafka

       @status.start!

-      async_call(
+      async_call(
+        "karafka.listener##{@subscription_group.id}",
+        listener_thread_priority
+      )
     end

     # Stops the jobs queue, triggers shutdown on all the executors (sync), commits offsets and
@@ -254,7 +266,7 @@ module Karafka
       reset

       # Ruby sleep is in seconds
-      sleep_time =
+      sleep_time = reset_backoff / 10_000.0
       sleep(sleep_time) && retry
     end

@@ -294,7 +306,7 @@ module Karafka
       # here. In cases like this, we do not run a revocation job
       @executors.find_all(topic, partition).each do |executor|
         executor.coordinator.increment(:revoked)
-        jobs <<
+        jobs << jobs_builder.revoked(executor)
       end

       # We need to remove all the executors of a given topic partition that we have lost, so
@@ -318,7 +330,7 @@ module Karafka

       @executors.each do |executor|
         executor.coordinator.increment(:shutdown)
-        job =
+        job = jobs_builder.shutdown(executor)
         jobs << job
       end

@@ -355,7 +367,7 @@ module Karafka
       if coordinator.topic.eofed?
         @executors.find_all_or_create(topic, partition, coordinator).each do |executor|
           coordinator.increment(:eofed)
-          eofed_jobs <<
+          eofed_jobs << jobs_builder.eofed(executor)
         end
       end

@@ -372,7 +384,7 @@ module Karafka
       # Start work coordination for this topic partition
       coordinator.increment(:idle)
       executor = @executors.find_or_create(topic, partition, 0, coordinator)
-      idle_jobs <<
+      idle_jobs << jobs_builder.idle(executor)

       next
     end
@@ -383,7 +395,7 @@ module Karafka
     @partitioner.call(topic, messages, coordinator) do |group_id, partition_messages|
       coordinator.increment(:consume)
       executor = @executors.find_or_create(topic, partition, group_id, coordinator)
-      consume_jobs <<
+      consume_jobs << jobs_builder.consume(executor, partition_messages)
     end
   end

@@ -451,7 +463,7 @@ module Karafka

     @executors.find_all_or_create(topic_name, partition, coordinator).each do |executor|
       coordinator.increment(:periodic)
-      jobs <<
+      jobs << jobs_builder.periodic(executor)
     end
   end
 end
data/lib/karafka/connection/proxy.rb
CHANGED
@@ -108,6 +108,7 @@ module Karafka
     rescue Rdkafka::RdkafkaError => e
       return false if e.code == :assignment_lost
       return false if e.code == :state
+      return false if e.code == :illegal_generation

       raise e
     end
@@ -136,6 +137,8 @@ module Karafka
         return false
       when :unknown_member_id
         return false
+      when :illegal_generation
+        return false
       when :no_offset
         return true
       when :coordinator_load_in_progress
data/lib/karafka/contracts/config.rb
CHANGED
@@ -35,6 +35,7 @@ module Karafka
     required(:group_id) { |val| val.is_a?(String) && Contracts::TOPIC_REGEXP.match?(val) }
     required(:kafka) { |val| val.is_a?(Hash) && !val.empty? }
     required(:strict_declarative_topics) { |val| [true, false].include?(val) }
+    required(:worker_thread_priority) { |val| (-3..3).to_a.include?(val) }

     nested(:swarm) do
       required(:nodes) { |val| val.is_a?(Integer) && val.positive? }
@@ -81,6 +82,7 @@ module Karafka
       required(:manager) { |val| !val.nil? }
       required(:conductor) { |val| !val.nil? }
       required(:reset_backoff) { |val| val.is_a?(Integer) && val >= 1_000 }
+      required(:listener_thread_priority) { |val| (-3..3).to_a.include?(val) }

       nested(:proxy) do
         nested(:commit) do
@@ -114,6 +116,7 @@ module Karafka
       required(:jobs_queue_class) { |val| !val.nil? }
       required(:scheduler_class) { |val| !val.nil? }
       required(:coordinator_class) { |val| !val.nil? }
+      required(:errors_tracker_class) { |val| val.nil? || val.is_a?(Class) }
       required(:partitioner_class) { |val| !val.nil? }
       required(:strategy_selector) { |val| !val.nil? }
       required(:expansions_selector) { |val| !val.nil? }
|
|
70
70
|
next unless ::Karafka::App.config.strict_topics_namespacing
|
71
71
|
|
72
72
|
value = data.fetch(:name)
|
73
|
-
namespacing_chars_count = value.chars.find_all { |c| ['.', '_'].include?(c) }.uniq.
|
73
|
+
namespacing_chars_count = value.chars.find_all { |c| ['.', '_'].include?(c) }.uniq.size
|
74
74
|
|
75
75
|
next if namespacing_chars_count <= 1
|
76
76
|
|
data/lib/karafka/errors.rb
CHANGED
@@ -35,6 +35,9 @@ module Karafka
   # Raised when given topic is not found while expected
   TopicNotFoundError = Class.new(BaseError)

+  # Raised when given consumer group is not found while expected
+  ConsumerGroupNotFoundError = Class.new(BaseError)
+
   # This should never happen. Please open an issue if it does.
   UnsupportedCaseError = Class.new(BaseError)

@@ -64,6 +67,10 @@ module Karafka
   # Raised when there is an attempt to run an unrecognized CLI command
   UnrecognizedCommandError = Class.new(BaseError)

+  # Raised when you were executing a command and it could not finish successfully because of
+  # a setup state or parameters configuration
+  CommandValidationError = Class.new(BaseError)
+
   # Raised when we attempt to perform operation that is only allowed inside of a transaction and
   # there is no transaction around us
   TransactionRequiredError = Class.new(BaseError)
@@ -71,6 +78,10 @@ module Karafka
   # Raised in case user would want to perform nested transactions.
   TransactionAlreadyInitializedError = Class.new(BaseError)

+  # Raised when user used transactional offset marking but after that tried to use
+  # non-transactional marking, effectively mixing both. This is not allowed.
+  NonTransactionalMarkingAttemptError = Class.new(BaseError)
+
   # Raised in case a listener that was paused is being resumed
   InvalidListenerResumeError = Class.new(BaseError)

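To illustrate what the new `NonTransactionalMarkingAttemptError` guards against, mixing marking styles would look roughly like this (a sketch based on Karafka's transactional consumer API; the exact raise site lives in the processing strategies):

    class OrdersConsumer < Karafka::BaseConsumer
      def consume
        transaction do
          messages.each do |message|
            producer.produce_async(topic: 'orders_enriched', payload: message.raw_payload)
            # Offsets become part of the producer transaction
            mark_as_consumed(message)
          end
        end

        # Falling back to non-transactional marking afterwards mixes both
        # modes and is what this error now forbids
        mark_as_consumed!(messages.last)
      end
    end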
data/lib/karafka/helpers/async.rb
CHANGED
@@ -37,12 +37,14 @@ module Karafka

     # Runs the `#call` method in a new thread
     # @param thread_name [String] name that we want to assign to the thread when we start it
-
+    # @param thread_priority [Integer] Ruby thread priority
+    def async_call(thread_name, thread_priority = 0)
       MUTEX.synchronize do
         return if @thread&.alive?

         @thread = Thread.new do
           Thread.current.name = thread_name
+          Thread.current.priority = thread_priority

           Thread.current.abort_on_exception = true

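A sketch of how a `Helpers::Async` user would pass the new priority argument (the class and values are illustrative):

    class BackgroundReporter
      include Karafka::Helpers::Async

      # `#async_call` runs this in a named thread with the given priority
      def call
        loop do
          sleep(5)
          Karafka.logger.info('reporter tick')
        end
      end
    end

    BackgroundReporter.new.async_call('karafka.background_reporter', -1)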
data/lib/karafka/instrumentation/callbacks/rebalance.rb
CHANGED
@@ -12,8 +12,10 @@ module Karafka

     # @param subscription_group [Karafka::Routes::SubscriptionGroup] subscription group for
     #   which we want to manage rebalances
-
+    # @param client_id [String] id of the client managing this rebalance
+    def initialize(subscription_group, client_id)
       @subscription_group = subscription_group
+      @client_id = client_id
     end

     # Publishes an event that partitions are going to be revoked.
@@ -62,6 +64,7 @@ module Karafka
       subscription_group: @subscription_group,
       consumer_group_id: @subscription_group.consumer_group.id,
       consumer_group: @subscription_group.consumer_group,
+      client_id: @client_id,
       tpl: tpl
     )
   rescue StandardError => e
@@ -71,6 +74,7 @@ module Karafka
       subscription_group_id: @subscription_group.id,
       consumer_group_id: @subscription_group.consumer_group.id,
       type: "callbacks.rebalance.#{name}.error",
+      client_id: @client_id,
       error: e
     )
   end
|