karafka 2.0.37 → 2.0.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +1 -1
- data/.ruby-version +1 -1
- data/CHANGELOG.md +34 -0
- data/Gemfile.lock +7 -7
- data/README.md +1 -1
- data/bin/integrations +1 -1
- data/config/locales/errors.yml +0 -7
- data/config/locales/pro_errors.yml +18 -0
- data/lib/karafka/active_job/consumer.rb +22 -7
- data/lib/karafka/admin.rb +46 -14
- data/lib/karafka/base_consumer.rb +35 -55
- data/lib/karafka/connection/listener.rb +15 -10
- data/lib/karafka/errors.rb +0 -3
- data/lib/karafka/instrumentation/logger_listener.rb +44 -3
- data/lib/karafka/instrumentation/notifications.rb +7 -0
- data/lib/karafka/pro/active_job/consumer.rb +10 -5
- data/lib/karafka/pro/processing/coordinator.rb +13 -4
- data/lib/karafka/pro/processing/filters/base.rb +61 -0
- data/lib/karafka/pro/processing/filters/delayer.rb +70 -0
- data/lib/karafka/pro/processing/filters/expirer.rb +51 -0
- data/lib/karafka/pro/processing/filters/throttler.rb +84 -0
- data/lib/karafka/pro/processing/filters_applier.rb +100 -0
- data/lib/karafka/pro/processing/jobs_builder.rb +7 -3
- data/lib/karafka/pro/processing/scheduler.rb +24 -7
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +68 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +74 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom.rb +72 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +76 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +62 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +68 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_mom.rb +64 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +69 -0
- data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom.rb +38 -0
- data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +64 -0
- data/lib/karafka/pro/processing/strategies/aj/ftr_mom.rb +38 -0
- data/lib/karafka/pro/processing/strategies/aj/ftr_mom_vp.rb +58 -0
- data/lib/karafka/pro/processing/strategies/{dlq_lrj_vp.rb → aj/lrj_mom.rb} +14 -13
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +77 -0
- data/lib/karafka/pro/processing/strategies/aj/mom.rb +36 -0
- data/lib/karafka/pro/processing/strategies/aj/mom_vp.rb +52 -0
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +131 -0
- data/lib/karafka/pro/processing/strategies/dlq/ftr.rb +61 -0
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +75 -0
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +74 -0
- data/lib/karafka/pro/processing/strategies/{mom.rb → dlq/ftr_lrj_vp.rb} +16 -19
- data/lib/karafka/pro/processing/strategies/dlq/ftr_mom.rb +73 -0
- data/lib/karafka/pro/processing/strategies/dlq/ftr_vp.rb +39 -0
- data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +63 -0
- data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +66 -0
- data/lib/karafka/pro/processing/strategies/dlq/lrj_vp.rb +38 -0
- data/lib/karafka/pro/processing/strategies/dlq/mom.rb +67 -0
- data/lib/karafka/pro/processing/strategies/dlq/vp.rb +39 -0
- data/lib/karafka/pro/processing/strategies/ftr/default.rb +104 -0
- data/lib/karafka/pro/processing/strategies/ftr/vp.rb +40 -0
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +85 -0
- data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +69 -0
- data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +67 -0
- data/lib/karafka/pro/processing/strategies/{vp.rb → lrj/ftr_vp.rb} +15 -13
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +78 -0
- data/lib/karafka/pro/processing/strategies/{aj_lrj_mom.rb → lrj/vp.rb} +13 -12
- data/lib/karafka/pro/processing/strategies/mom/default.rb +46 -0
- data/lib/karafka/pro/processing/strategies/mom/ftr.rb +53 -0
- data/lib/karafka/pro/processing/strategies/vp/default.rb +53 -0
- data/lib/karafka/pro/processing/{strategies/lrj_vp.rb → strategies.rb} +1 -13
- data/lib/karafka/pro/processing/strategy_selector.rb +44 -18
- data/lib/karafka/pro/{processing/strategies/aj_mom.rb → routing/features/delaying/config.rb} +7 -13
- data/lib/karafka/pro/routing/features/delaying/contract.rb +38 -0
- data/lib/karafka/pro/routing/features/delaying/topic.rb +59 -0
- data/lib/karafka/pro/routing/features/delaying.rb +29 -0
- data/lib/karafka/pro/routing/features/expiring/config.rb +27 -0
- data/lib/karafka/pro/routing/features/expiring/contract.rb +38 -0
- data/lib/karafka/pro/routing/features/expiring/topic.rb +59 -0
- data/lib/karafka/pro/routing/features/expiring.rb +27 -0
- data/lib/karafka/pro/routing/features/filtering/config.rb +40 -0
- data/lib/karafka/pro/routing/features/filtering/contract.rb +41 -0
- data/lib/karafka/pro/routing/features/filtering/topic.rb +51 -0
- data/lib/karafka/pro/routing/features/filtering.rb +27 -0
- data/lib/karafka/pro/routing/features/long_running_job/contract.rb +1 -1
- data/lib/karafka/pro/routing/features/throttling/config.rb +32 -0
- data/lib/karafka/pro/routing/features/throttling/contract.rb +41 -0
- data/lib/karafka/pro/routing/features/throttling/topic.rb +69 -0
- data/lib/karafka/pro/routing/features/throttling.rb +30 -0
- data/lib/karafka/processing/coordinator.rb +60 -30
- data/lib/karafka/processing/coordinators_buffer.rb +5 -1
- data/lib/karafka/processing/executor.rb +23 -16
- data/lib/karafka/processing/executors_buffer.rb +10 -26
- data/lib/karafka/processing/jobs/consume.rb +2 -4
- data/lib/karafka/processing/jobs/idle.rb +24 -0
- data/lib/karafka/processing/jobs_builder.rb +2 -3
- data/lib/karafka/processing/result.rb +5 -0
- data/lib/karafka/processing/strategies/aj_dlq_mom.rb +1 -1
- data/lib/karafka/processing/strategies/base.rb +5 -0
- data/lib/karafka/processing/strategies/default.rb +50 -0
- data/lib/karafka/processing/strategies/dlq.rb +13 -4
- data/lib/karafka/processing/strategies/dlq_mom.rb +8 -3
- data/lib/karafka/processing/strategy_selector.rb +27 -10
- data/lib/karafka/version.rb +1 -1
- data/renovate.json +6 -0
- data.tar.gz.sig +0 -0
- metadata +66 -22
- metadata.gz.sig +0 -0
- data/lib/karafka/pro/processing/strategies/aj_dlq_lrj_mom.rb +0 -42
- data/lib/karafka/pro/processing/strategies/aj_dlq_lrj_mom_vp.rb +0 -70
- data/lib/karafka/pro/processing/strategies/aj_dlq_mom.rb +0 -62
- data/lib/karafka/pro/processing/strategies/aj_dlq_mom_vp.rb +0 -68
- data/lib/karafka/pro/processing/strategies/aj_lrj_mom_vp.rb +0 -75
- data/lib/karafka/pro/processing/strategies/aj_mom_vp.rb +0 -62
- data/lib/karafka/pro/processing/strategies/dlq.rb +0 -120
- data/lib/karafka/pro/processing/strategies/dlq_lrj.rb +0 -65
- data/lib/karafka/pro/processing/strategies/dlq_lrj_mom.rb +0 -62
- data/lib/karafka/pro/processing/strategies/dlq_mom.rb +0 -62
- data/lib/karafka/pro/processing/strategies/dlq_vp.rb +0 -37
- data/lib/karafka/pro/processing/strategies/lrj.rb +0 -83
- data/lib/karafka/pro/processing/strategies/lrj_mom.rb +0 -73
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 12fe8a47dc0ab16b0f7783424cd1aa043c2d2b228b4f4164f1cecefe604269d9
|
|
4
|
+
data.tar.gz: 9fa3bae282770dd67503c41ef4b73a27a38bfcff3bf472ddd63753d14d03614f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9e6536c90a411a0b42337f73c00d9f454028366f42eabb1b7f40902181bcbcfd43258741d6fc51c6e29046b9ee1f8598755440d28a00ca96104a61a8095c20c2
|
|
7
|
+
data.tar.gz: be75dd1bfa744187f770f2e1f0deeedfba4f3fb1b824d6bab91f056f96e60a33498429e35ea22841404def0935e584db0df4289d2818631ace2e597d28785960
|
checksums.yaml.gz.sig
CHANGED
|
Binary file
|
data/.github/workflows/ci.yml
CHANGED
data/.ruby-version
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
3.2.
|
|
1
|
+
3.2.2
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,39 @@
|
|
|
1
1
|
# Karafka framework changelog
|
|
2
2
|
|
|
3
|
+
## 2.0.39 (2023-04-11)
|
|
4
|
+
- **[Feature]** Provide ability to throttle/limit number of messages processed in a time unit (#1203)
|
|
5
|
+
- **[Feature]** Provide Delayed Topics (#1000)
|
|
6
|
+
- **[Feature]** Provide ability to expire messages (expiring topics)
|
|
7
|
+
- **[Feature]** Provide ability to apply filters after messages are polled and before enqueued. This is a generic filter API for any usage.
|
|
8
|
+
- [Improvement] When using ActiveJob with Virtual Partitions, Karafka will stop if VPs are collectively failing. This minimizes the number of jobs that will be collectively re-processed.
|
|
9
|
+
- [Improvement] `#retrying?` method has been added to consumers to provide the ability to check whether we're reprocessing data after a failure. This is useful for branching out processing based on errors.
|
|
10
|
+
- [Improvement] Track active_job_id in instrumentation (#1372)
|
|
11
|
+
- [Improvement] Introduce new housekeeping job type called `Idle` for non-consumption execution flows.
|
|
12
|
+
- [Improvement] Change how manual offset management works with Long-Running Jobs. Use the last message offset to move forward instead of relying on the last message marked as consumed for a scenario where no message is marked.
|
|
13
|
+
- [Improvement] Prioritize in Pro non-consumption jobs execution over consumption despite LJF. This will ensure that housekeeping as well as other non-consumption events are not saturated when running a lot of work.
|
|
14
|
+
- [Improvement] Normalize the DLQ behaviour with MoM. Always pause on dispatch for all the strategies.
|
|
15
|
+
- [Improvement] Improve the manual offset management and DLQ behaviour when no markings occur for OSS.
|
|
16
|
+
- [Improvement] Do not early stop ActiveJob work running under virtual partitions to prevent extensive reprocessing.
|
|
17
|
+
- [Improvement] Drastically increase number of scenarios covered by integration specs (OSS and Pro).
|
|
18
|
+
- [Improvement] Introduce a `Coordinator#synchronize` lock for cross virtual partitions operations.
|
|
19
|
+
- [Fix] Do not resume partition that is not paused.
|
|
20
|
+
- [Fix] Fix `LoggerListener` cases where logs would not include caller id (when available)
|
|
21
|
+
- [Fix] Fix not working benchmark tests.
|
|
22
|
+
- [Fix] Fix a case where using manual offset management with a user pause would ignore the pause and seek to the next message.
|
|
23
|
+
- [Fix] Fix a case where dead letter queue would go into an infinite loop on message with first ever offset if the first ever offset would not recover.
|
|
24
|
+
- [Fix] Make sure to resume always for all LRJ strategies on revocation.
|
|
25
|
+
- [Refactor] Make sure that coordinator is topic aware. Needed for throttling, delayed processing and expired jobs.
|
|
26
|
+
- [Refactor] Put Pro strategies into namespaces to better organize multiple combinations.
|
|
27
|
+
- [Refactor] Do not rely on messages metadata for internal topic and partition operations like `#seek` so they can run independently from the consumption flow.
|
|
28
|
+
- [Refactor] Hold a single topic/partition reference on a coordinator instead of in executor, coordinator and consumer.
|
|
29
|
+
- [Refactor] Move `#mark_as_consumed` and `#mark_as_consumed!` into `Strategies::Default` to be able to introduce marking for virtual partitions.
|
|
30
|
+
|
|
31
|
+
## 2.0.38 (2023-03-27)
|
|
32
|
+
- [Improvement] Introduce `Karafka::Admin#read_watermark_offsets` to get low and high watermark offsets values.
|
|
33
|
+
- [Improvement] Track active_job_id in instrumentation (#1372)
|
|
34
|
+
- [Improvement] Improve `#read_topic` reading in case of a compacted partition where the offset is below the low watermark offset. This should optimize reading and should not go beyond the low watermark offset.
|
|
35
|
+
- [Improvement] Allow `#read_topic` to accept instance settings to overwrite any settings needed to customize reading behaviours.
|
|
36
|
+
|
|
3
37
|
## 2.0.37 (2023-03-20)
|
|
4
38
|
- [Fix] Declarative topics execution on a secondary cluster run topics creation on the primary one (#1365)
|
|
5
39
|
- [Fix] Admin read operations commit offset when not needed (#1369)
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
karafka (2.0.37)
|
|
4
|
+
karafka (2.0.39)
|
|
5
5
|
karafka-core (>= 2.0.12, < 3.0.0)
|
|
6
6
|
thor (>= 0.20)
|
|
7
7
|
waterdrop (>= 2.4.10, < 3.0.0)
|
|
@@ -10,10 +10,10 @@ PATH
|
|
|
10
10
|
GEM
|
|
11
11
|
remote: https://rubygems.org/
|
|
12
12
|
specs:
|
|
13
|
-
activejob (7.0.4.
|
|
14
|
-
activesupport (= 7.0.4.
|
|
13
|
+
activejob (7.0.4.3)
|
|
14
|
+
activesupport (= 7.0.4.3)
|
|
15
15
|
globalid (>= 0.3.6)
|
|
16
|
-
activesupport (7.0.4.
|
|
16
|
+
activesupport (7.0.4.3)
|
|
17
17
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
|
18
18
|
i18n (>= 1.6, < 2)
|
|
19
19
|
minitest (>= 5.1)
|
|
@@ -48,7 +48,7 @@ GEM
|
|
|
48
48
|
rspec-expectations (3.12.2)
|
|
49
49
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
50
50
|
rspec-support (~> 3.12.0)
|
|
51
|
-
rspec-mocks (3.12.
|
|
51
|
+
rspec-mocks (3.12.5)
|
|
52
52
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
53
53
|
rspec-support (~> 3.12.0)
|
|
54
54
|
rspec-support (3.12.0)
|
|
@@ -61,7 +61,7 @@ GEM
|
|
|
61
61
|
thor (1.2.1)
|
|
62
62
|
tzinfo (2.0.6)
|
|
63
63
|
concurrent-ruby (~> 1.0)
|
|
64
|
-
waterdrop (2.5.
|
|
64
|
+
waterdrop (2.5.1)
|
|
65
65
|
karafka-core (>= 2.0.12, < 3.0.0)
|
|
66
66
|
zeitwerk (~> 2.3)
|
|
67
67
|
zeitwerk (2.6.7)
|
|
@@ -79,4 +79,4 @@ DEPENDENCIES
|
|
|
79
79
|
simplecov
|
|
80
80
|
|
|
81
81
|
BUNDLED WITH
|
|
82
|
-
2.4.
|
|
82
|
+
2.4.10
|
data/README.md
CHANGED
|
@@ -86,7 +86,7 @@ bundle exec karafka server
|
|
|
86
86
|
|
|
87
87
|
I also sell Karafka Pro subscriptions. It includes a commercial-friendly license, priority support, architecture consultations, enhanced Web UI and high throughput data processing-related features (virtual partitions, long-running jobs, and more).
|
|
88
88
|
|
|
89
|
-
**
|
|
89
|
+
**10%** of the income will be distributed back to other OSS projects that Karafka uses under the hood.
|
|
90
90
|
|
|
91
91
|
Help me provide high-quality open-source software. Please see the Karafka [homepage](https://karafka.io/#become-pro) for more details.
|
|
92
92
|
|
data/bin/integrations
CHANGED
|
@@ -25,7 +25,7 @@ ROOT_PATH = Pathname.new(File.expand_path(File.join(File.dirname(__FILE__), '../
|
|
|
25
25
|
# we limit it. Locally we can run a lot of those, as many of them have sleeps and do not use a lot
|
|
26
26
|
# of CPU. Locally we also cannot go beyond certain limit due to how often and how many topics we
|
|
27
27
|
# create in Kafka. With an overloaded system, we start getting timeouts.
|
|
28
|
-
CONCURRENCY = ENV.key?('CI') ?
|
|
28
|
+
CONCURRENCY = ENV.key?('CI') ? 5 : Etc.nprocessors * 3
|
|
29
29
|
|
|
30
30
|
# How many bytes do we want to keep from the stdout in the buffer for when we need to print it
|
|
31
31
|
MAX_BUFFER_OUTPUT = 51_200
|
data/config/locales/errors.yml
CHANGED
|
@@ -72,10 +72,3 @@ en:
|
|
|
72
72
|
test:
|
|
73
73
|
missing: needs to be present
|
|
74
74
|
id_format: needs to be a String
|
|
75
|
-
|
|
76
|
-
pro_topic:
|
|
77
|
-
virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
|
|
78
|
-
virtual_partitions.max_partitions_format: needs to be equal or more than 1
|
|
79
|
-
manual_offset_management_not_with_virtual_partitions: cannot be used together with Virtual Partitions
|
|
80
|
-
long_running_job.active_format: needs to be either true or false
|
|
81
|
-
dead_letter_queue_not_with_virtual_partitions: cannot be used together with Virtual Partitions
|
|
@@ -3,10 +3,28 @@ en:
|
|
|
3
3
|
topic:
|
|
4
4
|
virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
|
|
5
5
|
virtual_partitions.max_partitions_format: needs to be equal or more than 1
|
|
6
|
+
|
|
6
7
|
manual_offset_management_not_with_virtual_partitions: cannot be used together with Virtual Partitions
|
|
8
|
+
|
|
7
9
|
long_running_job.active_format: needs to be either true or false
|
|
10
|
+
|
|
8
11
|
dead_letter_queue_with_virtual_partitions: when using Dead Letter Queue with Virtual Partitions, at least one retry is required.
|
|
9
12
|
|
|
13
|
+
throttling.active_format: needs to be either true or false
|
|
14
|
+
throttling.limit_format: needs to be equal or more than 1
|
|
15
|
+
throttling.interval_format: needs to be equal or more than 1
|
|
16
|
+
|
|
17
|
+
filtering.active_missing: needs to be present
|
|
18
|
+
filtering.factory_format: 'needs to respond to #call'
|
|
19
|
+
filtering.factories_format: 'needs to contain only factories responding to #call'
|
|
20
|
+
filtering.active_format: 'needs to be boolean'
|
|
21
|
+
|
|
22
|
+
expiring.ttl_format: 'needs to be equal or more than 0 and an integer'
|
|
23
|
+
expiring.active_format: 'needs to be boolean'
|
|
24
|
+
|
|
25
|
+
delaying.delay_format: 'needs to be equal or more than 0 and an integer'
|
|
26
|
+
delaying.active_format: 'needs to be boolean'
|
|
27
|
+
|
|
10
28
|
config:
|
|
11
29
|
encryption.active_format: 'needs to be either true or false'
|
|
12
30
|
encryption.public_key_invalid: 'is not a valid public RSA key'
|
|
@@ -12,16 +12,31 @@ module Karafka
|
|
|
12
12
|
messages.each do |message|
|
|
13
13
|
break if Karafka::App.stopping?
|
|
14
14
|
|
|
15
|
-
|
|
16
|
-
# message instead of using the `#raw_payload`. This is not done on purpose to simplify
|
|
17
|
-
# the ActiveJob setup here
|
|
18
|
-
job = ::ActiveSupport::JSON.decode(message.raw_payload)
|
|
15
|
+
consume_job(message)
|
|
19
16
|
|
|
20
|
-
|
|
17
|
+
mark_as_consumed(message)
|
|
18
|
+
end
|
|
19
|
+
end
|
|
21
20
|
|
|
22
|
-
|
|
21
|
+
private
|
|
23
22
|
|
|
24
|
-
|
|
23
|
+
# Consumes a message with the job and runs needed instrumentation
|
|
24
|
+
#
|
|
25
|
+
# @param job_message [Karafka::Messages::Message] message with active job
|
|
26
|
+
def consume_job(job_message)
|
|
27
|
+
# We technically speaking could set this as deserializer and reference it from the
|
|
28
|
+
# message instead of using the `#raw_payload`. This is not done on purpose to simplify
|
|
29
|
+
# the ActiveJob setup here
|
|
30
|
+
job = ::ActiveSupport::JSON.decode(job_message.raw_payload)
|
|
31
|
+
|
|
32
|
+
tags.add(:job_class, job['job_class'])
|
|
33
|
+
|
|
34
|
+
payload = { caller: self, job: job, message: job_message }
|
|
35
|
+
|
|
36
|
+
# We publish both to make it consistent with `consumer.x` events
|
|
37
|
+
Karafka.monitor.instrument('active_job.consume', payload)
|
|
38
|
+
Karafka.monitor.instrument('active_job.consumed', payload) do
|
|
39
|
+
::ActiveJob::Base.execute(job)
|
|
25
40
|
end
|
|
26
41
|
end
|
|
27
42
|
end
|
data/lib/karafka/admin.rb
CHANGED
|
@@ -44,17 +44,32 @@ module Karafka
|
|
|
44
44
|
# @param count [Integer] how many messages we want to get at most
|
|
45
45
|
# @param start_offset [Integer] offset from which we should start. If -1 is provided
|
|
46
46
|
# (default) we will start from the latest offset
|
|
47
|
+
# @param settings [Hash] kafka extra settings (optional)
|
|
47
48
|
#
|
|
48
49
|
# @return [Array<Karafka::Messages::Message>] array with messages
|
|
49
|
-
def read_topic(name, partition, count, start_offset = -1)
|
|
50
|
+
def read_topic(name, partition, count, start_offset = -1, settings = {})
|
|
50
51
|
messages = []
|
|
51
52
|
tpl = Rdkafka::Consumer::TopicPartitionList.new
|
|
53
|
+
low_offset, high_offset = nil
|
|
52
54
|
|
|
53
|
-
with_consumer do |consumer|
|
|
54
|
-
|
|
55
|
-
|
|
55
|
+
with_consumer(settings) do |consumer|
|
|
56
|
+
low_offset, high_offset = consumer.query_watermark_offsets(name, partition)
|
|
57
|
+
|
|
58
|
+
# Select offset dynamically if -1 or less
|
|
59
|
+
start_offset = high_offset - count if start_offset.negative?
|
|
56
60
|
|
|
57
|
-
|
|
61
|
+
# Build the requested range - since first element is on the start offset we need to
|
|
62
|
+
# subtract one from requested count to end up with expected number of elements
|
|
63
|
+
requested_range = (start_offset..start_offset + (count - 1))
|
|
64
|
+
# Establish theoretical available range. Note, that this does not handle cases related to
|
|
65
|
+
# log retention or compaction
|
|
66
|
+
available_range = (low_offset..high_offset)
|
|
67
|
+
# Select only offset that we can select. This will remove all the potential offsets that
|
|
68
|
+
# are below the low watermark offset
|
|
69
|
+
possible_range = requested_range.select { |offset| available_range.include?(offset) }
|
|
70
|
+
|
|
71
|
+
start_offset = possible_range.first
|
|
72
|
+
count = possible_range.count
|
|
58
73
|
|
|
59
74
|
tpl.add_topic_and_partitions_with_offsets(name, partition => start_offset)
|
|
60
75
|
consumer.assign(tpl)
|
|
@@ -64,11 +79,15 @@ module Karafka
|
|
|
64
79
|
loop do
|
|
65
80
|
# If we've got as many messages as we've wanted stop
|
|
66
81
|
break if messages.size >= count
|
|
67
|
-
# If we've reached end of the topic messages, don't process more
|
|
68
|
-
break if !messages.empty? && end_offset <= messages.last.offset
|
|
69
82
|
|
|
70
83
|
message = consumer.poll(200)
|
|
71
|
-
|
|
84
|
+
|
|
85
|
+
next unless message
|
|
86
|
+
|
|
87
|
+
# If the message we've got is beyond the requested range, stop
|
|
88
|
+
break unless possible_range.include?(message.offset)
|
|
89
|
+
|
|
90
|
+
messages << message
|
|
72
91
|
rescue Rdkafka::RdkafkaError => e
|
|
73
92
|
# End of partition
|
|
74
93
|
break if e.code == :partition_eof
|
|
@@ -77,7 +96,7 @@ module Karafka
|
|
|
77
96
|
end
|
|
78
97
|
end
|
|
79
98
|
|
|
80
|
-
messages.map do |message|
|
|
99
|
+
messages.map! do |message|
|
|
81
100
|
Messages::Builders::Message.call(
|
|
82
101
|
message,
|
|
83
102
|
# Use topic from routes if we can match it or create a dummy one
|
|
@@ -136,6 +155,17 @@ module Karafka
|
|
|
136
155
|
end
|
|
137
156
|
end
|
|
138
157
|
|
|
158
|
+
# Fetches the watermark offsets for a given topic partition
|
|
159
|
+
#
|
|
160
|
+
# @param name [String, Symbol] topic name
|
|
161
|
+
# @param partition [Integer] partition
|
|
162
|
+
# @return [Array<Integer, Integer>] low watermark offset and high watermark offset
|
|
163
|
+
def read_watermark_offsets(name, partition)
|
|
164
|
+
with_consumer do |consumer|
|
|
165
|
+
consumer.query_watermark_offsets(name, partition)
|
|
166
|
+
end
|
|
167
|
+
end
|
|
168
|
+
|
|
139
169
|
# @return [Rdkafka::Metadata] cluster metadata info
|
|
140
170
|
def cluster_info
|
|
141
171
|
with_admin do |admin|
|
|
@@ -159,15 +189,16 @@ module Karafka
|
|
|
159
189
|
|
|
160
190
|
# Creates admin instance and yields it. After usage it closes the admin instance
|
|
161
191
|
def with_admin
|
|
162
|
-
admin = config(:producer).admin
|
|
192
|
+
admin = config(:producer, {}).admin
|
|
163
193
|
yield(admin)
|
|
164
194
|
ensure
|
|
165
195
|
admin&.close
|
|
166
196
|
end
|
|
167
197
|
|
|
168
198
|
# Creates consumer instance and yields it. After usage it closes the consumer instance
|
|
169
|
-
|
|
170
|
-
|
|
199
|
+
# @param settings [Hash] extra settings to customize consumer
|
|
200
|
+
def with_consumer(settings = {})
|
|
201
|
+
consumer = config(:consumer, settings).consumer
|
|
171
202
|
yield(consumer)
|
|
172
203
|
ensure
|
|
173
204
|
consumer&.close
|
|
@@ -196,11 +227,12 @@ module Karafka
|
|
|
196
227
|
end
|
|
197
228
|
|
|
198
229
|
# @param type [Symbol] type of config we want
|
|
230
|
+
# @param settings [Hash] extra settings for config (if needed)
|
|
199
231
|
# @return [::Rdkafka::Config] rdkafka config
|
|
200
|
-
def config(type)
|
|
232
|
+
def config(type, settings)
|
|
201
233
|
config_hash = Karafka::Setup::AttributesMap.public_send(
|
|
202
234
|
type,
|
|
203
|
-
Karafka::App.config.kafka.dup.merge(CONFIG_DEFAULTS)
|
|
235
|
+
Karafka::App.config.kafka.dup.merge(CONFIG_DEFAULTS).merge!(settings)
|
|
204
236
|
)
|
|
205
237
|
|
|
206
238
|
::Rdkafka::Config.new(config_hash)
|
|
@@ -7,11 +7,13 @@ module Karafka
|
|
|
7
7
|
# Allow for consumer instance tagging for instrumentation
|
|
8
8
|
include ::Karafka::Core::Taggable
|
|
9
9
|
|
|
10
|
+
extend Forwardable
|
|
11
|
+
|
|
12
|
+
def_delegators :@coordinator, :topic, :partition
|
|
13
|
+
|
|
10
14
|
# @return [String] id of the current consumer
|
|
11
15
|
attr_reader :id
|
|
12
16
|
# @return [Karafka::Routing::Topic] topic to which a given consumer is subscribed
|
|
13
|
-
attr_accessor :topic
|
|
14
|
-
# @return [Karafka::Messages::Messages] current messages batch
|
|
15
17
|
attr_accessor :messages
|
|
16
18
|
# @return [Karafka::Connection::Client] kafka connection client
|
|
17
19
|
attr_accessor :client
|
|
@@ -97,6 +99,20 @@ module Karafka
|
|
|
97
99
|
)
|
|
98
100
|
end
|
|
99
101
|
|
|
102
|
+
# Trigger method for running on idle runs without messages
|
|
103
|
+
#
|
|
104
|
+
# @private
|
|
105
|
+
def on_idle
|
|
106
|
+
handle_idle
|
|
107
|
+
rescue StandardError => e
|
|
108
|
+
Karafka.monitor.instrument(
|
|
109
|
+
'error.occurred',
|
|
110
|
+
error: e,
|
|
111
|
+
caller: self,
|
|
112
|
+
type: 'consumer.idle.error'
|
|
113
|
+
)
|
|
114
|
+
end
|
|
115
|
+
|
|
100
116
|
# Trigger method for running on partition revocation.
|
|
101
117
|
#
|
|
102
118
|
# @private
|
|
@@ -143,51 +159,6 @@ module Karafka
|
|
|
143
159
|
# some teardown procedures (closing file handler, etc).
|
|
144
160
|
def shutdown; end
|
|
145
161
|
|
|
146
|
-
# Marks message as consumed in an async way.
|
|
147
|
-
#
|
|
148
|
-
# @param message [Messages::Message] last successfully processed message.
|
|
149
|
-
# @return [Boolean] true if we were able to mark the offset, false otherwise. False indicates
|
|
150
|
-
# that we were not able and that we have lost the partition.
|
|
151
|
-
#
|
|
152
|
-
# @note We keep track of this offset in case we would mark as consumed and got error when
|
|
153
|
-
# processing another message. In case like this we do not pause on the message we've already
|
|
154
|
-
# processed but rather at the next one. This applies to both sync and async versions of this
|
|
155
|
-
# method.
|
|
156
|
-
def mark_as_consumed(message)
|
|
157
|
-
# Ignore earlier offsets than the one we alread committed
|
|
158
|
-
return true if coordinator.seek_offset > message.offset
|
|
159
|
-
|
|
160
|
-
unless client.mark_as_consumed(message)
|
|
161
|
-
coordinator.revoke
|
|
162
|
-
|
|
163
|
-
return false
|
|
164
|
-
end
|
|
165
|
-
|
|
166
|
-
coordinator.seek_offset = message.offset + 1
|
|
167
|
-
|
|
168
|
-
true
|
|
169
|
-
end
|
|
170
|
-
|
|
171
|
-
# Marks message as consumed in a sync way.
|
|
172
|
-
#
|
|
173
|
-
# @param message [Messages::Message] last successfully processed message.
|
|
174
|
-
# @return [Boolean] true if we were able to mark the offset, false otherwise. False indicates
|
|
175
|
-
# that we were not able and that we have lost the partition.
|
|
176
|
-
def mark_as_consumed!(message)
|
|
177
|
-
# Ignore earlier offsets than the one we alread committed
|
|
178
|
-
return true if coordinator.seek_offset > message.offset
|
|
179
|
-
|
|
180
|
-
unless client.mark_as_consumed!(message)
|
|
181
|
-
coordinator.revoke
|
|
182
|
-
|
|
183
|
-
return false
|
|
184
|
-
end
|
|
185
|
-
|
|
186
|
-
coordinator.seek_offset = message.offset + 1
|
|
187
|
-
|
|
188
|
-
true
|
|
189
|
-
end
|
|
190
|
-
|
|
191
162
|
# Pauses processing on a given offset for the current topic partition
|
|
192
163
|
#
|
|
193
164
|
# After given partition is resumed, it will continue processing from the given offset
|
|
@@ -201,8 +172,8 @@ module Karafka
|
|
|
201
172
|
timeout ? coordinator.pause_tracker.pause(timeout) : coordinator.pause_tracker.pause
|
|
202
173
|
|
|
203
174
|
client.pause(
|
|
204
|
-
|
|
205
|
-
|
|
175
|
+
topic.name,
|
|
176
|
+
partition,
|
|
206
177
|
offset
|
|
207
178
|
)
|
|
208
179
|
|
|
@@ -213,8 +184,8 @@ module Karafka
|
|
|
213
184
|
'consumer.consuming.pause',
|
|
214
185
|
caller: self,
|
|
215
186
|
manual: manual_pause,
|
|
216
|
-
topic:
|
|
217
|
-
partition:
|
|
187
|
+
topic: topic.name,
|
|
188
|
+
partition: partition,
|
|
218
189
|
offset: offset,
|
|
219
190
|
timeout: coordinator.pause_tracker.current_timeout,
|
|
220
191
|
attempt: coordinator.pause_tracker.attempt
|
|
@@ -223,6 +194,8 @@ module Karafka
|
|
|
223
194
|
|
|
224
195
|
# Resumes processing of the current topic partition
|
|
225
196
|
def resume
|
|
197
|
+
return unless coordinator.pause_tracker.paused?
|
|
198
|
+
|
|
226
199
|
# This is sufficient to expire a partition pause, as it will then be resumed by the listener
|
|
227
200
|
# thread before the next poll.
|
|
228
201
|
coordinator.pause_tracker.expire
|
|
@@ -234,8 +207,8 @@ module Karafka
|
|
|
234
207
|
def seek(offset)
|
|
235
208
|
client.seek(
|
|
236
209
|
Karafka::Messages::Seek.new(
|
|
237
|
-
|
|
238
|
-
|
|
210
|
+
topic.name,
|
|
211
|
+
partition,
|
|
239
212
|
offset
|
|
240
213
|
)
|
|
241
214
|
)
|
|
@@ -248,6 +221,13 @@ module Karafka
|
|
|
248
221
|
coordinator.revoked?
|
|
249
222
|
end
|
|
250
223
|
|
|
224
|
+
# @return [Boolean] are we retrying processing after an error. This can be used to provide a
|
|
225
|
+
# different flow after there is an error, for example for resources cleanup, small manual
|
|
226
|
+
# backoff or different instrumentation tracking.
|
|
227
|
+
def retrying?
|
|
228
|
+
coordinator.pause_tracker.attempt.positive?
|
|
229
|
+
end
|
|
230
|
+
|
|
251
231
|
# Pauses the processing from the last offset to retry on given message
|
|
252
232
|
# @private
|
|
253
233
|
def retry_after_pause
|
|
@@ -258,8 +238,8 @@ module Karafka
|
|
|
258
238
|
Karafka.monitor.instrument(
|
|
259
239
|
'consumer.consuming.retry',
|
|
260
240
|
caller: self,
|
|
261
|
-
topic:
|
|
262
|
-
partition:
|
|
241
|
+
topic: topic.name,
|
|
242
|
+
partition: partition,
|
|
263
243
|
offset: coordinator.seek_offset,
|
|
264
244
|
timeout: coordinator.pause_tracker.current_timeout,
|
|
265
245
|
attempt: coordinator.pause_tracker.attempt
|
|
@@ -25,7 +25,7 @@ module Karafka
|
|
|
25
25
|
@consumer_group_coordinator = consumer_group_coordinator
|
|
26
26
|
@subscription_group = subscription_group
|
|
27
27
|
@jobs_queue = jobs_queue
|
|
28
|
-
@coordinators = Processing::CoordinatorsBuffer.new
|
|
28
|
+
@coordinators = Processing::CoordinatorsBuffer.new(subscription_group.topics)
|
|
29
29
|
@client = Client.new(@subscription_group)
|
|
30
30
|
@executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
|
|
31
31
|
@jobs_builder = proc_config.jobs_builder
|
|
@@ -234,7 +234,7 @@ module Karafka
|
|
|
234
234
|
def build_and_schedule_shutdown_jobs
|
|
235
235
|
jobs = []
|
|
236
236
|
|
|
237
|
-
@executors.each do |
|
|
237
|
+
@executors.each do |executor|
|
|
238
238
|
job = @jobs_builder.shutdown(executor)
|
|
239
239
|
job.before_enqueue
|
|
240
240
|
jobs << job
|
|
@@ -263,20 +263,25 @@ module Karafka
|
|
|
263
263
|
|
|
264
264
|
@messages_buffer.each do |topic, partition, messages|
|
|
265
265
|
coordinator = @coordinators.find_or_create(topic, partition)
|
|
266
|
-
|
|
267
266
|
# Start work coordination for this topic partition
|
|
268
267
|
coordinator.start(messages)
|
|
269
268
|
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
executor = @executors.find_or_create(topic, partition,
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
269
|
+
# We do not increment coordinator for idle job because it's not a user related one
|
|
270
|
+
# and it will not go through a standard lifecycle. Same applies to revoked and shutdown
|
|
271
|
+
if messages.empty?
|
|
272
|
+
executor = @executors.find_or_create(topic, partition, 0, coordinator)
|
|
273
|
+
jobs << @jobs_builder.idle(executor)
|
|
274
|
+
else
|
|
275
|
+
@partitioner.call(topic, messages, coordinator) do |group_id, partition_messages|
|
|
276
|
+
executor = @executors.find_or_create(topic, partition, group_id, coordinator)
|
|
277
|
+
coordinator.increment
|
|
278
|
+
jobs << @jobs_builder.consume(executor, partition_messages)
|
|
279
|
+
end
|
|
277
280
|
end
|
|
278
281
|
end
|
|
279
282
|
|
|
283
|
+
jobs.each(&:before_enqueue)
|
|
284
|
+
|
|
280
285
|
@scheduler.schedule_consumption(@jobs_queue, jobs)
|
|
281
286
|
end
|
|
282
287
|
|
data/lib/karafka/errors.rb
CHANGED
|
@@ -46,8 +46,5 @@ module Karafka
|
|
|
46
46
|
|
|
47
47
|
# This should never happen. Please open an issue if it does.
|
|
48
48
|
StrategyNotFoundError = Class.new(BaseError)
|
|
49
|
-
|
|
50
|
-
# This should never happen. Please open an issue if it does.
|
|
51
|
-
SkipMessageNotFoundError = Class.new(BaseError)
|
|
52
49
|
end
|
|
53
50
|
end
|
|
@@ -170,13 +170,51 @@ module Karafka
|
|
|
170
170
|
#
|
|
171
171
|
# @param event [Karafka::Core::Monitoring::Event] event details including payload
|
|
172
172
|
def on_dead_letter_queue_dispatched(event)
|
|
173
|
+
consumer = event[:caller]
|
|
174
|
+
topic = consumer.topic.name
|
|
173
175
|
message = event[:message]
|
|
174
176
|
offset = message.offset
|
|
175
|
-
|
|
176
|
-
dlq_topic = event[:caller].topic.dead_letter_queue.topic
|
|
177
|
+
dlq_topic = consumer.topic.dead_letter_queue.topic
|
|
177
178
|
partition = message.partition
|
|
178
179
|
|
|
179
|
-
info "
|
|
180
|
+
info <<~MSG.tr("\n", ' ').strip!
|
|
181
|
+
[#{consumer.id}] Dispatched message #{offset}
|
|
182
|
+
from #{topic}/#{partition}
|
|
183
|
+
to DLQ topic: #{dlq_topic}
|
|
184
|
+
MSG
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
# Logs info about throttling event
|
|
188
|
+
#
|
|
189
|
+
# @param event [Karafka::Core::Monitoring::Event] event details including payload
|
|
190
|
+
def on_filtering_throttled(event)
|
|
191
|
+
consumer = event[:caller]
|
|
192
|
+
topic = consumer.topic.name
|
|
193
|
+
# Here we get last message before throttle
|
|
194
|
+
message = event[:message]
|
|
195
|
+
partition = message.partition
|
|
196
|
+
offset = message.offset
|
|
197
|
+
|
|
198
|
+
info <<~MSG.tr("\n", ' ').strip!
|
|
199
|
+
[#{consumer.id}] Throttled and will resume
|
|
200
|
+
from message #{offset}
|
|
201
|
+
on #{topic}/#{partition}
|
|
202
|
+
MSG
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
# @param event [Karafka::Core::Monitoring::Event] event details including payload
|
|
206
|
+
def on_filtering_seek(event)
|
|
207
|
+
consumer = event[:caller]
|
|
208
|
+
topic = consumer.topic.name
|
|
209
|
+
# Message to which we seek
|
|
210
|
+
message = event[:message]
|
|
211
|
+
partition = message.partition
|
|
212
|
+
offset = message.offset
|
|
213
|
+
|
|
214
|
+
info <<~MSG.tr("\n", ' ').strip!
|
|
215
|
+
[#{consumer.id}] Post-filtering seeking to message #{offset}
|
|
216
|
+
on #{topic}/#{partition}
|
|
217
|
+
MSG
|
|
180
218
|
end
|
|
181
219
|
|
|
182
220
|
# There are many types of errors that can occur in many places, but we provide a single
|
|
@@ -203,6 +241,9 @@ module Karafka
|
|
|
203
241
|
when 'consumer.after_consume.error'
|
|
204
242
|
error "Consumer after consume failed due to an error: #{error}"
|
|
205
243
|
error details
|
|
244
|
+
when 'consumer.idle.error'
|
|
245
|
+
error "Consumer idle failed due to an error: #{error}"
|
|
246
|
+
error details
|
|
206
247
|
when 'consumer.shutdown.error'
|
|
207
248
|
error "Consumer on shutdown failed due to an error: #{error}"
|
|
208
249
|
error details
|
|
@@ -17,6 +17,9 @@ module Karafka
|
|
|
17
17
|
# complete list of all the events. Please use the #available_events on fully loaded
|
|
18
18
|
# Karafka system to determine all of the events you can use.
|
|
19
19
|
EVENTS = %w[
|
|
20
|
+
active_job.consume
|
|
21
|
+
active_job.consumed
|
|
22
|
+
|
|
20
23
|
app.initialized
|
|
21
24
|
app.running
|
|
22
25
|
app.quieting
|
|
@@ -36,6 +39,7 @@ module Karafka
|
|
|
36
39
|
consumer.consumed
|
|
37
40
|
consumer.consuming.pause
|
|
38
41
|
consumer.consuming.retry
|
|
42
|
+
consumer.idle
|
|
39
43
|
consumer.revoke
|
|
40
44
|
consumer.revoked
|
|
41
45
|
consumer.shutting_down
|
|
@@ -43,6 +47,9 @@ module Karafka
|
|
|
43
47
|
|
|
44
48
|
dead_letter_queue.dispatched
|
|
45
49
|
|
|
50
|
+
filtering.throttled
|
|
51
|
+
filtering.seek
|
|
52
|
+
|
|
46
53
|
process.notice_signal
|
|
47
54
|
|
|
48
55
|
statistics.emitted
|