karafka 2.0.37 → 2.0.39
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +1 -1
- data/.ruby-version +1 -1
- data/CHANGELOG.md +34 -0
- data/Gemfile.lock +7 -7
- data/README.md +1 -1
- data/bin/integrations +1 -1
- data/config/locales/errors.yml +0 -7
- data/config/locales/pro_errors.yml +18 -0
- data/lib/karafka/active_job/consumer.rb +22 -7
- data/lib/karafka/admin.rb +46 -14
- data/lib/karafka/base_consumer.rb +35 -55
- data/lib/karafka/connection/listener.rb +15 -10
- data/lib/karafka/errors.rb +0 -3
- data/lib/karafka/instrumentation/logger_listener.rb +44 -3
- data/lib/karafka/instrumentation/notifications.rb +7 -0
- data/lib/karafka/pro/active_job/consumer.rb +10 -5
- data/lib/karafka/pro/processing/coordinator.rb +13 -4
- data/lib/karafka/pro/processing/filters/base.rb +61 -0
- data/lib/karafka/pro/processing/filters/delayer.rb +70 -0
- data/lib/karafka/pro/processing/filters/expirer.rb +51 -0
- data/lib/karafka/pro/processing/filters/throttler.rb +84 -0
- data/lib/karafka/pro/processing/filters_applier.rb +100 -0
- data/lib/karafka/pro/processing/jobs_builder.rb +7 -3
- data/lib/karafka/pro/processing/scheduler.rb +24 -7
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +68 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +74 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom.rb +72 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +76 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +62 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +68 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_mom.rb +64 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +69 -0
- data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom.rb +38 -0
- data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +64 -0
- data/lib/karafka/pro/processing/strategies/aj/ftr_mom.rb +38 -0
- data/lib/karafka/pro/processing/strategies/aj/ftr_mom_vp.rb +58 -0
- data/lib/karafka/pro/processing/strategies/{dlq_lrj_vp.rb → aj/lrj_mom.rb} +14 -13
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +77 -0
- data/lib/karafka/pro/processing/strategies/aj/mom.rb +36 -0
- data/lib/karafka/pro/processing/strategies/aj/mom_vp.rb +52 -0
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +131 -0
- data/lib/karafka/pro/processing/strategies/dlq/ftr.rb +61 -0
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +75 -0
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +74 -0
- data/lib/karafka/pro/processing/strategies/{mom.rb → dlq/ftr_lrj_vp.rb} +16 -19
- data/lib/karafka/pro/processing/strategies/dlq/ftr_mom.rb +73 -0
- data/lib/karafka/pro/processing/strategies/dlq/ftr_vp.rb +39 -0
- data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +63 -0
- data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +66 -0
- data/lib/karafka/pro/processing/strategies/dlq/lrj_vp.rb +38 -0
- data/lib/karafka/pro/processing/strategies/dlq/mom.rb +67 -0
- data/lib/karafka/pro/processing/strategies/dlq/vp.rb +39 -0
- data/lib/karafka/pro/processing/strategies/ftr/default.rb +104 -0
- data/lib/karafka/pro/processing/strategies/ftr/vp.rb +40 -0
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +85 -0
- data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +69 -0
- data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +67 -0
- data/lib/karafka/pro/processing/strategies/{vp.rb → lrj/ftr_vp.rb} +15 -13
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +78 -0
- data/lib/karafka/pro/processing/strategies/{aj_lrj_mom.rb → lrj/vp.rb} +13 -12
- data/lib/karafka/pro/processing/strategies/mom/default.rb +46 -0
- data/lib/karafka/pro/processing/strategies/mom/ftr.rb +53 -0
- data/lib/karafka/pro/processing/strategies/vp/default.rb +53 -0
- data/lib/karafka/pro/processing/{strategies/lrj_vp.rb → strategies.rb} +1 -13
- data/lib/karafka/pro/processing/strategy_selector.rb +44 -18
- data/lib/karafka/pro/{processing/strategies/aj_mom.rb → routing/features/delaying/config.rb} +7 -13
- data/lib/karafka/pro/routing/features/delaying/contract.rb +38 -0
- data/lib/karafka/pro/routing/features/delaying/topic.rb +59 -0
- data/lib/karafka/pro/routing/features/delaying.rb +29 -0
- data/lib/karafka/pro/routing/features/expiring/config.rb +27 -0
- data/lib/karafka/pro/routing/features/expiring/contract.rb +38 -0
- data/lib/karafka/pro/routing/features/expiring/topic.rb +59 -0
- data/lib/karafka/pro/routing/features/expiring.rb +27 -0
- data/lib/karafka/pro/routing/features/filtering/config.rb +40 -0
- data/lib/karafka/pro/routing/features/filtering/contract.rb +41 -0
- data/lib/karafka/pro/routing/features/filtering/topic.rb +51 -0
- data/lib/karafka/pro/routing/features/filtering.rb +27 -0
- data/lib/karafka/pro/routing/features/long_running_job/contract.rb +1 -1
- data/lib/karafka/pro/routing/features/throttling/config.rb +32 -0
- data/lib/karafka/pro/routing/features/throttling/contract.rb +41 -0
- data/lib/karafka/pro/routing/features/throttling/topic.rb +69 -0
- data/lib/karafka/pro/routing/features/throttling.rb +30 -0
- data/lib/karafka/processing/coordinator.rb +60 -30
- data/lib/karafka/processing/coordinators_buffer.rb +5 -1
- data/lib/karafka/processing/executor.rb +23 -16
- data/lib/karafka/processing/executors_buffer.rb +10 -26
- data/lib/karafka/processing/jobs/consume.rb +2 -4
- data/lib/karafka/processing/jobs/idle.rb +24 -0
- data/lib/karafka/processing/jobs_builder.rb +2 -3
- data/lib/karafka/processing/result.rb +5 -0
- data/lib/karafka/processing/strategies/aj_dlq_mom.rb +1 -1
- data/lib/karafka/processing/strategies/base.rb +5 -0
- data/lib/karafka/processing/strategies/default.rb +50 -0
- data/lib/karafka/processing/strategies/dlq.rb +13 -4
- data/lib/karafka/processing/strategies/dlq_mom.rb +8 -3
- data/lib/karafka/processing/strategy_selector.rb +27 -10
- data/lib/karafka/version.rb +1 -1
- data/renovate.json +6 -0
- data.tar.gz.sig +0 -0
- metadata +66 -22
- metadata.gz.sig +0 -0
- data/lib/karafka/pro/processing/strategies/aj_dlq_lrj_mom.rb +0 -42
- data/lib/karafka/pro/processing/strategies/aj_dlq_lrj_mom_vp.rb +0 -70
- data/lib/karafka/pro/processing/strategies/aj_dlq_mom.rb +0 -62
- data/lib/karafka/pro/processing/strategies/aj_dlq_mom_vp.rb +0 -68
- data/lib/karafka/pro/processing/strategies/aj_lrj_mom_vp.rb +0 -75
- data/lib/karafka/pro/processing/strategies/aj_mom_vp.rb +0 -62
- data/lib/karafka/pro/processing/strategies/dlq.rb +0 -120
- data/lib/karafka/pro/processing/strategies/dlq_lrj.rb +0 -65
- data/lib/karafka/pro/processing/strategies/dlq_lrj_mom.rb +0 -62
- data/lib/karafka/pro/processing/strategies/dlq_mom.rb +0 -62
- data/lib/karafka/pro/processing/strategies/dlq_vp.rb +0 -37
- data/lib/karafka/pro/processing/strategies/lrj.rb +0 -83
- data/lib/karafka/pro/processing/strategies/lrj_mom.rb +0 -73
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 12fe8a47dc0ab16b0f7783424cd1aa043c2d2b228b4f4164f1cecefe604269d9
+  data.tar.gz: 9fa3bae282770dd67503c41ef4b73a27a38bfcff3bf472ddd63753d14d03614f
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 9e6536c90a411a0b42337f73c00d9f454028366f42eabb1b7f40902181bcbcfd43258741d6fc51c6e29046b9ee1f8598755440d28a00ca96104a61a8095c20c2
+  data.tar.gz: be75dd1bfa744187f770f2e1f0deeedfba4f3fb1b824d6bab91f056f96e60a33498429e35ea22841404def0935e584db0df4289d2818631ace2e597d28785960
checksums.yaml.gz.sig
CHANGED
Binary file
data/.github/workflows/ci.yml
CHANGED
data/.ruby-version
CHANGED
@@ -1 +1 @@
-3.2.
+3.2.2
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,39 @@
 # Karafka framework changelog

+## 2.0.39 (2023-04-11)
+- **[Feature]** Provide ability to throttle/limit number of messages processed in a time unit (#1203)
+- **[Feature]** Provide Delayed Topics (#1000)
+- **[Feature]** Provide ability to expire messages (expiring topics)
+- **[Feature]** Provide ability to apply filters after messages are polled and before enqueued. This is a generic filter API for any usage.
+- [Improvement] When using ActiveJob with Virtual Partitions, Karafka will stop if collectively VPs are failing. This minimizes number of jobs that will be collectively re-processed.
+- [Improvement] `#retrying?` method has been added to consumers to provide ability to check, that we're reprocessing data after a failure. This is useful for branching out processing based on errors.
+- [Improvement] Track active_job_id in instrumentation (#1372)
+- [Improvement] Introduce new housekeeping job type called `Idle` for non-consumption execution flows.
+- [Improvement] Change how a manual offset management works with Long-Running Jobs. Use the last message offset to move forward instead of relying on the last message marked as consumed for a scenario where no message is marked.
+- [Improvement] Prioritize in Pro non-consumption jobs execution over consumption despite LJF. This will ensure, that housekeeping as well as other non-consumption events are not saturated when running a lot of work.
+- [Improvement] Normalize the DLQ behaviour with MoM. Always pause on dispatch for all the strategies.
+- [Improvement] Improve the manual offset management and DLQ behaviour when no markings occur for OSS.
+- [Improvement] Do not early stop ActiveJob work running under virtual partitions to prevent extensive reprocessing.
+- [Improvement] Drastically increase number of scenarios covered by integration specs (OSS and Pro).
+- [Improvement] Introduce a `Coordinator#synchronize` lock for cross virtual partitions operations.
+- [Fix] Do not resume partition that is not paused.
+- [Fix] Fix `LoggerListener` cases where logs would not include caller id (when available)
+- [Fix] Fix not working benchmark tests.
+- [Fix] Fix a case where when using manual offset management with a user pause would ignore the pause and seek to the next message.
+- [Fix] Fix a case where dead letter queue would go into an infinite loop on message with first ever offset if the first ever offset would not recover.
+- [Fix] Make sure to resume always for all LRJ strategies on revocation.
+- [Refactor] Make sure that coordinator is topic aware. Needed for throttling, delayed processing and expired jobs.
+- [Refactor] Put Pro strategies into namespaces to better organize multiple combinations.
+- [Refactor] Do not rely on messages metadata for internal topic and partition operations like `#seek` so they can run independently from the consumption flow.
+- [Refactor] Hold a single topic/partition reference on a coordinator instead of in executor, coordinator and consumer.
+- [Refactor] Move `#mark_as_consumed` and `#mark_as_consumed!` into `Strategies::Default` to be able to introduce marking for virtual partitions.
+
+## 2.0.38 (2023-03-27)
+- [Improvement] Introduce `Karafka::Admin#read_watermark_offsets` to get low and high watermark offsets values.
+- [Improvement] Track active_job_id in instrumentation (#1372)
+- [Improvement] Improve `#read_topic` reading in case of a compacted partition where the offset is below the low watermark offset. This should optimize reading and should not go beyond the low watermark offset.
+- [Improvement] Allow `#read_topic` to accept instance settings to overwrite any settings needed to customize reading behaviours.
+
 ## 2.0.37 (2023-03-20)
 - [Fix] Declarative topics execution on a secondary cluster run topics creation on the primary one (#1365)
 - [Fix] Admin read operations commit offset when not needed (#1369)
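Illustrative usage, not part of the diff: the new `#retrying?` helper mentioned above can branch consumer logic when a batch is re-processed after an error. A minimal sketch; `EventsConsumer` and `DataProcessor` are hypothetical names used only for illustration:

    class EventsConsumer < Karafka::BaseConsumer
      def consume
        # `retrying?` is true when the pause tracker attempt counter is positive,
        # i.e. this topic partition is being processed again after a failure
        Karafka.logger.warn("Retrying #{topic.name}/#{partition}") if retrying?

        messages.each do |message|
          DataProcessor.call(message.payload)

          mark_as_consumed(message)
        end
      end
    end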
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    karafka (2.0.
+    karafka (2.0.39)
       karafka-core (>= 2.0.12, < 3.0.0)
       thor (>= 0.20)
       waterdrop (>= 2.4.10, < 3.0.0)
@@ -10,10 +10,10 @@ PATH
 GEM
   remote: https://rubygems.org/
   specs:
-    activejob (7.0.4.
-      activesupport (= 7.0.4.
+    activejob (7.0.4.3)
+      activesupport (= 7.0.4.3)
       globalid (>= 0.3.6)
-    activesupport (7.0.4.
+    activesupport (7.0.4.3)
       concurrent-ruby (~> 1.0, >= 1.0.2)
       i18n (>= 1.6, < 2)
       minitest (>= 5.1)
@@ -48,7 +48,7 @@ GEM
     rspec-expectations (3.12.2)
       diff-lcs (>= 1.2.0, < 2.0)
       rspec-support (~> 3.12.0)
-    rspec-mocks (3.12.
+    rspec-mocks (3.12.5)
       diff-lcs (>= 1.2.0, < 2.0)
       rspec-support (~> 3.12.0)
     rspec-support (3.12.0)
@@ -61,7 +61,7 @@ GEM
     thor (1.2.1)
     tzinfo (2.0.6)
       concurrent-ruby (~> 1.0)
-    waterdrop (2.5.
+    waterdrop (2.5.1)
       karafka-core (>= 2.0.12, < 3.0.0)
       zeitwerk (~> 2.3)
     zeitwerk (2.6.7)
@@ -79,4 +79,4 @@ DEPENDENCIES
   simplecov

 BUNDLED WITH
-   2.4.
+   2.4.10
data/README.md
CHANGED
@@ -86,7 +86,7 @@ bundle exec karafka server

 I also sell Karafka Pro subscriptions. It includes a commercial-friendly license, priority support, architecture consultations, enhanced Web UI and high throughput data processing-related features (virtual partitions, long-running jobs, and more).

-**
+**10%** of the income will be distributed back to other OSS projects that Karafka uses under the hood.

 Help me provide high-quality open-source software. Please see the Karafka [homepage](https://karafka.io/#become-pro) for more details.
data/bin/integrations
CHANGED
@@ -25,7 +25,7 @@ ROOT_PATH = Pathname.new(File.expand_path(File.join(File.dirname(__FILE__), '../
 # we limit it. Locally we can run a lot of those, as many of them have sleeps and do not use a lot
 # of CPU. Locally we also cannot go beyond certain limit due to how often and how many topics we
 # create in Kafka. With an overloaded system, we start getting timeouts.
-CONCURRENCY = ENV.key?('CI') ?
+CONCURRENCY = ENV.key?('CI') ? 5 : Etc.nprocessors * 3

 # How may bytes do we want to keep from the stdout in the buffer for when we need to print it
 MAX_BUFFER_OUTPUT = 51_200
data/config/locales/errors.yml
CHANGED
@@ -72,10 +72,3 @@ en:
   test:
     missing: needs to be present
     id_format: needs to be a String
-
-  pro_topic:
-    virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
-    virtual_partitions.max_partitions_format: needs to be equal or more than 1
-    manual_offset_management_not_with_virtual_partitions: cannot be used together with Virtual Partitions
-    long_running_job.active_format: needs to be either true or false
-    dead_letter_queue_not_with_virtual_partitions: cannot be used together with Virtual Partitions
data/config/locales/pro_errors.yml
CHANGED
@@ -3,10 +3,28 @@ en:
   topic:
     virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
     virtual_partitions.max_partitions_format: needs to be equal or more than 1
+
     manual_offset_management_not_with_virtual_partitions: cannot be used together with Virtual Partitions
+
     long_running_job.active_format: needs to be either true or false
+
     dead_letter_queue_with_virtual_partitions: when using Dead Letter Queue with Virtual Partitions, at least one retry is required.

+    throttling.active_format: needs to be either true or false
+    throttling.limit_format: needs to be equal or more than 1
+    throttling.interval_format: needs to be equal or more than 1
+
+    filtering.active_missing: needs to be present
+    filtering.factory_format: 'needs to respond to #call'
+    filtering.factories_format: 'needs to contain only factories responding to #call'
+    filtering.active_format: 'needs to be boolean'
+
+    expiring.ttl_format: 'needs to be equal or more than 0 and an integer'
+    expiring.active_format: 'needs to be boolean'
+
+    delaying.delay_format: 'needs to be equal or more than 0 and an integer'
+    delaying.active_format: 'needs to be boolean'
+
   config:
     encryption.active_format: 'needs to be either true or false'
     encryption.public_key_invalid: 'is not a valid public RSA key'
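Illustrative usage, not part of the diff: the validation keys above correspond to the new per-topic Pro settings (throttling limit/interval, filtering factories, expiring ttl, delaying delay). A hypothetical routing sketch assuming the Pro DSL exposes these settings under the same names; the actual method signatures live in the new routing feature files and may differ:

    class KarafkaApp < Karafka::App
      routes.draw do
        topic :orders_states do
          consumer OrdersStatesConsumer # hypothetical consumer class

          # Hypothetical calls mirroring the validated keys above; real Pro DSL names may differ
          throttling(limit: 100, interval: 60_000) # at most 100 messages per 60 seconds
          expiring(ttl: 60_000)                    # skip messages older than 60 seconds
          delaying(delay: 10_000)                  # do not consume messages younger than 10 seconds
        end
      end
    end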
data/lib/karafka/active_job/consumer.rb
CHANGED
@@ -12,16 +12,31 @@ module Karafka
       messages.each do |message|
         break if Karafka::App.stopping?

-
-        # message instead of using the `#raw_payload`. This is not done on purpose to simplify
-        # the ActiveJob setup here
-        job = ::ActiveSupport::JSON.decode(message.raw_payload)
+        consume_job(message)

-
+        mark_as_consumed(message)
+      end
+    end

-
+    private

-
+    # Consumes a message with the job and runs needed instrumentation
+    #
+    # @param job_message [Karafka::Messages::Message] message with active job
+    def consume_job(job_message)
+      # We technically speaking could set this as deserializer and reference it from the
+      # message instead of using the `#raw_payload`. This is not done on purpose to simplify
+      # the ActiveJob setup here
+      job = ::ActiveSupport::JSON.decode(job_message.raw_payload)
+
+      tags.add(:job_class, job['job_class'])
+
+      payload = { caller: self, job: job, message: job_message }
+
+      # We publish both to make it consistent with `consumer.x` events
+      Karafka.monitor.instrument('active_job.consume', payload)
+      Karafka.monitor.instrument('active_job.consumed', payload) do
+        ::ActiveJob::Base.execute(job)
       end
     end
   end
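Illustrative usage, not part of the diff: the `active_job.consume` / `active_job.consumed` events published above can be observed through the standard monitor. A minimal sketch, assuming the monitor adds a `time` measurement (in ms) to events instrumented with a block, as it does for other consumption events:

    # Log per-job execution time using the payload keys (`job`, `message`, `caller`) shown above
    Karafka.monitor.subscribe('active_job.consumed') do |event|
      job_class = event[:job]['job_class']
      offset = event[:message].offset

      Karafka.logger.info("ActiveJob #{job_class} (offset #{offset}) done in #{event[:time]}ms")
    end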
data/lib/karafka/admin.rb
CHANGED
@@ -44,17 +44,32 @@ module Karafka
     # @param count [Integer] how many messages we want to get at most
     # @param start_offset [Integer] offset from which we should start. If -1 is provided
     #   (default) we will start from the latest offset
+    # @param settings [Hash] kafka extra settings (optional)
     #
     # @return [Array<Karafka::Messages::Message>] array with messages
-    def read_topic(name, partition, count, start_offset = -1)
+    def read_topic(name, partition, count, start_offset = -1, settings = {})
       messages = []
       tpl = Rdkafka::Consumer::TopicPartitionList.new
+      low_offset, high_offset = nil

-      with_consumer do |consumer|
-
-
+      with_consumer(settings) do |consumer|
+        low_offset, high_offset = consumer.query_watermark_offsets(name, partition)
+
+        # Select offset dynamically if -1 or less
+        start_offset = high_offset - count if start_offset.negative?

-
+        # Build the requested range - since first element is on the start offset we need to
+        # subtract one from requested count to end up with expected number of elements
+        requested_range = (start_offset..start_offset + (count - 1))
+        # Establish theoretical available range. Note, that this does not handle cases related to
+        # log retention or compaction
+        available_range = (low_offset..high_offset)
+        # Select only offset that we can select. This will remove all the potential offsets that
+        # are below the low watermark offset
+        possible_range = requested_range.select { |offset| available_range.include?(offset) }
+
+        start_offset = possible_range.first
+        count = possible_range.count

         tpl.add_topic_and_partitions_with_offsets(name, partition => start_offset)
         consumer.assign(tpl)
@@ -64,11 +79,15 @@ module Karafka
         loop do
           # If we've got as many messages as we've wanted stop
           break if messages.size >= count
-          # If we've reached end of the topic messages, don't process more
-          break if !messages.empty? && end_offset <= messages.last.offset

           message = consumer.poll(200)
-
+
+          next unless message
+
+          # If the message we've got is beyond the requested range, stop
+          break unless possible_range.include?(message.offset)
+
+          messages << message
         rescue Rdkafka::RdkafkaError => e
           # End of partition
           break if e.code == :partition_eof
@@ -77,7 +96,7 @@ module Karafka
         end
       end

-      messages.map do |message|
+      messages.map! do |message|
         Messages::Builders::Message.call(
           message,
           # Use topic from routes if we can match it or create a dummy one
@@ -136,6 +155,17 @@ module Karafka
       end
     end

+    # Fetches the watermark offsets for a given topic partition
+    #
+    # @param name [String, Symbol] topic name
+    # @param partition [Integer] partition
+    # @return [Array<Integer, Integer>] low watermark offset and high watermark offset
+    def read_watermark_offsets(name, partition)
+      with_consumer do |consumer|
+        consumer.query_watermark_offsets(name, partition)
+      end
+    end
+
     # @return [Rdkafka::Metadata] cluster metadata info
     def cluster_info
       with_admin do |admin|
@@ -159,15 +189,16 @@ module Karafka

     # Creates admin instance and yields it. After usage it closes the admin instance
     def with_admin
-      admin = config(:producer).admin
+      admin = config(:producer, {}).admin
       yield(admin)
     ensure
       admin&.close
     end

     # Creates consumer instance and yields it. After usage it closes the consumer instance
-
-
+    # @param settings [Hash] extra settings to customize consumer
+    def with_consumer(settings = {})
+      consumer = config(:consumer, settings).consumer
       yield(consumer)
     ensure
       consumer&.close
@@ -196,11 +227,12 @@ module Karafka
     end

     # @param type [Symbol] type of config we want
+    # @param settings [Hash] extra settings for config (if needed)
     # @return [::Rdkafka::Config] rdkafka config
-    def config(type)
+    def config(type, settings)
       config_hash = Karafka::Setup::AttributesMap.public_send(
         type,
-        Karafka::App.config.kafka.dup.merge(CONFIG_DEFAULTS)
+        Karafka::App.config.kafka.dup.merge(CONFIG_DEFAULTS).merge!(settings)
       )

       ::Rdkafka::Config.new(config_hash)
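Illustrative usage, not part of the diff: the extended admin API above could be exercised like this (the topic name and the extra `statistics.interval.ms` setting are arbitrary examples):

    # Low and high watermark offsets for partition 0 of a topic
    low, high = Karafka::Admin.read_watermark_offsets('events', 0)

    # Read up to 10 messages starting from the low watermark, passing extra kafka
    # settings through the new `settings` argument
    messages = Karafka::Admin.read_topic('events', 0, 10, low, { 'statistics.interval.ms': 5_000 })

    messages.each { |message| puts "#{message.offset}: #{message.raw_payload}" }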
data/lib/karafka/base_consumer.rb
CHANGED
@@ -7,11 +7,13 @@ module Karafka
     # Allow for consumer instance tagging for instrumentation
     include ::Karafka::Core::Taggable

+    extend Forwardable
+
+    def_delegators :@coordinator, :topic, :partition
+
     # @return [String] id of the current consumer
     attr_reader :id
     # @return [Karafka::Routing::Topic] topic to which a given consumer is subscribed
-    attr_accessor :topic
-    # @return [Karafka::Messages::Messages] current messages batch
     attr_accessor :messages
     # @return [Karafka::Connection::Client] kafka connection client
     attr_accessor :client
@@ -97,6 +99,20 @@ module Karafka
       )
     end

+    # Trigger method for running on idle runs without messages
+    #
+    # @private
+    def on_idle
+      handle_idle
+    rescue StandardError => e
+      Karafka.monitor.instrument(
+        'error.occurred',
+        error: e,
+        caller: self,
+        type: 'consumer.idle.error'
+      )
+    end
+
     # Trigger method for running on partition revocation.
     #
     # @private
@@ -143,51 +159,6 @@ module Karafka
     # some teardown procedures (closing file handler, etc).
     def shutdown; end

-    # Marks message as consumed in an async way.
-    #
-    # @param message [Messages::Message] last successfully processed message.
-    # @return [Boolean] true if we were able to mark the offset, false otherwise. False indicates
-    #   that we were not able and that we have lost the partition.
-    #
-    # @note We keep track of this offset in case we would mark as consumed and got error when
-    #   processing another message. In case like this we do not pause on the message we've already
-    #   processed but rather at the next one. This applies to both sync and async versions of this
-    #   method.
-    def mark_as_consumed(message)
-      # Ignore earlier offsets than the one we alread committed
-      return true if coordinator.seek_offset > message.offset
-
-      unless client.mark_as_consumed(message)
-        coordinator.revoke
-
-        return false
-      end
-
-      coordinator.seek_offset = message.offset + 1
-
-      true
-    end
-
-    # Marks message as consumed in a sync way.
-    #
-    # @param message [Messages::Message] last successfully processed message.
-    # @return [Boolean] true if we were able to mark the offset, false otherwise. False indicates
-    #   that we were not able and that we have lost the partition.
-    def mark_as_consumed!(message)
-      # Ignore earlier offsets than the one we alread committed
-      return true if coordinator.seek_offset > message.offset
-
-      unless client.mark_as_consumed!(message)
-        coordinator.revoke
-
-        return false
-      end
-
-      coordinator.seek_offset = message.offset + 1
-
-      true
-    end
-
     # Pauses processing on a given offset for the current topic partition
     #
     # After given partition is resumed, it will continue processing from the given offset
@@ -201,8 +172,8 @@ module Karafka
       timeout ? coordinator.pause_tracker.pause(timeout) : coordinator.pause_tracker.pause

       client.pause(
-
-
+        topic.name,
+        partition,
         offset
       )

@@ -213,8 +184,8 @@ module Karafka
         'consumer.consuming.pause',
         caller: self,
         manual: manual_pause,
-        topic:
-        partition:
+        topic: topic.name,
+        partition: partition,
         offset: offset,
         timeout: coordinator.pause_tracker.current_timeout,
         attempt: coordinator.pause_tracker.attempt
@@ -223,6 +194,8 @@ module Karafka

     # Resumes processing of the current topic partition
     def resume
+      return unless coordinator.pause_tracker.paused?
+
       # This is sufficient to expire a partition pause, as with it will be resumed by the listener
       # thread before the next poll.
       coordinator.pause_tracker.expire
@@ -234,8 +207,8 @@ module Karafka
     def seek(offset)
       client.seek(
         Karafka::Messages::Seek.new(
-
-
+          topic.name,
+          partition,
           offset
         )
       )
@@ -248,6 +221,13 @@ module Karafka
       coordinator.revoked?
     end

+    # @return [Boolean] are we retrying processing after an error. This can be used to provide a
+    #   different flow after there is an error, for example for resources cleanup, small manual
+    #   backoff or different instrumentation tracking.
+    def retrying?
+      coordinator.pause_tracker.attempt.positive?
+    end
+
     # Pauses the processing from the last offset to retry on given message
     # @private
     def retry_after_pause
@@ -258,8 +238,8 @@ module Karafka
       Karafka.monitor.instrument(
         'consumer.consuming.retry',
         caller: self,
-        topic:
-        partition:
+        topic: topic.name,
+        partition: partition,
         offset: coordinator.seek_offset,
         timeout: coordinator.pause_tracker.current_timeout,
         attempt: coordinator.pause_tracker.attempt
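Illustrative usage, not part of the diff: failures raised from the new `#on_idle` flow surface through the regular `error.occurred` notification with the `consumer.idle.error` type, so they can be routed to an external tracker. A minimal sketch; `ErrorTracker` is a hypothetical reporting facade:

    Karafka.monitor.subscribe('error.occurred') do |event|
      # `type`, `error` and `caller` match the payload built in `#on_idle` above
      next unless event[:type] == 'consumer.idle.error'

      ErrorTracker.notify(event[:error], consumer_id: event[:caller].id)
    end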
data/lib/karafka/connection/listener.rb
CHANGED
@@ -25,7 +25,7 @@ module Karafka
       @consumer_group_coordinator = consumer_group_coordinator
       @subscription_group = subscription_group
       @jobs_queue = jobs_queue
-      @coordinators = Processing::CoordinatorsBuffer.new
+      @coordinators = Processing::CoordinatorsBuffer.new(subscription_group.topics)
       @client = Client.new(@subscription_group)
       @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
       @jobs_builder = proc_config.jobs_builder
@@ -234,7 +234,7 @@ module Karafka
     def build_and_schedule_shutdown_jobs
       jobs = []

-      @executors.each do |
+      @executors.each do |executor|
         job = @jobs_builder.shutdown(executor)
         job.before_enqueue
         jobs << job
@@ -263,20 +263,25 @@ module Karafka

       @messages_buffer.each do |topic, partition, messages|
         coordinator = @coordinators.find_or_create(topic, partition)
-
         # Start work coordination for this topic partition
         coordinator.start(messages)

-
-
-
-        executor = @executors.find_or_create(topic, partition,
-
-
-
+        # We do not increment coordinator for idle job because it's not a user related one
+        # and it will not go through a standard lifecycle. Same applies to revoked and shutdown
+        if messages.empty?
+          executor = @executors.find_or_create(topic, partition, 0, coordinator)
+          jobs << @jobs_builder.idle(executor)
+        else
+          @partitioner.call(topic, messages, coordinator) do |group_id, partition_messages|
+            executor = @executors.find_or_create(topic, partition, group_id, coordinator)
+            coordinator.increment
+            jobs << @jobs_builder.consume(executor, partition_messages)
+          end
         end
       end

+      jobs.each(&:before_enqueue)
+
       @scheduler.schedule_consumption(@jobs_queue, jobs)
     end
data/lib/karafka/errors.rb
CHANGED
@@ -46,8 +46,5 @@ module Karafka

     # This should never happen. Please open an issue if it does.
     StrategyNotFoundError = Class.new(BaseError)
-
-    # This should never happen. Please open an issue if it does.
-    SkipMessageNotFoundError = Class.new(BaseError)
   end
 end
data/lib/karafka/instrumentation/logger_listener.rb
CHANGED
@@ -170,13 +170,51 @@ module Karafka
     #
     # @param event [Karafka::Core::Monitoring::Event] event details including payload
     def on_dead_letter_queue_dispatched(event)
+      consumer = event[:caller]
+      topic = consumer.topic.name
       message = event[:message]
       offset = message.offset
-
-      dlq_topic = event[:caller].topic.dead_letter_queue.topic
+      dlq_topic = consumer.topic.dead_letter_queue.topic
       partition = message.partition

-      info "
+      info <<~MSG.tr("\n", ' ').strip!
+        [#{consumer.id}] Dispatched message #{offset}
+        from #{topic}/#{partition}
+        to DLQ topic: #{dlq_topic}
+      MSG
+    end
+
+    # Logs info about throttling event
+    #
+    # @param event [Karafka::Core::Monitoring::Event] event details including payload
+    def on_filtering_throttled(event)
+      consumer = event[:caller]
+      topic = consumer.topic.name
+      # Here we get last message before throttle
+      message = event[:message]
+      partition = message.partition
+      offset = message.offset
+
+      info <<~MSG.tr("\n", ' ').strip!
+        [#{consumer.id}] Throttled and will resume
+        from message #{offset}
+        on #{topic}/#{partition}
+      MSG
+    end
+
+    # @param event [Karafka::Core::Monitoring::Event] event details including payload
+    def on_filtering_seek(event)
+      consumer = event[:caller]
+      topic = consumer.topic.name
+      # Message to which we seek
+      message = event[:message]
+      partition = message.partition
+      offset = message.offset
+
+      info <<~MSG.tr("\n", ' ').strip!
+        [#{consumer.id}] Post-filtering seeking to message #{offset}
+        on #{topic}/#{partition}
+      MSG
     end

     # There are many types of errors that can occur in many places, but we provide a single
@@ -203,6 +241,9 @@ module Karafka
       when 'consumer.after_consume.error'
         error "Consumer after consume failed due to an error: #{error}"
         error details
+      when 'consumer.idle.error'
+        error "Consumer idle failed due to an error: #{error}"
+        error details
       when 'consumer.shutdown.error'
         error "Consumer on shutdown failed due to an error: #{error}"
         error details
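Illustrative usage, not part of the diff: the `filtering.throttled` and `filtering.seek` events handled by the `LoggerListener` above can also be consumed by custom subscribers, for example to count throttle hits per partition. A sketch; `METRICS` stands in for any metrics client:

    Karafka.monitor.subscribe('filtering.throttled') do |event|
      message = event[:message]

      # `caller` and `message` follow the payload used by the listener above
      METRICS.increment(
        'karafka.filtering.throttled',
        tags: ["topic:#{event[:caller].topic.name}", "partition:#{message.partition}"]
      )
    end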
data/lib/karafka/instrumentation/notifications.rb
CHANGED
@@ -17,6 +17,9 @@ module Karafka
     # complete list of all the events. Please use the #available_events on fully loaded
     # Karafka system to determine all of the events you can use.
     EVENTS = %w[
+      active_job.consume
+      active_job.consumed
+
       app.initialized
       app.running
       app.quieting
@@ -36,6 +39,7 @@ module Karafka
       consumer.consumed
       consumer.consuming.pause
       consumer.consuming.retry
+      consumer.idle
       consumer.revoke
       consumer.revoked
       consumer.shutting_down
@@ -43,6 +47,9 @@ module Karafka

       dead_letter_queue.dispatched

+      filtering.throttled
+      filtering.seek
+
       process.notice_signal

       statistics.emitted