karafka 2.0.39 → 2.0.41
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +16 -0
- data/Gemfile +3 -2
- data/Gemfile.lock +13 -2
- data/bin/integrations +17 -2
- data/lib/karafka/admin.rb +17 -13
- data/lib/karafka/connection/client.rb +5 -2
- data/lib/karafka/instrumentation/callbacks/statistics.rb +12 -0
- data/lib/karafka/instrumentation/logger_listener.rb +16 -5
- data/lib/karafka/messages/builders/batch_metadata.rb +6 -5
- data/lib/karafka/messages/builders/messages.rb +3 -1
- data/lib/karafka/messages/messages.rb +5 -0
- data/lib/karafka/pro/iterator.rb +253 -0
- data/lib/karafka/pro/processing/strategies/dlq/vp.rb +2 -1
- data/lib/karafka/processing/executor.rb +15 -6
- data/lib/karafka/routing/router.rb +15 -0
- data/lib/karafka/setup/config.rb +7 -1
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +3 -2
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 9560b22fc8cfd59dcaeb6551bcc0b2d2ebfef2f162a12905fa3aefe0c9c5865e
+  data.tar.gz: a5b7aba125288ec96cf3b862e72447bf467fe23f923c232fe1d3ff9c48b1fdb4
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: d6581af85f8900d2d5ce91b6f9ec8ed0e1f6be5f3e80c36315c44c8dc07c30949566e281f40feb1b54cc9bbca771ac2188637e916d19edcb2fe26c04aeb362e1
+  data.tar.gz: e467612b3185b5ec764d387e72507b617bf49d702436da2021d467fb0c23630aa98151c9679444a34114317a8c52e3c37f0268c6a6bdb4564ffa1bab51993109
checksums.yaml.gz.sig
CHANGED
Binary file
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,21 @@
 # Karafka framework changelog
 
+## 2.0.41 (2023-04-19)
+- **[Feature]** Provide `Karafka::Pro::Iterator` for anonymous topic/partitions iterations and messages lookups (#1389 and #1427).
+- [Improvement] Optimize topic lookup for `read_topic` admin method usage.
+- [Improvement] Report via `LoggerListener` information about the partition on which a given job has started and finished.
+- [Improvement] Slightly normalize the `LoggerListener` format. Always report partition related operations as follows: `TOPIC_NAME/PARTITION`.
+- [Improvement] Do not retry recovery from `unknown_topic_or_part` when Karafka is shutting down as there is no point and no risk of any data losses.
+- [Improvement] Report `client.software.name` and `client.software.version` according to the `librdkafka` recommendation.
+- [Improvement] Report the ten longest integration specs after the suite execution.
+- [Improvement] Prevent user-originating errors related to statistics processing from potentially crashing the listener loop and hanging the Karafka process.
+
+## 2.0.40 (2023-04-13)
+- [Improvement] Introduce the `Karafka::Messages::Messages#empty?` method to handle Idle related cases where shutdown or revocation would be called on an empty messages set. This method allows for checking if there are any messages in the messages batch.
+- [Refactor] Require the messages builder to accept the partition and do not fetch it from messages.
+- [Refactor] Use an empty messages set for internal APIs (Idle) so there always is a `Karafka::Messages::Messages` instance.
+- [Refactor] Allow for empty messages set initialization with -1001 and -1 on metadata (similar to `librdkafka`).
+
 ## 2.0.39 (2023-04-11)
 - **[Feature]** Provide ability to throttle/limit number of messages processed in a time unit (#1203)
 - **[Feature]** Provide Delayed Topics (#1000)
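The `Karafka::Messages::Messages#empty?` addition from 2.0.40 matters mostly in `shutdown`/`revoked` hooks, which with Idle runs can now be invoked against an empty batch. A minimal sketch of such a guard; the consumer class and the `buffer`/`flush_buffers` helpers are made up for illustration:

```ruby
class EventsConsumer < Karafka::BaseConsumer
  def consume
    messages.each { |message| buffer << message.payload }
  end

  def shutdown
    # Idle runs may trigger shutdown with an empty messages set, so flush only when needed
    flush_buffers unless messages.empty?
  end
end
```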
data/Gemfile
CHANGED
@@ -6,10 +6,11 @@ plugin 'diffend'
 
 gemspec
 
-# Karafka gem does not require
-#
+# Karafka gem does not require activejob nor karafka-web to work
+# They are added here because they are part of the integration suite
 group :integrations do
   gem 'activejob'
+  gem 'karafka-web'
 end
 
 group :test do
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    karafka (2.0.39)
+    karafka (2.0.41)
       karafka-core (>= 2.0.12, < 3.0.0)
       thor (>= 0.20)
       waterdrop (>= 2.4.10, < 3.0.0)
@@ -22,6 +22,7 @@ GEM
     concurrent-ruby (1.2.2)
     diff-lcs (1.5.0)
     docile (1.4.0)
+    erubi (1.12.0)
     factory_bot (6.2.1)
       activesupport (>= 5.0.0)
     ffi (1.15.5)
@@ -36,9 +37,18 @@ GEM
       ffi (~> 1.15)
       mini_portile2 (~> 2.6)
       rake (> 12)
+    karafka-web (0.5.1)
+      erubi (~> 1.4)
+      karafka (>= 2.0.40, < 3.0.0)
+      karafka-core (>= 2.0.12, < 3.0.0)
+      roda (~> 3.63)
+      tilt (~> 2.0)
     mini_portile2 (2.8.1)
     minitest (5.18.0)
+    rack (3.0.7)
     rake (13.0.6)
+    roda (3.67.0)
+      rack
     rspec (3.12.0)
       rspec-core (~> 3.12.0)
       rspec-expectations (~> 3.12.0)
@@ -59,6 +69,7 @@ GEM
     simplecov-html (0.12.3)
     simplecov_json_formatter (0.1.4)
     thor (1.2.1)
+    tilt (2.1.0)
     tzinfo (2.0.6)
       concurrent-ruby (~> 1.0)
     waterdrop (2.5.1)
@@ -67,7 +78,6 @@ GEM
     zeitwerk (2.6.7)
 
 PLATFORMS
-  arm64-darwin-21
   x86_64-linux
 
 DEPENDENCIES
@@ -75,6 +85,7 @@ DEPENDENCIES
   byebug
   factory_bot
   karafka!
+  karafka-web
   rspec
   simplecov
data/bin/integrations
CHANGED
@@ -152,8 +152,14 @@ class Scenario
     end
   end
 
+  # @return [Float] number of seconds that a given spec took to run
+  def time_taken
+    @finished_at - @started_at
+  end
+
   # Close all the files that are open, so they do not pile up
   def close
+    @finished_at = current_time
     @stdin.close
     @stdout.close
     @stderr.close
@@ -262,13 +268,22 @@ while finished_scenarios.size < scenarios.size
   sleep(0.1)
 end
 
+# Report longest scenarios
+puts
+puts "\nLongest scenarios:\n\n"
+
+finished_scenarios.sort_by(&:time_taken).reverse.first(10).each do |long_scenario|
+  puts "[#{'%6.2f' % long_scenario.time_taken}] #{long_scenario.name}"
+end
+
 failed_scenarios = finished_scenarios.reject(&:success?)
 
-# Report once more on the failed jobs
-# This will only list scenarios that failed without printing their stdout here.
 if failed_scenarios.empty?
   puts
 else
+  # Report once more on the failed jobs
+  # This will only list scenarios that failed without printing their stdout here.
   puts
   puts "\nFailed scenarios:\n\n"
 
   failed_scenarios.each do |scenario|
data/lib/karafka/admin.rb
CHANGED
@@ -96,13 +96,15 @@ module Karafka
         end
       end
 
+      # Use topic from routes if we can match it or create a dummy one
+      # Dummy one is used in case we cannot match the topic with routes. This can happen
+      # when admin API is used to read topics that are not part of the routing
+      topic = ::Karafka::Routing::Router.find_or_initialize_by_name(name)
+
       messages.map! do |message|
         Messages::Builders::Message.call(
           message,
-
-          # Dummy one is used in case we cannot match the topic with routes. This can happen
-          # when admin API is used to read topics that are not part of the routing
-          Routing::Router.find_by(name: name) || Topic.new(name, App.config.deserializer),
+          topic,
           Time.now
         )
       end
@@ -173,6 +175,17 @@ module Karafka
         end
       end
 
+      # Creates consumer instance and yields it. After usage it closes the consumer instance
+      # This API can be used in other pieces of code and allows for low-level consumer usage
+      #
+      # @param settings [Hash] extra settings to customize consumer
+      def with_consumer(settings = {})
+        consumer = config(:consumer, settings).consumer
+        yield(consumer)
+      ensure
+        consumer&.close
+      end
+
       private
 
      # @return [Array<String>] topics names
@@ -195,15 +208,6 @@ module Karafka
         admin&.close
       end
 
-      # Creates consumer instance and yields it. After usage it closes the consumer instance
-      # @param settings [Hash] extra settings to customize consumer
-      def with_consumer(settings = {})
-        consumer = config(:consumer, settings).consumer
-        yield(consumer)
-      ensure
-        consumer&.close
-      end
-
       # There are some cases where rdkafka admin operations finish successfully but without the
       # callback being triggered to materialize the post-promise object. Until this is fixed we
       # can figure out, that operation we wanted to do finished successfully by checking that the
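With `with_consumer` moved above the `private` marker, other components (the new `Pro::Iterator` relies on it) can borrow a short-lived, routing-independent consumer. A hedged usage sketch; the topic name is illustrative and `query_watermark_offsets` is the regular rdkafka consumer API:

```ruby
# Borrow a throwaway consumer to inspect watermark offsets of a topic that is not part
# of the routing. 'my-topic' is just an example name.
Karafka::Admin.with_consumer({ 'auto.offset.reset': 'beginning' }) do |consumer|
  low, high = consumer.query_watermark_offsets('my-topic', 0)
  puts "partition 0 spans offsets #{low}..#{high}"
end
```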
data/lib/karafka/connection/client.rb
CHANGED
@@ -369,6 +369,8 @@ module Karafka
       rescue ::Rdkafka::RdkafkaError => e
         early_report = false
 
+        retryable = time_poll.attempts <= MAX_POLL_RETRIES && time_poll.retryable?
+
         # There are retryable issues on which we want to report fast as they are source of
         # problems and can mean some bigger system instabilities
         # Those are mainly network issues and exceeding the max poll interval
@@ -389,9 +391,10 @@ module Karafka
           return nil if @subscription_group.kafka[:'allow.auto.create.topics']
 
           early_report = true
-        end
 
-
+          # No sense in retrying when no topic/partition and we're no longer running
+          retryable = false unless Karafka::App.running?
+        end
 
         if early_report || !retryable
           Karafka.monitor.instrument(
data/lib/karafka/instrumentation/callbacks/statistics.rb
CHANGED
@@ -32,6 +32,18 @@ module Karafka
           consumer_group_id: @consumer_group_id,
           statistics: @statistics_decorator.call(statistics)
         )
+      # We need to catch and handle any potential errors coming from the instrumentation pipeline
+      # as otherwise, in case of statistics which run in the main librdkafka thread, any crash
+      # will hang the whole process.
+      rescue StandardError => e
+        ::Karafka.monitor.instrument(
+          'error.occurred',
+          caller: self,
+          subscription_group_id: @subscription_group_id,
+          consumer_group_id: @consumer_group_id,
+          type: 'statistics.emitted.error',
+          error: e
+        )
       end
     end
   end
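Errors raised by user code subscribed to `statistics.emitted` no longer crash the librdkafka callback thread; they are re-published through `error.occurred` with the `statistics.emitted.error` type shown above. A small sketch of listening for them (the log message is illustrative):

```ruby
Karafka.monitor.subscribe('error.occurred') do |event|
  next unless event[:type] == 'statistics.emitted.error'

  # The original exception travels under :error, same as for other error types
  Karafka.logger.error("statistics.emitted handler failed: #{event[:error]}")
end
```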
data/lib/karafka/instrumentation/logger_listener.rb
CHANGED
@@ -48,7 +48,8 @@ module Karafka
       job_type = job.class.to_s.split('::').last
       consumer = job.executor.topic.consumer
       topic = job.executor.topic.name
-
+      partition = job.executor.partition
+      info "[#{job.id}] #{job_type} job for #{consumer} on #{topic}/#{partition} started"
     end
 
     # Prints info about the fact that a given job has finished
@@ -60,7 +61,11 @@ module Karafka
       job_type = job.class.to_s.split('::').last
       consumer = job.executor.topic.consumer
       topic = job.executor.topic.name
-
+      partition = job.executor.partition
+      info <<~MSG.tr("\n", ' ').strip!
+        [#{job.id}] #{job_type} job for #{consumer}
+        on #{topic}/#{partition} finished in #{time}ms
+      MSG
     end
 
     # Prints info about a consumer pause occurrence. Irrelevant if user or system initiated.
@@ -73,7 +78,7 @@ module Karafka
       client = event[:caller]
 
       info <<~MSG.tr("\n", ' ').strip!
-        [#{client.id}] Pausing
+        [#{client.id}] Pausing on topic #{topic}/#{partition} on offset #{offset}
       MSG
     end
 
@@ -86,7 +91,7 @@ module Karafka
       client = event[:caller]
 
       info <<~MSG.tr("\n", ' ').strip!
-        [#{client.id}] Resuming
+        [#{client.id}] Resuming on topic #{topic}/#{partition}
       MSG
     end
 
@@ -102,7 +107,7 @@ module Karafka
 
       info <<~MSG.tr("\n", ' ').strip!
         [#{consumer.id}] Retrying of #{consumer.class} after #{timeout} ms
-        on
+        on topic #{topic}/#{partition} from offset #{offset}
       MSG
     end
 
@@ -261,6 +266,12 @@ module Karafka
       when 'librdkafka.error'
         error "librdkafka internal error occurred: #{error}"
         error details
+      # Those can occur when emitted statistics are consumed by the end user and the processing
+      # of statistics fails. The statistics are emitted from librdkafka main loop thread and
+      # any errors there crash the whole thread
+      when 'statistics.emitted.error'
+        error "statistics.emitted processing failed due to an error: #{error}"
+        error details
       # Those will only occur when retries in the client fail and when they did not stop after
       # back-offs
       when 'connection.client.poll.error'
data/lib/karafka/messages/builders/batch_metadata.rb
CHANGED
@@ -10,22 +10,23 @@ module Karafka
         #
         # @param messages [Array<Karafka::Messages::Message>] messages array
         # @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
+        # @param partition [Integer] partition of this metadata
         # @param scheduled_at [Time] moment when the batch was scheduled for processing
         # @return [Karafka::Messages::BatchMetadata] batch metadata object
         #
         # @note We do not set `processed_at` as this needs to be assigned when the batch is
         #   picked up for processing.
-        def call(messages, topic, scheduled_at)
+        def call(messages, topic, partition, scheduled_at)
           Karafka::Messages::BatchMetadata.new(
             size: messages.count,
-            first_offset: messages.first
-            last_offset: messages.last
+            first_offset: messages.first&.offset || -1001,
+            last_offset: messages.last&.offset || -1001,
             deserializer: topic.deserializer,
-            partition:
+            partition: partition,
             topic: topic.name,
             # We go with the assumption that the creation of the whole batch is the last message
             # creation time
-            created_at: messages.last
+            created_at: messages.last&.timestamp || nil,
             # When this batch was built and scheduled for execution
             scheduled_at: scheduled_at,
             # This needs to be set to a correct value prior to processing starting
data/lib/karafka/messages/builders/messages.rb
CHANGED
@@ -11,14 +11,16 @@ module Karafka
         #
         # @param messages [Array<Karafka::Messages::Message>] karafka messages array
         # @param topic [Karafka::Routing::Topic] topic for which we've received messages
+        # @param partition [Integer] partition of those messages
         # @param received_at [Time] moment in time when the messages were received
         # @return [Karafka::Messages::Messages] messages batch object
-        def call(messages, topic, received_at)
+        def call(messages, topic, partition, received_at)
           # We cannot freeze the batch metadata because it is altered with the processed_at time
           # prior to the consumption. It is being frozen there
           metadata = BatchMetadata.call(
             messages,
             topic,
+            partition,
             received_at
           )
 
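The two builder changes above mean the partition now travels explicitly and an empty batch gets librdkafka-style sentinel offsets. A sketch of the refactored internal API, assuming a booted Karafka app; the topic name is illustrative:

```ruby
# Resolve a topic (routed or not) and build an empty batch for partition 0
topic = Karafka::Routing::Router.find_or_initialize_by_name('example-topic')
batch = Karafka::Messages::Builders::Messages.call([], topic, 0, Time.now)

batch.empty?                  # => true (added in 2.0.40)
batch.metadata.first_offset   # => -1001
batch.metadata.last_offset    # => -1001
```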
data/lib/karafka/pro/iterator.rb
CHANGED
@@ -0,0 +1,253 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    # Topic iterator allows you to iterate over topic/partition data and perform lookups for
+    # information that you need.
+    #
+    # It supports early stops on finding the requested data and allows for seeking till
+    # the end. It also allows for signaling, when a given message should be last out of certain
+    # partition, but we still want to continue iterating in other messages.
+    #
+    # It does **not** create a consumer group and does not have any offset management.
+    class Iterator
+      # Local partition reference for librdkafka
+      Partition = Struct.new(:partition, :offset)
+
+      private_constant :Partition
+
+      # A simple API allowing to iterate over topic/partition data, without having to subscribe
+      # and deal with rebalances. This API allows for multi-partition streaming and is optimized
+      # for data lookups. It allows for explicit stopping iteration over any partition during
+      # the iteration process, allowing for optimized lookups.
+      #
+      # @param topics [Array<String>, Hash] list of strings if we want to subscribe to multiple
+      #   topics and all of their partitions or a hash where keys are the topics and values are
+      #   hashes with partitions and their initial offsets.
+      # @param settings [Hash] extra settings for the consumer. Please keep in mind, that if
+      #   overwritten, you may want to include `auto.offset.reset` to match your case.
+      # @param yield_nil [Boolean] should we yield also `nil` values when poll returns nothing.
+      #   Useful in particular for long-living iterators.
+      #
+      # @note It is worth keeping in mind, that this API also needs to operate within
+      #   `max.poll.interval.ms` limitations on each iteration
+      #
+      # @note In case of a never-ending iterator, you need to set `enable.partition.eof` to `false`
+      #   so we don't stop polling data even when reaching the end (end on a given moment)
+      def initialize(
+        topics,
+        settings: { 'auto.offset.reset': 'beginning' },
+        yield_nil: false
+      )
+        @topics_with_partitions = expand_topics_with_partitions(topics)
+
+        @routing_topics = @topics_with_partitions.map do |name, _|
+          [name, ::Karafka::Routing::Router.find_or_initialize_by_name(name)]
+        end.to_h
+
+        @total_partitions = @topics_with_partitions.map(&:last).sum(&:count)
+
+        @stopped_partitions = 0
+
+        @settings = settings
+        @yield_nil = yield_nil
+      end
+
+      # Iterates over requested topic partitions and yields the results with the iterator itself
+      # Iterator instance is yielded because one can run `stop_partition` to stop iterating over
+      # part of data. It is useful for scenarios where we are looking for some information in all
+      # the partitions but once we found it, given partition data is no longer needed and would
+      # only eat up resources.
+      def each
+        Admin.with_consumer(@settings) do |consumer|
+          tpl = tpl_with_expanded_offsets(consumer)
+          consumer.assign(tpl)
+
+          # We need this for self-referenced APIs like pausing
+          @current_consumer = consumer
+
+          # Stream data until we reach the end of all the partitions or until the end user
+          # indicates that they are done
+          until done?
+            message = poll(200)
+
+            # Skip nils if not explicitly required
+            next if message.nil? && !@yield_nil
+
+            if message
+              @current_message = build_message(message)
+
+              yield(@current_message, self)
+            else
+              yield(nil, self)
+            end
+          end
+
+          @current_message = nil
+          @current_consumer = nil
+        end
+
+        # Reset so we can use the same iterator again if needed
+        @stopped_partitions = 0
+      end
+
+      # Stops the partition we're currently yielded into
+      def stop_current_partition
+        stop_partition(
+          @current_message.topic,
+          @current_message.partition
+        )
+      end
+
+      # Stops processing of a given partition
+      # We expect the partition to be provided because of a scenario, where there is a
+      # multi-partition iteration and we want to stop a different partition that the one that
+      # is currently yielded.
+      #
+      # We pause it forever and no longer work with it.
+      #
+      # @param name [String] topic name of which partition we want to stop
+      # @param partition [Integer] partition we want to stop processing
+      def stop_partition(name, partition)
+        @stopped_partitions += 1
+
+        @current_consumer.pause(
+          Rdkafka::Consumer::TopicPartitionList.new(
+            name => [Partition.new(partition, 0)]
+          )
+        )
+      end
+
+      private
+
+      # Expands topics to which we want to subscribe with partitions information in case this
+      # info is not provided. For our convenience we want to support 5 formats of defining
+      # the subscribed topics:
+      #
+      # - 'topic1' - just a string with one topic name
+      # - ['topic1', 'topic2'] - just the names
+      # - { 'topic1' => -100 } - names with negative lookup offset
+      # - { 'topic1' => { 0 => 5 } } - names with exact partitions offsets
+      # - { 'topic1' => { 0 => -5 }, 'topic2' => { 1 => 5 } } - with per partition negative offsets
+      #
+      # @param topics [Array, Hash] topics definitions
+      # @return [Hash] hash with topics containing partitions definitions
+      def expand_topics_with_partitions(topics)
+        # Simplification for the single topic case
+        topics = [topics] if topics.is_a?(String)
+        # If we've got just array with topics, we need to convert that into a representation
+        # that we can expand with offsets
+        topics = topics.map { |name| [name, false] }.to_h if topics.is_a?(Array)
+
+        expanded = Hash.new { |h, k| h[k] = {} }
+
+        topics.map do |topic, details|
+          if details.is_a?(Hash)
+            details.each do |partition, offset|
+              expanded[topic][partition] = offset
+            end
+          else
+            partition_count(topic.to_s).times do |partition|
+              # If no offsets are provided, we just start from zero
+              expanded[topic][partition] = details || 0
+            end
+          end
+        end
+
+        expanded
+      end
+
+      # @param timeout [Integer] timeout in ms
+      # @return [Rdkafka::Consumer::Message, nil] message or nil if nothing to do
+      def poll(timeout)
+        @current_consumer.poll(timeout)
+      rescue Rdkafka::RdkafkaError => e
+        # End of partition
+        if e.code == :partition_eof
+          @stopped_partitions += 1
+
+          retry
+        end
+
+        raise e
+      end
+
+      # Converts raw rdkafka message into Karafka message
+      #
+      # @param message [Rdkafka::Consumer::Message] raw rdkafka message
+      # @return [::Karafka::Messages::Message]
+      def build_message(message)
+        Messages::Builders::Message.call(
+          message,
+          @routing_topics.fetch(message.topic),
+          Time.now
+        )
+      end
+
+      # Do we have all the data we wanted or did every topic partition has reached eof.
+      # @return [Boolean]
+      def done?
+        @stopped_partitions >= @total_partitions
+      end
+
+      # Builds the tpl representing all the subscriptions we want to run
+      #
+      # Additionally for negative offsets, does the watermark calculation where to start
+      #
+      # @param consumer [Rdkafka::Consumer] consumer we need in case of negative offsets as
+      #   negative are going to be used to do "give me last X". We use the already initialized
+      #   consumer instance, not to start another one again.
+      # @return [Rdkafka::Consumer::TopicPartitionList]
+      def tpl_with_expanded_offsets(consumer)
+        tpl = Rdkafka::Consumer::TopicPartitionList.new
+
+        @topics_with_partitions.each do |name, partitions|
+          partitions_with_offsets = {}
+
+          # When no offsets defined, we just start from zero
+          if partitions.is_a?(Array) || partitions.is_a?(Range)
+            partitions_with_offsets = partitions.map { |partition| [partition, 0] }.to_h
+          else
+            # When offsets defined, we can either use them if positive or expand and move back
+            # in case of negative (-1000 means last 1000 messages, etc)
+            partitions.each do |partition, offset|
+              if offset.negative?
+                _, high_watermark_offset = consumer.query_watermark_offsets(name, partition)
+                # We add because this offset is negative
+                partitions_with_offsets[partition] = high_watermark_offset + offset
+              else
+                partitions_with_offsets[partition] = offset
+              end
+            end
+          end
+
+          tpl.add_topic_and_partitions_with_offsets(name, partitions_with_offsets)
+        end
+
+        tpl
+      end
+
+      # @param name [String] topic name
+      # @return [Integer] number of partitions of the topic we want to iterate over
+      def partition_count(name)
+        Admin
+          .cluster_info
+          .topics
+          .find { |topic| topic.fetch(:topic_name) == name }
+          .fetch(:partitions)
+          .count
+      end
+    end
+  end
+end
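Putting the new class together, a hedged usage sketch (Pro license required; the topic name and payload key are made up): scan the last 1,000 messages of every partition and stop a partition as soon as the lookup succeeds there.

```ruby
# Negative offsets mean "last N messages" per partition, as documented in
# expand_topics_with_partitions above
iterator = Karafka::Pro::Iterator.new({ 'orders' => -1_000 })

iterator.each do |message, iter|
  next unless message.payload['order_id'] == '12345'

  puts "Found it at #{message.topic}/#{message.partition}, offset #{message.offset}"

  # This partition served its purpose; keep iterating only over the remaining ones
  iter.stop_current_partition
end
```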
data/lib/karafka/pro/processing/strategies/dlq/vp.rb
CHANGED
@@ -21,7 +21,8 @@ module Karafka
         #
         # In general because we collapse processing in virtual partitions to one on errors, there
         # is no special action that needs to be taken because we warranty that even with VPs
-        # on errors a retry collapses into a single state
+        # on errors a retry collapses into a single state and from this single state we can
+        # mark as consumed the message that we are moving to the DLQ.
         module Vp
           # Features for this strategy
           FEATURES = %i[
data/lib/karafka/processing/executor.rb
CHANGED
@@ -44,10 +44,6 @@ module Karafka
       #
       # @param messages [Array<Karafka::Messages::Message>]
       def before_enqueue(messages)
-        # the moment we've received the batch or actually the moment we've enqueued it,
-        # but good enough
-        @enqueued_at = Time.now
-
         # Recreate consumer with each batch if persistence is not enabled
         # We reload the consumers with each batch instead of relying on some external signals
         # when needed for consistency. That way devs may have it on or off and not in this
@@ -57,8 +53,11 @@ module Karafka
         # First we build messages batch...
         consumer.messages = Messages::Builders::Messages.call(
           messages,
-
-
+          topic,
+          partition,
+          # the moment we've received the batch or actually the moment we've enqueued it,
+          # but good enough
+          Time.now
         )
 
         consumer.on_before_enqueue
@@ -84,6 +83,16 @@ module Karafka
       # This may include house-keeping or other state management changes that can occur but that
       # not mean there are any new messages available for the end user to process
       def idle
+        # Initializes the messages set in case idle operation would happen before any processing
+        # This prevents us from having no messages object at all as the messages object and
+        # its metadata may be used for statistics
+        consumer.messages ||= Messages::Builders::Messages.call(
+          [],
+          topic,
+          partition,
+          Time.now
+        )
+
         consumer.on_idle
       end
 
data/lib/karafka/routing/router.rb
CHANGED
@@ -32,8 +32,23 @@ module Karafka
       nil
     end
 
+    # Finds the topic by name (in any consumer group) and if not present, will built a new
+    # representation of the topic with the defaults and default deserializer.
+    #
+    # This is used in places where we may operate on topics that are not part of the routing
+    # but we want to do something on them (display data, iterate over, etc)
+    # @param name [String] name of the topic we are looking for
+    # @return [Karafka::Routing::Topic]
+    #
+    # @note Please note, that in case of a new topic, it will have a newly built consumer group
+    #   as well, that is not part of the routing.
+    def find_or_initialize_by_name(name)
+      find_by(name: name) || Topic.new(name, ConsumerGroup.new(name))
+    end
+
     module_function :find
     module_function :find_by
+    module_function :find_or_initialize_by_name
   end
 end
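For topics that are not part of the routing, the returned object behaves like a regular routed topic, which is what the optimized admin `read_topic` path and the `Pro::Iterator` rely on. A tiny sketch; the topic name is illustrative:

```ruby
topic = Karafka::Routing::Router.find_or_initialize_by_name('audit-log')

topic.name            # => "audit-log"
topic.consumer_group  # => ad-hoc consumer group built around the topic name, not routed
```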
data/lib/karafka/setup/config.rb
CHANGED
@@ -18,7 +18,13 @@ module Karafka
       KAFKA_DEFAULTS = {
         # We emit the statistics by default, so all the instrumentation and web-ui work out of
         # the box, without requiring users to take any extra actions aside from enabling.
-        'statistics.interval.ms': 5_000
+        'statistics.interval.ms': 5_000,
+        'client.software.name': 'karafka',
+        'client.software.version': [
+          "v#{Karafka::VERSION}",
+          "rdkafka-ruby-v#{Rdkafka::VERSION}",
+          "librdkafka-v#{Rdkafka::LIBRDKAFKA_VERSION}"
+        ].join('-')
       }.freeze
 
       # Contains settings that should not be used in production but make life easier in dev
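These defaults only apply when the corresponding keys are absent from the user `kafka` hash, so they can still be overridden per application. A hedged sketch; the broker address and the custom software name are illustrative:

```ruby
class KarafkaApp < Karafka::App
  setup do |config|
    config.kafka = {
      'bootstrap.servers': '127.0.0.1:9092',
      # Replaces the 'karafka' default reported to the brokers
      'client.software.name': 'my-app-consumer'
    }
  end
end
```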
data/lib/karafka/version.rb
CHANGED
data.tar.gz.sig
CHANGED
Binary file
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: karafka
 version: !ruby/object:Gem::Version
-  version: 2.0.39
+  version: 2.0.41
 platform: ruby
 authors:
 - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
   Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
   MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
   -----END CERTIFICATE-----
-date: 2023-04-
+date: 2023-04-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: karafka-core
@@ -224,6 +224,7 @@ files:
 - lib/karafka/pro/encryption/messages/middleware.rb
 - lib/karafka/pro/encryption/messages/parser.rb
 - lib/karafka/pro/encryption/setup/config.rb
+- lib/karafka/pro/iterator.rb
 - lib/karafka/pro/loader.rb
 - lib/karafka/pro/performance_tracker.rb
 - lib/karafka/pro/processing/collapser.rb
metadata.gz.sig
CHANGED
Binary file