karafka 2.1.6 → 2.1.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +9 -1
- data/Gemfile.lock +10 -10
- data/lib/karafka/admin.rb +2 -7
- data/lib/karafka/instrumentation/logger_listener.rb +3 -0
- data/lib/karafka/pro/iterator/tpl_builder.rb +18 -3
- data/lib/karafka/pro/iterator.rb +7 -5
- data/lib/karafka/setup/attributes_map.rb +2 -0
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +2 -2
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f405521c7a6706cc95e764a4740e7570935f7595d34481bbe33fb617e5537978
|
4
|
+
data.tar.gz: cd6671c441c07e31050bbddab290ba4d31e4a580a646cfd965edf58c19ff150c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7b5e343a0d2c6e1f885c6eac6509de2f411b54e1a30ce12fac6fa18bb813d82ef666444345b92d8348ac4955cdabfc47ad3658312482f6c500ca169814f10517
|
7
|
+
data.tar.gz: 1b0c319f85dde3bc20b21a842da220d513351b436b3e4de08d56e69a02c36c7c2cd4187c879596ffc73f5dffc2cc3f032c6a8cdbd958ce34138866d27aa00b2b
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,13 @@
|
|
1
1
|
# Karafka framework changelog
|
2
2
|
|
3
|
+
## 2.1.7 (2023-07-22)
|
4
|
+
- [Improvement] Always query for watermarks in the Iterator to improve the initial response time.
|
5
|
+
- [Improvement] Add `max_wait_time` option to the Iterator.
|
6
|
+
- [Fix] Fix a case where `Admin#read_topic` would wait for the poll interval on non-existing messages instead of exiting early.
|
7
|
+
- [Fix] Fix a case where Iterator with per partition offsets with negative lookups would go below the number of available messages.
|
8
|
+
- [Fix] Remove unused constant from Admin module.
|
9
|
+
- [Fix] Add missing `connection.client.rebalance_callback.error` to the `LoggerListener` instrumentation hook.
|
10
|
+
|
3
11
|
## 2.1.6 (2023-06-29)
|
4
12
|
- [Improvement] Provide time support for iterator
|
5
13
|
- [Improvement] Provide time support for admin `#read_topic`
|
@@ -63,7 +71,7 @@
|
|
63
71
|
2. Replace `Karafka::Pro::BaseConsumer` references to `Karafka::BaseConsumer`.
|
64
72
|
3. Replace `Karafka::Instrumentation::Vendors::Datadog::Listener` with `Karafka::Instrumentation::Vendors::Datadog::MetricsListener`.
|
65
73
|
|
66
|
-
## 2.0.41 (2023-
|
74
|
+
## 2.0.41 (2023-04-19)
|
67
75
|
- **[Feature]** Provide `Karafka::Pro::Iterator` for anonymous topic/partitions iterations and messages lookups (#1389 and #1427).
|
68
76
|
- [Improvement] Optimize topic lookup for `read_topic` admin method usage.
|
69
77
|
- [Improvement] Report via `LoggerListener` information about the partition on which a given job has started and finished.
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
karafka (2.1.6)
|
4
|
+
karafka (2.1.7)
|
5
5
|
karafka-core (>= 2.1.1, < 2.2.0)
|
6
6
|
thor (>= 0.20)
|
7
7
|
waterdrop (>= 2.6.2, < 3.0.0)
|
@@ -10,10 +10,10 @@ PATH
|
|
10
10
|
GEM
|
11
11
|
remote: https://rubygems.org/
|
12
12
|
specs:
|
13
|
-
activejob (7.0.
|
14
|
-
activesupport (= 7.0.
|
13
|
+
activejob (7.0.6)
|
14
|
+
activesupport (= 7.0.6)
|
15
15
|
globalid (>= 0.3.6)
|
16
|
-
activesupport (7.0.
|
16
|
+
activesupport (7.0.6)
|
17
17
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
18
18
|
i18n (>= 1.6, < 2)
|
19
19
|
minitest (>= 5.1)
|
@@ -33,7 +33,7 @@ GEM
|
|
33
33
|
karafka-core (2.1.1)
|
34
34
|
concurrent-ruby (>= 1.1)
|
35
35
|
karafka-rdkafka (>= 0.13.1, < 0.14.0)
|
36
|
-
karafka-rdkafka (0.13.
|
36
|
+
karafka-rdkafka (0.13.3)
|
37
37
|
ffi (~> 1.15)
|
38
38
|
mini_portile2 (~> 2.6)
|
39
39
|
rake (> 12)
|
@@ -47,7 +47,7 @@ GEM
|
|
47
47
|
minitest (5.18.1)
|
48
48
|
rack (3.0.8)
|
49
49
|
rake (13.0.6)
|
50
|
-
roda (3.
|
50
|
+
roda (3.70.0)
|
51
51
|
rack
|
52
52
|
rspec (3.12.0)
|
53
53
|
rspec-core (~> 3.12.0)
|
@@ -58,10 +58,10 @@ GEM
|
|
58
58
|
rspec-expectations (3.12.3)
|
59
59
|
diff-lcs (>= 1.2.0, < 2.0)
|
60
60
|
rspec-support (~> 3.12.0)
|
61
|
-
rspec-mocks (3.12.
|
61
|
+
rspec-mocks (3.12.6)
|
62
62
|
diff-lcs (>= 1.2.0, < 2.0)
|
63
63
|
rspec-support (~> 3.12.0)
|
64
|
-
rspec-support (3.12.
|
64
|
+
rspec-support (3.12.1)
|
65
65
|
simplecov (0.22.0)
|
66
66
|
docile (~> 1.1)
|
67
67
|
simplecov-html (~> 0.11)
|
@@ -72,8 +72,8 @@ GEM
|
|
72
72
|
tilt (2.2.0)
|
73
73
|
tzinfo (2.0.6)
|
74
74
|
concurrent-ruby (~> 1.0)
|
75
|
-
waterdrop (2.6.
|
76
|
-
karafka-core (>= 2.1.
|
75
|
+
waterdrop (2.6.4)
|
76
|
+
karafka-core (>= 2.1.1, < 3.0.0)
|
77
77
|
zeitwerk (~> 2.3)
|
78
78
|
zeitwerk (2.6.8)
|
79
79
|
|
data/lib/karafka/admin.rb
CHANGED
@@ -9,11 +9,6 @@ module Karafka
|
|
9
9
|
# @note It always uses the primary defined cluster and does not support multi-cluster work.
|
10
10
|
# If you need this, just replace the cluster info for the time you use this
|
11
11
|
module Admin
|
12
|
-
# A fake admin topic representation that we use for messages fetched using this API
|
13
|
-
# We cannot use the topics directly because we may want to request data from topics that we
|
14
|
-
# do not have in the routing
|
15
|
-
Topic = Struct.new(:name, :deserializer)
|
16
|
-
|
17
12
|
# We wait only for this amount of time before raising error as we intercept this error and
|
18
13
|
# retry after checking that the operation was finished or failed using external factor.
|
19
14
|
MAX_WAIT_TIMEOUT = 1
|
@@ -37,7 +32,7 @@ module Karafka
|
|
37
32
|
'enable.auto.commit': false
|
38
33
|
}.freeze
|
39
34
|
|
40
|
-
private_constant :
|
35
|
+
private_constant :CONFIG_DEFAULTS, :MAX_WAIT_TIMEOUT, :TPL_REQUEST_TIMEOUT,
|
41
36
|
:MAX_ATTEMPTS
|
42
37
|
|
43
38
|
class << self
|
@@ -71,7 +66,7 @@ module Karafka
|
|
71
66
|
requested_range = (start_offset..start_offset + (count - 1))
|
72
67
|
# Establish theoretical available range. Note, that this does not handle cases related to
|
73
68
|
# log retention or compaction
|
74
|
-
available_range = (low_offset..high_offset)
|
69
|
+
available_range = (low_offset..(high_offset - 1))
|
75
70
|
# Select only offset that we can select. This will remove all the potential offsets that
|
76
71
|
# are below the low watermark offset
|
77
72
|
possible_range = requested_range.select { |offset| available_range.include?(offset) }
|
@@ -277,6 +277,9 @@ module Karafka
|
|
277
277
|
when 'connection.client.poll.error'
|
278
278
|
error "Data polling error occurred: #{error}"
|
279
279
|
error details
|
280
|
+
when 'connection.client.rebalance_callback.error'
|
281
|
+
error "Rebalance callback error occurred: #{error}"
|
282
|
+
error details
|
280
283
|
else
|
281
284
|
# This should never happen. Please contact the maintainers
|
282
285
|
raise Errors::UnsupportedCaseError, event
|
@@ -93,12 +93,27 @@ module Karafka
|
|
93
93
|
next unless partitions.is_a?(Hash)
|
94
94
|
|
95
95
|
partitions.each do |partition, offset|
|
96
|
+
# Care only about numerical offsets
|
97
|
+
#
|
98
|
+
# For time based we already resolve them via librdkafka lookup API
|
99
|
+
next unless offset.is_a?(Integer)
|
100
|
+
|
101
|
+
low_offset, high_offset = @consumer.query_watermark_offsets(name, partition)
|
102
|
+
|
96
103
|
# Care only about negative offsets (last n messages)
|
97
|
-
|
104
|
+
#
|
105
|
+
# We reject the above results but we **NEED** to run the `#query_watermark_offsets`
|
106
|
+
# for each topic partition nonetheless. Without this, librdkafka fetches a lot more
|
107
|
+
# metadata about each topic and each partition and this takes much more time than
|
108
|
+
# just getting watermarks. If we do not run watermark, at least an extra second
|
109
|
+
# is added at the beginning of iterator flow
|
110
|
+
#
|
111
|
+
# This may not be significant when this runs in the background but in case of
|
112
|
+
# using iterator in things like Puma, it heavily impacts the end user experience
|
113
|
+
next unless offset.negative?
|
98
114
|
|
99
|
-
_, high_watermark_offset = @consumer.query_watermark_offsets(name, partition)
|
100
115
|
# We add because this offset is negative
|
101
|
-
@mapped_topics[name][partition] =
|
116
|
+
@mapped_topics[name][partition] = [high_offset + offset, low_offset].max
|
102
117
|
end
|
103
118
|
end
|
104
119
|
end
|
data/lib/karafka/pro/iterator.rb
CHANGED
@@ -39,6 +39,7 @@ module Karafka
|
|
39
39
|
# overwritten, you may want to include `auto.offset.reset` to match your case.
|
40
40
|
# @param yield_nil [Boolean] should we yield also `nil` values when poll returns nothing.
|
41
41
|
# Useful in particular for long-living iterators.
|
42
|
+
# @param max_wait_time [Integer] max wait in ms when iterator did not receive any messages
|
42
43
|
#
|
43
44
|
# @note It is worth keeping in mind, that this API also needs to operate within
|
44
45
|
# `max.poll.interval.ms` limitations on each iteration
|
@@ -48,7 +49,8 @@ module Karafka
|
|
48
49
|
def initialize(
|
49
50
|
topics,
|
50
51
|
settings: { 'auto.offset.reset': 'beginning' },
|
51
|
-
yield_nil: false
|
52
|
+
yield_nil: false,
|
53
|
+
max_wait_time: 200
|
52
54
|
)
|
53
55
|
@topics_with_partitions = Expander.new.call(topics)
|
54
56
|
|
@@ -62,6 +64,7 @@ module Karafka
|
|
62
64
|
|
63
65
|
@settings = settings
|
64
66
|
@yield_nil = yield_nil
|
67
|
+
@max_wait_time = max_wait_time
|
65
68
|
end
|
66
69
|
|
67
70
|
# Iterates over requested topic partitions and yields the results with the iterator itself
|
@@ -80,7 +83,7 @@ module Karafka
|
|
80
83
|
# Stream data until we reach the end of all the partitions or until the end user
|
81
84
|
# indicates that they are done
|
82
85
|
until done?
|
83
|
-
message = poll
|
86
|
+
message = poll
|
84
87
|
|
85
88
|
# Skip nils if not explicitly required
|
86
89
|
next if message.nil? && !@yield_nil
|
@@ -131,10 +134,9 @@ module Karafka
|
|
131
134
|
|
132
135
|
private
|
133
136
|
|
134
|
-
# @param timeout [Integer] timeout in ms
|
135
137
|
# @return [Rdkafka::Consumer::Message, nil] message or nil if nothing to do
|
136
|
-
def poll
|
137
|
-
@current_consumer.poll(
|
138
|
+
def poll
|
139
|
+
@current_consumer.poll(@max_wait_time)
|
138
140
|
rescue Rdkafka::RdkafkaError => e
|
139
141
|
# End of partition
|
140
142
|
if e.code == :partition_eof
|
@@ -25,6 +25,7 @@ module Karafka
|
|
25
25
|
broker.version.fallback
|
26
26
|
builtin.features
|
27
27
|
check.crcs
|
28
|
+
client.dns.lookup
|
28
29
|
client.id
|
29
30
|
client.rack
|
30
31
|
closesocket_cb
|
@@ -161,6 +162,7 @@ module Karafka
|
|
161
162
|
broker.address.ttl
|
162
163
|
broker.version.fallback
|
163
164
|
builtin.features
|
165
|
+
client.dns.lookup
|
164
166
|
client.id
|
165
167
|
client.rack
|
166
168
|
closesocket_cb
|
data/lib/karafka/version.rb
CHANGED
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: karafka
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.6
|
4
|
+
version: 2.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maciej Mensfeld
|
@@ -35,7 +35,7 @@ cert_chain:
|
|
35
35
|
Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
|
36
36
|
MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
|
37
37
|
-----END CERTIFICATE-----
|
38
|
-
date: 2023-
|
38
|
+
date: 2023-07-22 00:00:00.000000000 Z
|
39
39
|
dependencies:
|
40
40
|
- !ruby/object:Gem::Dependency
|
41
41
|
name: karafka-core
|
metadata.gz.sig
CHANGED
Binary file
|