karafka 2.1.6 → 2.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: a6994a6d579728a877f84c87086d093aae8a1f830b891fcb4904883085432fe4
- data.tar.gz: 13b21009a471194a72971ca81ddc718e044bb96587db0e8f186974f554e9ec62
+ metadata.gz: f405521c7a6706cc95e764a4740e7570935f7595d34481bbe33fb617e5537978
+ data.tar.gz: cd6671c441c07e31050bbddab290ba4d31e4a580a646cfd965edf58c19ff150c
  SHA512:
- metadata.gz: e4711880bde1d2cd1cb34959f740459979b74ff4d28a671a232f88adbe7473cf67e366fc2b492fac761c572f3a6dfc147a59d46fc08e1c5e18df8ac5f108afdd
- data.tar.gz: c094600c2bd421ce309c0125d60ea82ed0106d5ce4566b3bb8c1aab13c553e7bd2f6651b98029e42ac831b132563b2c502dd1c76defbf8307cd9bd2393b258f7
+ metadata.gz: 7b5e343a0d2c6e1f885c6eac6509de2f411b54e1a30ce12fac6fa18bb813d82ef666444345b92d8348ac4955cdabfc47ad3658312482f6c500ca169814f10517
+ data.tar.gz: 1b0c319f85dde3bc20b21a842da220d513351b436b3e4de08d56e69a02c36c7c2cd4187c879596ffc73f5dffc2cc3f032c6a8cdbd958ce34138866d27aa00b2b
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
  # Karafka framework changelog

+ ## 2.1.7 (2023-07-22)
+ - [Improvement] Always query for watermarks in the Iterator to improve the initial response time.
+ - [Improvement] Add `max_wait_time` option to the Iterator.
+ - [Fix] Fix a case where `Admin#read_topic` would wait for poll interval on non-existing messages instead of early exit.
+ - [Fix] Fix a case where Iterator with per partition offsets with negative lookups would go below the number of available messages.
+ - [Fix] Remove unused constant from Admin module.
+ - [Fix] Add missing `connection.client.rebalance_callback.error` to the `LoggerListener` instrumentation hook.
+
  ## 2.1.6 (2023-06-29)
  - [Improvement] Provide time support for iterator
  - [Improvement] Provide time support for admin `#read_topic`
@@ -63,7 +71,7 @@
  2. Replace `Karafka::Pro::BaseConsumer` references to `Karafka::BaseConsumer`.
  3. Replace `Karafka::Instrumentation::Vendors::Datadog:Listener` with `Karafka::Instrumentation::Vendors::Datadog::MetricsListener`.

- ## 2.0.41 (2023-14-19)
+ ## 2.0.41 (2023-04-19)
  - **[Feature]** Provide `Karafka::Pro::Iterator` for anonymous topic/partitions iterations and messages lookups (#1389 and #1427).
  - [Improvement] Optimize topic lookup for `read_topic` admin method usage.
  - [Improvement] Report via `LoggerListener` information about the partition on which a given job has started and finished.
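The 2.1.7 entries above change the public `Karafka::Pro::Iterator` surface: a new `max_wait_time` keyword (defaulting to 200 ms in the code further down this diff) and a clamp so negative per-partition offsets never reach below the low watermark. A minimal usage sketch, assuming Karafka Pro is loaded and a hypothetical `events` topic exists; illustrative only, not the project's official example:

```ruby
require 'karafka'

# Start from the last 10 messages of partition 0 of the (hypothetical) 'events' topic.
# Since 2.1.7 this negative lookup is clamped to the low watermark, so it cannot
# request offsets that no longer exist.
iterator = Karafka::Pro::Iterator.new(
  { 'events' => { 0 => -10 } },
  # New in 2.1.7: how long (in ms) a single poll may block when no messages arrive.
  max_wait_time: 500
)

iterator.each do |message|
  puts "#{message.topic}/#{message.partition}##{message.offset}: #{message.raw_payload}"
end
```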
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
  remote: .
  specs:
- karafka (2.1.6)
+ karafka (2.1.7)
  karafka-core (>= 2.1.1, < 2.2.0)
  thor (>= 0.20)
  waterdrop (>= 2.6.2, < 3.0.0)
@@ -10,10 +10,10 @@ PATH
  GEM
  remote: https://rubygems.org/
  specs:
- activejob (7.0.5)
- activesupport (= 7.0.5)
+ activejob (7.0.6)
+ activesupport (= 7.0.6)
  globalid (>= 0.3.6)
- activesupport (7.0.5)
+ activesupport (7.0.6)
  concurrent-ruby (~> 1.0, >= 1.0.2)
  i18n (>= 1.6, < 2)
  minitest (>= 5.1)
@@ -33,7 +33,7 @@ GEM
  karafka-core (2.1.1)
  concurrent-ruby (>= 1.1)
  karafka-rdkafka (>= 0.13.1, < 0.14.0)
- karafka-rdkafka (0.13.1)
+ karafka-rdkafka (0.13.3)
  ffi (~> 1.15)
  mini_portile2 (~> 2.6)
  rake (> 12)
@@ -47,7 +47,7 @@ GEM
  minitest (5.18.1)
  rack (3.0.8)
  rake (13.0.6)
- roda (3.69.0)
+ roda (3.70.0)
  rack
  rspec (3.12.0)
  rspec-core (~> 3.12.0)
@@ -58,10 +58,10 @@ GEM
  rspec-expectations (3.12.3)
  diff-lcs (>= 1.2.0, < 2.0)
  rspec-support (~> 3.12.0)
- rspec-mocks (3.12.5)
+ rspec-mocks (3.12.6)
  diff-lcs (>= 1.2.0, < 2.0)
  rspec-support (~> 3.12.0)
- rspec-support (3.12.0)
+ rspec-support (3.12.1)
  simplecov (0.22.0)
  docile (~> 1.1)
  simplecov-html (~> 0.11)
@@ -72,8 +72,8 @@ GEM
  tilt (2.2.0)
  tzinfo (2.0.6)
  concurrent-ruby (~> 1.0)
- waterdrop (2.6.2)
- karafka-core (>= 2.1.0, < 3.0.0)
+ waterdrop (2.6.4)
+ karafka-core (>= 2.1.1, < 3.0.0)
  zeitwerk (~> 2.3)
  zeitwerk (2.6.8)
data/lib/karafka/admin.rb CHANGED
@@ -9,11 +9,6 @@ module Karafka
  # @note It always uses the primary defined cluster and does not support multi-cluster work.
  # If you need this, just replace the cluster info for the time you use this
  module Admin
- # A fake admin topic representation that we use for messages fetched using this API
- # We cannot use the topics directly because we may want to request data from topics that we
- # do not have in the routing
- Topic = Struct.new(:name, :deserializer)
-
  # We wait only for this amount of time before raising error as we intercept this error and
  # retry after checking that the operation was finished or failed using external factor.
  MAX_WAIT_TIMEOUT = 1
@@ -37,7 +32,7 @@ module Karafka
  'enable.auto.commit': false
  }.freeze

- private_constant :Topic, :CONFIG_DEFAULTS, :MAX_WAIT_TIMEOUT, :TPL_REQUEST_TIMEOUT,
+ private_constant :CONFIG_DEFAULTS, :MAX_WAIT_TIMEOUT, :TPL_REQUEST_TIMEOUT,
  :MAX_ATTEMPTS

  class << self
@@ -71,7 +66,7 @@ module Karafka
  requested_range = (start_offset..start_offset + (count - 1))
  # Establish theoretical available range. Note, that this does not handle cases related to
  # log retention or compaction
- available_range = (low_offset..high_offset)
+ available_range = (low_offset..(high_offset - 1))
  # Select only offset that we can select. This will remove all the potential offsets that
  # are below the low watermark offset
  possible_range = requested_range.select { |offset| available_range.include?(offset) }
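The `read_topic` change above trims the theoretical range by one: Kafka's high watermark is the offset the next message will receive, so the last readable offset is `high_offset - 1`. A small sketch of the arithmetic with made-up watermark values:

```ruby
# Illustrative numbers only - not taken from the gem.
low_offset  = 100            # oldest offset still retained in the partition
high_offset = 150            # high watermark: the offset the next message will get

available_range = (low_offset..(high_offset - 1)) # readable offsets: 100..149

start_offset = 148
count        = 5
requested_range = (start_offset..start_offset + (count - 1)) # 148..152

requested_range.select { |offset| available_range.include?(offset) }
# => [148, 149]
# The previous (low_offset..high_offset) range also accepted 150, so `read_topic`
# kept polling for a message that does not exist until the wait time ran out.
```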
@@ -277,6 +277,9 @@ module Karafka
  when 'connection.client.poll.error'
  error "Data polling error occurred: #{error}"
  error details
+ when 'connection.client.rebalance_callback.error'
+ error "Rebalance callback error occurred: #{error}"
+ error details
  else
  # This should never happen. Please contact the maintainers
  raise Errors::UnsupportedCaseError, event
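Per the changelog, this `when` branch belongs to the `LoggerListener` error hook, so rebalance callback failures are now logged instead of falling into the `else` branch. If you want to react to them yourself, a hedged sketch, assuming the standard `error.occurred` notification with a `:type` key (the same value the listener switches on above); `MyErrorTracker` is hypothetical:

```ruby
# Sketch only: custom handling next to the built-in LoggerListener output.
Karafka.monitor.subscribe('error.occurred') do |event|
  next unless event[:type] == 'connection.client.rebalance_callback.error'

  # event[:error] carries the exception raised inside the rebalance callback
  MyErrorTracker.notify(event[:error])
end
```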
@@ -93,12 +93,27 @@ module Karafka
  next unless partitions.is_a?(Hash)

  partitions.each do |partition, offset|
+ # Care only about numerical offsets
+ #
+ # For time based we already resolve them via librdkafka lookup API
+ next unless offset.is_a?(Integer)
+
+ low_offset, high_offset = @consumer.query_watermark_offsets(name, partition)
+
  # Care only about negative offsets (last n messages)
- next unless offset.is_a?(Integer) && offset.negative?
+ #
+ # We reject the above results but we **NEED** to run the `#query_watermark_offsets`
+ # for each topic partition nonetheless. Without this, librdkafka fetches a lot more
+ # metadata about each topic and each partition and this takes much more time than
+ # just getting watermarks. If we do not run watermark, at least an extra second
+ # is added at the beginning of iterator flow
+ #
+ # This may not be significant when this runs in the background but in case of
+ # using iterator in thins like Puma, it heavily impacts the end user experience
+ next unless offset.negative?

- _, high_watermark_offset = @consumer.query_watermark_offsets(name, partition)
  # We add because this offset is negative
- @mapped_topics[name][partition] = high_watermark_offset + offset
+ @mapped_topics[name][partition] = [high_offset + offset, low_offset].max
  end
  end
  end
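The `[high_offset + offset, low_offset].max` clamp is the fix for "negative lookups would go below the number of available messages". Worked through with made-up watermarks:

```ruby
# Illustrative numbers only: a partition currently retaining offsets 95..99.
low_offset  = 95
high_offset = 100          # next offset to be written
offset      = -10          # user asked for the "last 10" messages

high_offset + offset                    # => 90, below the low watermark (pre-2.1.7 start)
[high_offset + offset, low_offset].max  # => 95, clamped to the oldest retained message
```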
@@ -39,6 +39,7 @@ module Karafka
  # overwritten, you may want to include `auto.offset.reset` to match your case.
  # @param yield_nil [Boolean] should we yield also `nil` values when poll returns nothing.
  # Useful in particular for long-living iterators.
+ # @param max_wait_time [Integer] max wait in ms when iterator did not receive any messages
  #
  # @note It is worth keeping in mind, that this API also needs to operate within
  # `max.poll.interval.ms` limitations on each iteration
@@ -48,7 +49,8 @@
  def initialize(
  topics,
  settings: { 'auto.offset.reset': 'beginning' },
- yield_nil: false
+ yield_nil: false,
+ max_wait_time: 200
  )
  @topics_with_partitions = Expander.new.call(topics)

@@ -62,6 +64,7 @@

  @settings = settings
  @yield_nil = yield_nil
+ @max_wait_time = max_wait_time
  end

  # Iterates over requested topic partitions and yields the results with the iterator itself
@@ -80,7 +83,7 @@
  # Stream data until we reach the end of all the partitions or until the end user
  # indicates that they are done
  until done?
- message = poll(200)
+ message = poll

  # Skip nils if not explicitly required
  next if message.nil? && !@yield_nil
@@ -131,10 +134,9 @@

  private

- # @param timeout [Integer] timeout in ms
  # @return [Rdkafka::Consumer::Message, nil] message or nil if nothing to do
- def poll(timeout)
- @current_consumer.poll(timeout)
+ def poll
+ @current_consumer.poll(@max_wait_time)
  rescue Rdkafka::RdkafkaError => e
  # End of partition
  if e.code == :partition_eof
@@ -25,6 +25,7 @@ module Karafka
  broker.version.fallback
  builtin.features
  check.crcs
+ client.dns.lookup
  client.id
  client.rack
  closesocket_cb
@@ -161,6 +162,7 @@ module Karafka
  broker.address.ttl
  broker.version.fallback
  builtin.features
+ client.dns.lookup
  client.id
  client.rack
  closesocket_cb
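These two lists appear to be the allow-listed librdkafka attributes for consumers and producers, so `client.dns.lookup` can now be passed straight through the `kafka` settings. A hedged sketch, assuming the usual `Karafka::App.setup` flow; the broker address and chosen value are examples only:

```ruby
# Sketch only: forwarding the newly allow-listed librdkafka option.
class KarafkaApp < Karafka::App
  setup do |config|
    config.kafka = {
      'bootstrap.servers': 'localhost:9092',
      # librdkafka accepts 'use_all_dns_ips' or 'resolve_canonical_bootstrap_servers_only'
      'client.dns.lookup': 'use_all_dns_ips'
    }
  end
end
```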
@@ -3,5 +3,5 @@
  # Main module namespace
  module Karafka
  # Current Karafka version
- VERSION = '2.1.6'
+ VERSION = '2.1.7'
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: karafka
  version: !ruby/object:Gem::Version
- version: 2.1.6
+ version: 2.1.7
  platform: ruby
  authors:
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
  Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
  MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
  -----END CERTIFICATE-----
- date: 2023-06-29 00:00:00.000000000 Z
+ date: 2023-07-22 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: karafka-core
metadata.gz.sig CHANGED
Binary file