waterdrop 2.5.3 → 2.6.1.beta1
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +23 -3
- data/Gemfile.lock +11 -12
- data/README.md +16 -9
- data/config/locales/errors.yml +3 -0
- data/lib/waterdrop/clients/buffered.rb +47 -0
- data/lib/waterdrop/{producer/dummy_client.rb → clients/dummy.rb} +8 -5
- data/lib/waterdrop/clients/rdkafka.rb +28 -0
- data/lib/waterdrop/config.rb +7 -2
- data/lib/waterdrop/contracts/config.rb +3 -0
- data/lib/waterdrop/producer/builder.rb +6 -11
- data/lib/waterdrop/producer.rb +33 -17
- data/lib/waterdrop/version.rb +1 -1
- data/waterdrop.gemspec +1 -1
- data.tar.gz.sig +0 -0
- metadata +9 -10
- metadata.gz.sig +1 -3
- data/lib/waterdrop/patches/rdkafka/client.rb +0 -34
- data/lib/waterdrop/patches/rdkafka/metadata.rb +0 -45
- data/lib/waterdrop/patches/rdkafka/producer.rb +0 -86
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5e637029ce9d9113e2d64cadcec57ac980602d489f2207507194082065604b16
+  data.tar.gz: 21d0a5a643b3e48603fe8e8adb13dbe827a0b9b50871675e57aa62b71abe7573
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e34c6c5225022f191dd80da877126b5c37075b3356b1e99e4e6b79adb0c54b8939e6b43f2efded8f68a0b68424b3cdbdcaa8abe19a781456faa4f653ae9063ea
+  data.tar.gz: 5060d94c549d5d0ac4b2e09f8ab4888643008ea732943e76f75dfedd386f08ada8a60ad278f85dcfa1172bf813c993a38b2feb71ee75ea2ce1a72bf3021edbcb
checksums.yaml.gz.sig
CHANGED
Binary file
data/CHANGELOG.md
CHANGED
@@ -1,9 +1,29 @@
 # WaterDrop changelog

+### 2.6.1 (Unreleased)
+- [Refactor] Remove no longer needed patches.
+- [Fix] Fork detection on short-lived processes seems to fail. Clear the used parent process client reference so it is not closed in the finalizer (#356).
+- [Change] Require `karafka-rdkafka` `>= 0.13.0.beta2`.
+- [Change] Require `karafka-core` `>= 2.1.0`.
+
+### 2.6.0 (2023-06-11)
+- [Improvement] Introduce a `client_class` setting for the ability to replace the underlying client with anything specific to a given env (dev, test, etc).
+- [Improvement] Introduce `Clients::Buffered`, useful for writing specs that do not have to talk with Kafka (id-ilych).
+- [Improvement] Make the `#produce` method private to avoid confusion and to make sure it is not used directly (it is not part of the official API).
+- [Change] Change the `wait_on_queue_full` default from `false` to `true`.
+- [Change] Rename `wait_on_queue_full_timeout` to `wait_backoff_on_queue_full` to match what it actually does.
+- [Enhancement] Introduce `wait_timeout_on_queue_full` with a proper meaning: the time after which, despite backoff, the error will be raised. This allows raising an error in case the backoff attempts were insufficient and prevents an infinite loop on messages that can never be delivered.
+- [Fix] Provide a `type` for queue full errors that references the appropriate public API method correctly.
+
+### Upgrade notes
+
+1. Rename `wait_on_queue_full_timeout` to `wait_backoff_on_queue_full`.
+2. Set `wait_on_queue_full` to `false` if you did not use it and do not want it.
+
 ## 2.5.3 (2023-05-26)
-- Include topic name in the `
+- [Enhancement] Include topic name in the `error.occurred` notification payload.
+- [Enhancement] Include topic name in the `message.acknowledged` notification payload.
+- [Maintenance] Require `karafka-core` `2.0.13`.

 ## 2.5.2 (2023-04-24)
 - [Fix] Require missing Pathname (#345)
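The queue-full flow above can be exercised with a small configuration sketch. This is a minimal illustration of the renamed settings, assuming a local broker at `localhost:9092`; the values mirror the new defaults introduced in this release.

```ruby
require 'waterdrop'

producer = WaterDrop::Producer.new do |config|
  config.kafka = { 'bootstrap.servers': 'localhost:9092' }
  # Back off and retry instead of raising when the librdkafka queue is full
  # (default flipped from false to true in 2.6.0)
  config.wait_on_queue_full = true
  # Seconds to sleep before retrying the same message
  # (previously named `wait_on_queue_full_timeout`)
  config.wait_backoff_on_queue_full = 0.1
  # Total seconds after which the queue-full error is re-raised despite
  # backoff, preventing an infinite retry loop
  config.wait_timeout_on_queue_full = 10
end
```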
data/Gemfile.lock
CHANGED
@@ -1,14 +1,14 @@
 PATH
   remote: .
   specs:
-    waterdrop (2.5.3)
-      karafka-core (>= 2.0.13, < 3.0.0)
+    waterdrop (2.6.1.beta1)
+      karafka-core (>= 2.1.0.beta1, < 3.0.0)
       zeitwerk (~> 2.3)

 GEM
   remote: https://rubygems.org/
   specs:
-    activesupport (7.0.
+    activesupport (7.0.5)
       concurrent-ruby (~> 1.0, >= 1.0.2)
       i18n (>= 1.6, < 2)
       minitest (>= 5.1)
@@ -20,25 +20,25 @@ GEM
     factory_bot (6.2.1)
       activesupport (>= 5.0.0)
     ffi (1.15.5)
-    i18n (1.
+    i18n (1.14.1)
       concurrent-ruby (~> 1.0)
-    karafka-core (2.0.13)
+    karafka-core (2.1.0.beta1)
       concurrent-ruby (>= 1.1)
-      karafka-rdkafka (>= 0.
-    karafka-rdkafka (0.
+      karafka-rdkafka (>= 0.13.0.beta2, < 0.14.0)
+    karafka-rdkafka (0.13.0.beta2)
       ffi (~> 1.15)
       mini_portile2 (~> 2.6)
       rake (> 12)
-    mini_portile2 (2.8.
+    mini_portile2 (2.8.2)
     minitest (5.18.0)
     rake (13.0.6)
     rspec (3.12.0)
       rspec-core (~> 3.12.0)
       rspec-expectations (~> 3.12.0)
       rspec-mocks (~> 3.12.0)
-    rspec-core (3.12.
+    rspec-core (3.12.2)
       rspec-support (~> 3.12.0)
-    rspec-expectations (3.12.
+    rspec-expectations (3.12.3)
       diff-lcs (>= 1.2.0, < 2.0)
       rspec-support (~> 3.12.0)
     rspec-mocks (3.12.5)
@@ -53,10 +53,9 @@ GEM
     simplecov_json_formatter (0.1.4)
     tzinfo (2.0.6)
       concurrent-ruby (~> 1.0)
-    zeitwerk (2.6.
+    zeitwerk (2.6.8)

 PLATFORMS
-  arm64-darwin-21
   x86_64-linux

 DEPENDENCIES
data/README.md
CHANGED
@@ -93,15 +93,16 @@ end

 Some of the options are:

-| Option | Description
-| `id` | id of the producer for instrumentation and logging
-| `logger` | Logger that we want to use
-| `deliver` | Should we send messages to Kafka or just fake the delivery
-| `max_wait_timeout` | Waits that long for the delivery report or raises an error
-| `wait_timeout` | Waits that long before re-check of delivery report availability
-| `wait_on_queue_full` | Should we wait on queue full or raise an error when that happens
-| `
+| Option                       | Description                                                                |
+|------------------------------|----------------------------------------------------------------------------|
+| `id`                         | id of the producer for instrumentation and logging                         |
+| `logger`                     | Logger that we want to use                                                 |
+| `deliver`                    | Should we send messages to Kafka or just fake the delivery                 |
+| `max_wait_timeout`           | Waits that long for the delivery report or raises an error                 |
+| `wait_timeout`               | Waits that long before re-checking delivery report availability            |
+| `wait_on_queue_full`         | Should we wait on queue full or raise an error when that happens           |
+| `wait_backoff_on_queue_full` | Waits that long before a retry when the queue is full                      |
+| `wait_timeout_on_queue_full` | If backoff and retries exceed this time, the error is raised instead of retrying further |

 Full list of the root configuration options is available [here](https://github.com/karafka/waterdrop/blob/master/lib/waterdrop/config.rb#L25).

@@ -332,6 +333,12 @@ producer.close

 See the `WaterDrop::Instrumentation::Notifications::EVENTS` for the list of all the supported events.

+### Karafka Web-UI
+
+Karafka [Web UI](https://karafka.io/docs/Web-UI-Getting-Started/) is a user interface for the Karafka framework. The Web UI provides a convenient way to monitor all producer-related errors out of the box.
+
+![Example producer errors in Karafka Web-UI](https://raw.githubusercontent.com/karafka/misc/master/printscreens/web-ui/errors-producer.png)
+
 ### Usage statistics

 WaterDrop is configured to emit internal `librdkafka` metrics every five seconds. You can change this by setting the `kafka` `statistics.interval.ms` configuration property to a value greater than or equal to `0`. Emitted statistics are available after subscribing to the `statistics.emitted` publisher event. If set to `0`, metrics will not be published.
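A short sketch of subscribing to those statistics, assuming a local broker; the interval shown matches the five-second default mentioned above, and the `msg_cnt` key comes from the standard librdkafka statistics payload.

```ruby
producer = WaterDrop::Producer.new do |config|
  config.kafka = {
    'bootstrap.servers': 'localhost:9092',
    # How often librdkafka emits statistics (ms); `0` disables emission
    'statistics.interval.ms': 5_000
  }
end

producer.monitor.subscribe('statistics.emitted') do |event|
  # The decoded librdkafka statistics hash travels in the event payload
  stats = event[:statistics]
  puts "messages in queue: #{stats['msg_cnt']}"
end
```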
data/config/locales/errors.yml
CHANGED
@@ -10,6 +10,9 @@ en:
     max_wait_timeout_format: must be an integer that is equal or bigger than 0
     kafka_format: must be a hash with symbol based keys
     kafka_key_must_be_a_symbol: All keys under the kafka settings scope need to be symbols
+    wait_on_queue_full_format: must be boolean
+    wait_backoff_on_queue_full_format: must be a numeric that is bigger or equal to 0
+    wait_timeout_on_queue_full_format: must be a numeric that is bigger or equal to 0

   message:
     missing: must be present
data/lib/waterdrop/clients/buffered.rb
ADDED
@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+
+module WaterDrop
+  module Clients
+    # Client used to buffer messages that we send out in specs and other places.
+    class Buffered < Clients::Dummy
+      attr_accessor :messages
+
+      # Sync fake response for the message delivery to Kafka, since we do not dispatch anything
+      class SyncResponse
+        # @param _args Handler wait arguments (irrelevant as waiting is fake here)
+        def wait(*_args)
+          false
+        end
+      end
+
+      # @param args [Object] anything accepted by `Clients::Dummy`
+      def initialize(*args)
+        super
+        @messages = []
+        @topics = Hash.new { |k, v| k[v] = [] }
+      end
+
+      # "Produces" message to Kafka: it acknowledges it locally, adds it to the internal buffer
+      # @param message [Hash] `WaterDrop::Producer#produce_sync` message hash
+      def produce(message)
+        topic = message.fetch(:topic) { raise ArgumentError, ':topic is missing' }
+        @topics[topic] << message
+        @messages << message
+        SyncResponse.new
+      end
+
+      # Returns messages produced to a given topic
+      # @param topic [String]
+      def messages_for(topic)
+        @topics[topic]
+      end
+
+      # Clears internal buffer
+      # Used in between specs so messages do not leak out
+      def reset
+        @messages.clear
+        @topics.each_value(&:clear)
+      end
+    end
+  end
+end
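A minimal sketch of wiring `Clients::Buffered` into specs via the new `client_class` setting; the topic name and payload are illustrative.

```ruby
producer = WaterDrop::Producer.new do |config|
  config.client_class = WaterDrop::Clients::Buffered
  config.kafka = { 'bootstrap.servers': 'localhost:9092' }
end

producer.produce_sync(topic: 'events', payload: '{"id":1}')

# No Kafka round-trip happened; the message sits in the in-memory buffer
producer.client.messages_for('events').size # => 1
producer.client.reset # clear the buffer between specs
```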
data/lib/waterdrop/{producer/dummy_client.rb → clients/dummy.rb}
RENAMED
@@ -1,12 +1,15 @@
 # frozen_string_literal: true

 module WaterDrop
-  module Producer
+  module Clients
     # A dummy client that is supposed to be used instead of Rdkafka::Producer in case we don't
-    # want to dispatch anything to Kafka
-    class DummyClient
-      # @return [DummyClient] dummy instance
-      def initialize
+    # want to dispatch anything to Kafka.
+    #
+    # It does not store anything and just ignores messages.
+    class Dummy
+      # @param _producer [WaterDrop::Producer]
+      # @return [Dummy] dummy instance
+      def initialize(_producer)
       @counter = -1
     end
data/lib/waterdrop/clients/rdkafka.rb
ADDED
@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+
+module WaterDrop
+  # Namespace for all the clients that WaterDrop may use under the hood
+  module Clients
+    # Default Rdkafka client.
+    # Since we use the ::Rdkafka::Producer under the hood, this is just a module that aligns with
+    # the client building API for the convenience.
+    module Rdkafka
+      class << self
+        # @param producer [WaterDrop::Producer] producer instance with its config, etc
+        # @note We overwrite this that way, because we do not care
+        def new(producer)
+          client = ::Rdkafka::Config.new(producer.config.kafka.to_h).producer
+
+          # This callback is not global and is per client, thus we do not have to wrap it with a
+          # callbacks manager to make it work
+          client.delivery_callback = Instrumentation::Callbacks::Delivery.new(
+            producer.id,
+            producer.config.monitor
+          )
+
+          client
+        end
+      end
+    end
+  end
+end
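Since `Clients::Rdkafka` is just a module exposing a `.new(producer)` factory, anything honoring that contract can be plugged in through `client_class`. A hypothetical sketch (`InstrumentedClient` is illustrative, not part of WaterDrop):

```ruby
# Builds the regular rdkafka client but logs each build, e.g. for debugging
module InstrumentedClient
  class << self
    # @param producer [WaterDrop::Producer]
    def new(producer)
      producer.config.logger.info("Building client for producer #{producer.id}")
      WaterDrop::Clients::Rdkafka.new(producer)
    end
  end
end

WaterDrop::Producer.new do |config|
  config.client_class = InstrumentedClient
  config.kafka = { 'bootstrap.servers': 'localhost:9092' }
end
```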
data/lib/waterdrop/config.rb
CHANGED
@@ -56,15 +56,20 @@ module WaterDrop
     # in the `error.occurred` notification pipeline with a proper type as while this is
     # recoverable, in a high number it still may mean issues.
     # Waiting is one of the recommended strategies.
-    setting :wait_on_queue_full, default: false
+    setting :wait_on_queue_full, default: true
     # option [Integer] how long (in seconds) should we backoff before a retry when queue is full
     # The retry will happen with the same message and backoff should give us some time to
     # dispatch previously buffered messages.
-    setting :wait_on_queue_full_timeout, default: 0.1
+    setting :wait_backoff_on_queue_full, default: 0.1
+    # option [Numeric] how many seconds should we wait with the backoff on queue having space for
+    # more messages before re-raising the error.
+    setting :wait_timeout_on_queue_full, default: 10
     # option [Boolean] should we send messages. Setting this to false can be really useful when
     # testing and or developing because when set to false, won't actually ping Kafka but will
     # run all the validations, etc
     setting :deliver, default: true
+    # option [Class] class for usage when creating the underlying client used to dispatch messages
+    setting :client_class, default: Clients::Rdkafka
     # rdkafka options
     # @see https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
     setting :kafka, default: {}
data/lib/waterdrop/contracts/config.rb
CHANGED
@@ -19,6 +19,9 @@ module WaterDrop
     required(:max_wait_timeout) { |val| val.is_a?(Numeric) && val >= 0 }
     required(:wait_timeout) { |val| val.is_a?(Numeric) && val.positive? }
     required(:kafka) { |val| val.is_a?(Hash) && !val.empty? }
+    required(:wait_on_queue_full) { |val| [true, false].include?(val) }
+    required(:wait_backoff_on_queue_full) { |val| val.is_a?(Numeric) && val >= 0 }
+    required(:wait_timeout_on_queue_full) { |val| val.is_a?(Numeric) && val >= 0 }

     # rdkafka allows both symbols and strings as keys for config but then casts them to strings
     # This can be confusing, so we expect all keys to be symbolized
data/lib/waterdrop/producer/builder.rb
CHANGED
@@ -10,18 +10,13 @@ module WaterDrop
     # @return [Rdkafka::Producer, Producer::DummyClient] raw rdkafka producer or a dummy producer
     # when we don't want to dispatch any messages
     def call(producer, config)
-      return DummyClient.new unless config.deliver
+      klass = config.client_class
+      # This allows us to have backwards compatibility.
+      # If it is the default client and delivery is set to false, we use dummy as we used to
+      # before `client_class` was introduced
+      klass = Clients::Dummy if klass == Clients::Rdkafka && !config.deliver

-      client = ::Rdkafka::Config.new(config.kafka.to_h).producer
-
-      # This callback is not global and is per client, thus we do not have to wrap it with a
-      # callbacks manager to make it work
-      client.delivery_callback = Instrumentation::Callbacks::Delivery.new(
-        producer.id,
-        config.monitor
-      )
-
-      client
+      klass.new(producer)
     end
   end
 end
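A sketch of the backwards-compatible path above: with the default client and `deliver` set to `false`, the builder still hands out the dummy client, as it did before `client_class` existed.

```ruby
producer = WaterDrop::Producer.new do |config|
  config.deliver = false
  config.kafka = { 'bootstrap.servers': 'localhost:9092' }
end

producer.client.class # => WaterDrop::Clients::Dummy
```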
data/lib/waterdrop/producer.rb
CHANGED
@@ -7,6 +7,7 @@ module WaterDrop
     include Sync
     include Async
     include Buffer
+    include ::Karafka::Core::Helpers::Time

     # Which of the inline flow errors do we want to intercept and re-bind
     SUPPORTED_FLOW_ERRORS = [
@@ -75,13 +76,18 @@
       @connecting_mutex.synchronize do
         return @client if @client && @pid == Process.pid

-        # We should raise an error when trying to use a producer from a fork, that is already
-        # connected to Kafka. We allow forking producers only before they are used
-        raise Errors::ProducerUsedInParentProcess, Process.pid if @status.connected?
-
         # We undefine all the finalizers, in case it was a fork, so the finalizers from the parent
         # process don't leak
         ObjectSpace.undefine_finalizer(id)
+
+        # We should raise an error when trying to use a producer with client from a fork. Always.
+        if @client
+          # We need to reset the client, otherwise there might be an attempt to close the parent
+          # client
+          @client = nil
+          raise Errors::ProducerUsedInParentProcess, Process.pid
+        end
+
         # Finalizer tracking is needed for handling shutdowns gracefully.
         # I don't expect everyone to remember about closing all the producers all the time, thus
         # this approach is better. Although it is still worth keeping in mind, that this will
@@ -137,7 +143,7 @@
       # It is safe to run it several times but not exactly the same moment
       # We also mark it as closed only if it was connected, if not, it would trigger a new
       # connection that anyhow would be immediately closed
-      client.close
+      client.close if @client

       # Remove callbacks runners that were registered
       ::Karafka::Core::Instrumentation.statistics_callbacks.delete(@id)
@@ -168,15 +174,35 @@
       @contract.validate!(message, Errors::MessageInvalidError)
     end

+    # Waits on a given handler
+    #
+    # @param handler [Rdkafka::Producer::DeliveryHandle]
+    def wait(handler)
+      handler.wait(
+        max_wait_timeout: @config.max_wait_timeout,
+        wait_timeout: @config.wait_timeout
+      )
+    end
+
+    private
+
     # Runs the client produce method with a given message
     #
     # @param message [Hash] message we want to send
     def produce(message)
+      produce_time ||= monotonic_now
+
       client.produce(**message)
     rescue SUPPORTED_FLOW_ERRORS.first => e
       # Unless we want to wait and retry and it's a full queue, we raise normally
       raise unless @config.wait_on_queue_full
       raise unless e.code == :queue_full
+      # If we're running for longer than the timeout, we need to re-raise the queue full error.
+      # This prevents a situation where the cluster is down forever and we just retry and retry
+      # in an infinite loop, effectively hanging the processing
+      raise unless monotonic_now - produce_time < @config.wait_timeout_on_queue_full * 1_000
+
+      label = caller_locations(2, 1)[0].label.split(' ').last

       # We use this syntax here because we want to preserve the original `#cause` when we
       # instrument the error and there is no way to manually assign `#cause` value. We want to keep
@@ -195,25 +221,15 @@
         producer_id: id,
         message: message,
         error: e,
-        type:
+        type: "message.#{label}"
       )

       # We do not poll the producer because polling happens in a background thread
       # It also should not be a frequent case (queue full), hence it's ok to just throttle.
-      sleep @config.wait_on_queue_full_timeout
+      sleep @config.wait_backoff_on_queue_full

       retry
     end
-
-    # Waits on a given handler
-    #
-    # @param handler [Rdkafka::Producer::DeliveryHandle]
-    def wait(handler)
-      handler.wait(
-        max_wait_timeout: @config.max_wait_timeout,
-        wait_timeout: @config.wait_timeout
-      )
-    end
   end
 end
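A sketch of the fork guard above (#356): once a client was built in the parent process, using the producer inside a fork clears the inherited reference (so the finalizer does not close the parent's client) and raises immediately. Topic and payloads are illustrative.

```ruby
producer = WaterDrop::Producer.new do |config|
  config.kafka = { 'bootstrap.servers': 'localhost:9092' }
end

producer.produce_sync(topic: 'events', payload: 'parent') # builds the client

fork do
  # Raises WaterDrop::Errors::ProducerUsedInParentProcess and nils the stale
  # client reference instead of closing the parent's connection
  producer.produce_sync(topic: 'events', payload: 'child')
end
```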
data/lib/waterdrop/version.rb
CHANGED
-  VERSION = '2.5.3'
+  VERSION = '2.6.1.beta1'
data/waterdrop.gemspec
CHANGED
@@ -16,7 +16,7 @@ Gem::Specification.new do |spec|
   spec.description = spec.summary
   spec.license = 'MIT'

-  spec.add_dependency 'karafka-core', '>= 2.0.13', '< 3.0.0'
+  spec.add_dependency 'karafka-core', '>= 2.1.0.beta1', '< 3.0.0'
   spec.add_dependency 'zeitwerk', '~> 2.3'

   if $PROGRAM_NAME.end_with?('gem')
data.tar.gz.sig
CHANGED
Binary file
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: waterdrop
 version: !ruby/object:Gem::Version
-  version: 2.5.3
+  version: 2.6.1.beta1
 platform: ruby
 authors:
 - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
   Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
   MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
   -----END CERTIFICATE-----
-date: 2023-05-26 00:00:00.000000000 Z
+date: 2023-06-17 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: karafka-core
@@ -43,7 +43,7 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 2.0.13
+        version: 2.1.0.beta1
     - - "<"
       - !ruby/object:Gem::Version
         version: 3.0.0
@@ -53,7 +53,7 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 2.0.13
+        version: 2.1.0.beta1
     - - "<"
       - !ruby/object:Gem::Version
         version: 3.0.0
@@ -95,6 +95,9 @@ files:
 - config/locales/errors.yml
 - docker-compose.yml
 - lib/waterdrop.rb
+- lib/waterdrop/clients/buffered.rb
+- lib/waterdrop/clients/dummy.rb
+- lib/waterdrop/clients/rdkafka.rb
 - lib/waterdrop/config.rb
 - lib/waterdrop/contracts.rb
 - lib/waterdrop/contracts/config.rb
@@ -109,14 +112,10 @@ files:
 - lib/waterdrop/instrumentation/vendors/datadog/dashboard.json
 - lib/waterdrop/instrumentation/vendors/datadog/metrics_listener.rb
 - lib/waterdrop/middleware.rb
-- lib/waterdrop/patches/rdkafka/client.rb
-- lib/waterdrop/patches/rdkafka/metadata.rb
-- lib/waterdrop/patches/rdkafka/producer.rb
 - lib/waterdrop/producer.rb
 - lib/waterdrop/producer/async.rb
 - lib/waterdrop/producer/buffer.rb
 - lib/waterdrop/producer/builder.rb
-- lib/waterdrop/producer/dummy_client.rb
 - lib/waterdrop/producer/status.rb
 - lib/waterdrop/producer/sync.rb
 - lib/waterdrop/version.rb
@@ -145,9 +144,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
     version: '0'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - ">="
+  - - ">"
     - !ruby/object:Gem::Version
-      version: '0'
+      version: 1.3.1
 requirements: []
 rubygems_version: 3.4.10
 signing_key:
metadata.gz.sig
CHANGED
Binary file

data/lib/waterdrop/patches/rdkafka/client.rb
DELETED
@@ -1,34 +0,0 @@
-# frozen_string_literal: true
-
-module WaterDrop
-  module Patches
-    module Rdkafka
-      # Patches for the producer client
-      module Client
-        # @param _object_id [nil] rdkafka API compatibility argument
-        # @param timeout_ms [Integer] final flush timeout in ms
-        def close(_object_id = nil, timeout_ms = 5_000)
-          return unless @native
-
-          # Indicate to polling thread that we're closing
-          @polling_thread[:closing] = true
-          # Wait for the polling thread to finish up
-          @polling_thread.join
-
-          ::Rdkafka::Bindings.rd_kafka_flush(@native, timeout_ms)
-          ::Rdkafka::Bindings.rd_kafka_destroy(@native)
-
-          @native = nil
-        end
-      end
-    end
-  end
-end
-
-::Rdkafka::Bindings.attach_function(
-  :rd_kafka_flush,
-  %i[pointer int],
-  :void
-)
-
-Rdkafka::Producer::Client.prepend WaterDrop::Patches::Rdkafka::Client
data/lib/waterdrop/patches/rdkafka/metadata.rb
DELETED
@@ -1,45 +0,0 @@
-# frozen_string_literal: true
-
-module WaterDrop
-  # Patches to external components
-  module Patches
-    # Rdkafka related patches
-    module Rdkafka
-      # Rdkafka::Metadata patches
-      module Metadata
-        # Errors upon which we retry the metadata fetch
-        RETRIED_ERRORS = %i[
-          timed_out
-          leader_not_available
-        ].freeze
-
-        private_constant :RETRIED_ERRORS
-
-        # We overwrite this method because there were reports of metadata operation timing out
-        # when Kafka was under stress. While the messages dispatch will be retried, metadata
-        # fetch happens prior to that, effectively crashing the process. Metadata fetch was not
-        # being retried at all.
-        #
-        # @param args [Array<Object>] all the metadata original arguments
-        def initialize(*args)
-          attempt ||= 0
-          attempt += 1
-
-          super(*args)
-        rescue ::Rdkafka::RdkafkaError => e
-          raise unless RETRIED_ERRORS.include?(e.code)
-          raise if attempt > 10
-
-          backoff_factor = 2**attempt
-          timeout = backoff_factor * 0.1
-
-          sleep(timeout)
-
-          retry
-        end
-      end
-    end
-  end
-end
-
-::Rdkafka::Metadata.prepend ::WaterDrop::Patches::Rdkafka::Metadata
data/lib/waterdrop/patches/rdkafka/producer.rb
DELETED
@@ -1,86 +0,0 @@
-# frozen_string_literal: true
-
-module WaterDrop
-  # Patches to external components
-  module Patches
-    # Rdkafka related patches
-    module Rdkafka
-      # Rdkafka::Producer patches
-      module Producer
-        include ::Karafka::Core::Helpers::Time
-
-        # Cache partitions count for 30 seconds
-        PARTITIONS_COUNT_TTL = 30_000
-
-        private_constant :PARTITIONS_COUNT_TTL
-
-        # @param args [Object] arguments accepted by the original rdkafka producer
-        def initialize(*args)
-          super
-
-          @_partitions_count_cache = Concurrent::Hash.new do |cache, topic|
-            topic_metadata = ::Rdkafka::Metadata.new(inner_kafka, topic).topics&.first
-
-            cache[topic] = [
-              monotonic_now,
-              topic_metadata ? topic_metadata[:partition_count] : nil
-            ]
-          end
-        end
-
-        # Adds a method that allows us to get the native kafka producer name
-        #
-        # In between rdkafka versions, there are internal changes that force us to add some extra
-        # magic to support all the versions.
-        #
-        # @return [String] producer instance name
-        def name
-          @_name ||= ::Rdkafka::Bindings.rd_kafka_name(inner_kafka)
-        end
-
-        # This patch makes sure we cache the partition count for a given topic for given time
-        # This prevents us in case someone uses `partition_key` from querying for the count with
-        # each message. Instead we query once every 30 seconds at most
-        #
-        # @param topic [String] topic name
-        # @return [Integer] partition count for a given topic
-        def partition_count(topic)
-          closed_producer_check(__method__)
-
-          @_partitions_count_cache.delete_if do |_, cached|
-            monotonic_now - cached.first > PARTITIONS_COUNT_TTL
-          end
-
-          @_partitions_count_cache[topic].last
-        end
-
-        # @return [FFI::Pointer] pointer to the raw librdkafka
-        def inner_kafka
-          unless @_inner_kafka
-            version = ::Gem::Version.new(::Rdkafka::VERSION)
-
-            if version < ::Gem::Version.new('0.12.0')
-              @_inner_kafka = @native_kafka
-            elsif version < ::Gem::Version.new('0.13.0.beta.1')
-              @_inner_kafka = @client.native
-            else
-              @_inner_kafka = @native_kafka.inner
-            end
-          end
-
-          @_inner_kafka
-        end
-
-        # Closes our librdkafka instance with the flush patch
-        # @param timeout_ms [Integer] flush timeout
-        def close(timeout_ms = 5_000)
-          ObjectSpace.undefine_finalizer(self)
-
-          @client.close(nil, timeout_ms)
-        end
-      end
-    end
-  end
-end
-
-::Rdkafka::Producer.prepend ::WaterDrop::Patches::Rdkafka::Producer