waterdrop 2.6.14 → 2.7.0
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +5 -16
- data/.ruby-version +1 -1
- data/CHANGELOG.md +128 -0
- data/Gemfile.lock +17 -20
- data/config/locales/errors.yml +8 -3
- data/docker-compose.yml +1 -1
- data/lib/waterdrop/clients/rdkafka.rb +34 -4
- data/lib/waterdrop/config.rb +22 -11
- data/lib/waterdrop/contracts/config.rb +11 -1
- data/lib/waterdrop/instrumentation/callbacks/delivery.rb +11 -0
- data/lib/waterdrop/instrumentation/callbacks/error.rb +11 -0
- data/lib/waterdrop/instrumentation/callbacks/oauthbearer_token_refresh.rb +49 -0
- data/lib/waterdrop/instrumentation/callbacks/statistics.rb +11 -0
- data/lib/waterdrop/instrumentation/notifications.rb +2 -0
- data/lib/waterdrop/instrumentation/vendors/datadog/metrics_listener.rb +1 -1
- data/lib/waterdrop/producer/buffer.rb +0 -6
- data/lib/waterdrop/producer/sync.rb +2 -2
- data/lib/waterdrop/producer/transactions.rb +7 -3
- data/lib/waterdrop/producer.rb +30 -31
- data/lib/waterdrop/version.rb +1 -1
- data/waterdrop.gemspec +3 -1
- data.tar.gz.sig +0 -0
- metadata +7 -6
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5308262b20199b02906783387f294a58beb01fa8850db3db19bb7be39395121a
+  data.tar.gz: d35c18c4b7352c20c8eeb623f54581476f108cb656912a571ef067cc796e884c
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ac6693e44080e4edf9b201a5e735b283bb7fa81d36ae10bf0d7501faa00e5f099917144966beb7febc4b95c50ed78feb7c659e59753a78e4495111e3d00af322
+  data.tar.gz: 100439b79cc59bd668f40e4fed8086c49f13bfedebb68409981d8c70a39c692eb2b4f453d9a45c367f2612578b2c6ac8303bd56acdbbd27d3a02d5aab803d57a
checksums.yaml.gz.sig
CHANGED
Binary file
data/.github/workflows/ci.yml
CHANGED
@@ -22,7 +22,6 @@ jobs:
           - '3.2'
           - '3.1'
           - '3.0'
-          - '2.7'
         include:
           - ruby: '3.3'
             coverage: 'true'
@@ -39,9 +38,9 @@ jobs:
           bundler-cache: true
           bundler: 'latest'

-      - name: Run Kafka with docker
+      - name: Run Kafka with docker compose
        run: |
-          docker
+          docker compose up -d || (sleep 5 && docker compose up -d)

      - name: Wait for Kafka
        run: |
@@ -49,25 +48,15 @@ jobs:

      - name: Install latest bundler
        run: |
-
-
-          gem update --system 3.4.22 --no-document
-        else
-          gem install bundler --no-document
-          gem update --system --no-document
-        fi
+          gem install bundler --no-document
+          gem update --system --no-document

          bundle config set without 'tools benchmarks docs'

      - name: Bundle install
        run: |
          bundle config set without development
-          if [[ "$(ruby -v | awk '{print $2}')" == 2.7.8* ]]; then
-            BUNDLER_VERSION=2.4.22 bundle install --jobs 4 --retry 3
-          else
-            bundle install --jobs 4 --retry 3
-          fi
+          bundle install --jobs 4 --retry 3

      - name: Run all tests
        env:
data/.ruby-version
CHANGED
@@ -1 +1 @@
-3.3.
+3.3.1
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,133 @@
# WaterDrop changelog

## 2.7.0 (2024-04-26)

This release contains **BREAKING** changes. Make sure to read and apply upgrade notes.

- **[Feature]** Support custom OAuth providers.
- **[Breaking]** Drop Ruby `2.7` support.
- **[Breaking]** Change default timeouts so the final delivery `message.timeout.ms` is less than `max_wait_timeout`, so we do not end up without a final verdict.
- **[Breaking]** Update all the time-related configuration settings to be in `ms` and not mixed.
- **[Breaking]** Remove the no longer needed `wait_timeout` configuration option.
- **[Breaking]** Do **not** validate or morph (via middleware) messages added to the buffer prior to `flush_sync` or `flush_async`.
- [Enhancement] Provide `WaterDrop::Producer#transaction?` that returns true only when the producer has an active transaction running.
- [Enhancement] Introduce an `instrument_on_wait_queue_full` flag (defaults to `true`) to configure whether non-critical (retryable) queue full errors should be instrumented in the error pipeline. Useful when building high-performance pipes with the WaterDrop queue retry backoff used as a throttler.
- [Enhancement] Protect critical `rdkafka` thread executable code sections.
- [Enhancement] Treat the queue size as a gauge rather than a cumulative stat (isturdy).
- [Fix] Fix a case where purge on a non-initialized client would crash.
- [Fix] Middlewares run twice when using buffered produce.
- [Fix] Validations run twice when using buffered produce.

### Upgrade Notes

**PLEASE MAKE SURE TO READ AND APPLY THEM!**

#### `wait_timeout` Configuration No Longer Needed

The `wait_timeout` WaterDrop configuration option is no longer needed. You can safely remove it.

```ruby
producer = WaterDrop::Producer.new

producer.setup do |config|
  # Other config...

  # Remove this, no longer needed
  config.wait_timeout = 30
end
```

#### Time Settings Format Alignment

**All** time-related values are now configured in milliseconds instead of some being in seconds and some in milliseconds.

The values that were changed from seconds to milliseconds are:

- `max_wait_timeout`
- `wait_backoff_on_queue_full`
- `wait_timeout_on_queue_full`
- `wait_backoff_on_transaction_command`

If you have configured any of those yourself, please replace the seconds representation with milliseconds:

```ruby
producer = WaterDrop::Producer.new

producer.setup do |config|
  config.deliver = true

  # Replace this:
  config.max_wait_timeout = 30

  # With
  config.max_wait_timeout = 30_000
  # ...
end
```

#### Defaults Alignment

In this release, we've updated our default settings to address a crucial issue: the previous defaults could lead to inconclusive outcomes in synchronous operations due to wait timeout errors. Users often mistakenly believed that a message dispatch was halted because of these errors when, in fact, the timeout related to awaiting the final dispatch verdict, not the dispatch action itself.

The new defaults in WaterDrop 2.7.0 eliminate this confusion by ensuring synchronous operation results are always transparent and conclusive. This change aims to provide a straightforward understanding of wait timeout errors, reinforcing that they reflect the wait state, not the dispatch success.

Below you can find a table with what has changed, the new defaults, and the previous ones in case you want to retain the earlier behavior:

<table>
  <thead>
    <tr>
      <th>Config</th>
      <th>Previous Default</th>
      <th>New Default</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>root <code>max_wait_timeout</code></td>
      <td>5000 ms (5 seconds)</td>
      <td>60000 ms (60 seconds)</td>
    </tr>
    <tr>
      <td>kafka <code>message.timeout.ms</code></td>
      <td>300000 ms (5 minutes)</td>
      <td>50000 ms (50 seconds)</td>
    </tr>
    <tr>
      <td>kafka <code>transaction.timeout.ms</code></td>
      <td>60000 ms (1 minute)</td>
      <td>55000 ms (55 seconds)</td>
    </tr>
  </tbody>
</table>

This alignment ensures that when using sync operations or invoking `#wait`, any exception you get should give you a conclusive and final delivery verdict.
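If you prefer to retain the previous behavior, the table values above can be applied back explicitly. A minimal sketch, using the `producer.setup` API shown earlier; the `bootstrap.servers` value is only a placeholder:

```ruby
producer = WaterDrop::Producer.new

producer.setup do |config|
  # Previous root default (all time values are in ms as of 2.7.0)
  config.max_wait_timeout = 5_000

  config.kafka = {
    # Placeholder broker address
    'bootstrap.servers': 'localhost:9092',
    # Previous librdkafka-level defaults
    'message.timeout.ms': 300_000,
    'transaction.timeout.ms': 60_000
  }
end
```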
#### Buffering No Longer Early Validates Messages

As of version `2.7.0`, WaterDrop has changed how message buffering works. Previously, messages underwent validation and middleware processing when they were buffered. Now, these steps are deferred until just before dispatching the messages. The buffer functions strictly as a thread-safe storage area without performing any validations or middleware operations until the messages are ready to be sent.

This adjustment was made primarily to ensure that middleware runs and validations are applied when most relevant, shortly before message dispatch. This approach addresses potential issues with buffers that might hold messages for extended periods:

- **Temporal Relevance**: Validating and processing messages near their dispatch time helps ensure that actions such as partition assignments reflect the current system state. This is crucial in dynamic environments where system states are subject to rapid changes.

- **Stale State Management**: By delaying validations and middleware to the dispatch phase, the system minimizes the risk of acting on outdated information, which could lead to incorrect processing or partitioning decisions.

```ruby
# Prior to 2.7.0 this would raise an error
producer.buffer(topic: nil, payload: '')
# => WaterDrop::Errors::MessageInvalidError

# After 2.7.0 buffer will not raise, but flushing will
producer.buffer(topic: nil, payload: '')
# => all good here
producer.flush_async
# => WaterDrop::Errors::MessageInvalidError
```

#### Middleware Execution Prior to Flush When Buffering

The timing of middleware execution has been adjusted. Middleware, which was previously run when messages were added to the buffer, will now only execute immediately before the messages are flushed from the buffer and dispatched. This change is analogous to the validation changes described above.
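As an illustration of the new ordering, here is a minimal sketch assuming the documented `producer.middleware#append` API and a hypothetical middleware that stamps messages; the stamp is applied at flush time, not at buffer time:

```ruby
# Hypothetical middleware: adds a header recording when the message actually left the buffer
class DispatchStamp
  def call(message)
    message[:headers] ||= {}
    message[:headers]['dispatched_at'] = Time.now.utc.to_s
    message
  end
end

producer.middleware.append(DispatchStamp.new)

producer.buffer(topic: 'events', payload: 'example')
# 2.7.0+: the middleware has NOT run yet, the message is stored as-is

producer.flush_sync
# The middleware runs now, right before dispatch, so the stamp reflects flush time
```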
## 2.6.14 (2024-02-06)
- [Enhancement] Instrument `producer.connected` and `producer.closing` lifecycle events.
data/Gemfile.lock
CHANGED
@@ -1,14 +1,14 @@
 PATH
   remote: .
   specs:
-    waterdrop (2.6.14)
-      karafka-core (>= 2.
+    waterdrop (2.7.0)
+      karafka-core (>= 2.4.0, < 3.0.0)
       zeitwerk (~> 2.3)

 GEM
   remote: https://rubygems.org/
   specs:
-    activesupport (7.1.3)
+    activesupport (7.1.3.2)
       base64
       bigdecimal
       concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -19,30 +19,28 @@ GEM
       mutex_m
       tzinfo (~> 2.0)
     base64 (0.2.0)
-    bigdecimal (3.1.
+    bigdecimal (3.1.7)
     byebug (11.1.3)
     concurrent-ruby (1.2.3)
     connection_pool (2.4.1)
     diff-lcs (1.5.1)
     docile (1.4.0)
-    drb (2.2.
-
-    factory_bot (6.4.5)
+    drb (2.2.1)
+    factory_bot (6.4.6)
       activesupport (>= 5.0.0)
     ffi (1.16.3)
-    i18n (1.14.
+    i18n (1.14.4)
       concurrent-ruby (~> 1.0)
-    karafka-core (2.
-
-
-    karafka-rdkafka (0.14.7)
+    karafka-core (2.4.0)
+      karafka-rdkafka (>= 0.15.0, < 0.16.0)
+    karafka-rdkafka (0.15.0)
       ffi (~> 1.15)
       mini_portile2 (~> 2.6)
       rake (> 12)
-    mini_portile2 (2.8.
-    minitest (5.
+    mini_portile2 (2.8.6)
+    minitest (5.22.3)
     mutex_m (0.2.0)
-    rake (13.1
+    rake (13.2.1)
     rspec (3.13.0)
       rspec-core (~> 3.13.0)
       rspec-expectations (~> 3.13.0)
@@ -55,8 +53,7 @@ GEM
     rspec-mocks (3.13.0)
       diff-lcs (>= 1.2.0, < 2.0)
       rspec-support (~> 3.13.0)
-    rspec-support (3.13.
-    ruby2_keywords (0.0.5)
+    rspec-support (3.13.1)
     simplecov (0.22.0)
       docile (~> 1.1)
       simplecov-html (~> 0.11)
@@ -65,10 +62,10 @@ GEM
     simplecov_json_formatter (0.1.4)
     tzinfo (2.0.6)
       concurrent-ruby (~> 1.0)
-    zeitwerk (2.6.
+    zeitwerk (2.6.13)

 PLATFORMS
-
+  arm64-darwin-22
   x86_64-linux

 DEPENDENCIES
@@ -79,4 +76,4 @@ DEPENDENCIES
   waterdrop!

 BUNDLED WITH
-   2.5.
+   2.5.9
data/config/locales/errors.yml
CHANGED
@@ -4,15 +4,20 @@ en:
     missing: must be present
     logger_format: must be present
     deliver_format: must be boolean
+    instrument_on_wait_queue_full_format: must be boolean
     id_format: must be a non-empty string
+    monitor_format: must be present
+    client_class_format: must be present
     max_payload_size_format: must be an integer that is equal or bigger than 1
-    wait_timeout_format: must be a numeric that is bigger than 0
     max_wait_timeout_format: must be an integer that is equal or bigger than 0
     kafka_format: must be a hash with symbol based keys
     kafka_key_must_be_a_symbol: All keys under the kafka settings scope need to be symbols
     wait_on_queue_full_format: must be boolean
-    wait_backoff_on_queue_full_format: must be a numeric that is
-    wait_timeout_on_queue_full_format: must be a numeric that is
+    wait_backoff_on_queue_full_format: must be a numeric that is equal or bigger to 0
+    wait_timeout_on_queue_full_format: must be a numeric that is equal or bigger to 0
+    wait_backoff_on_transaction_command_format: must be a numeric that is equal or bigger to 0
+    max_attempts_on_transaction_command_format: must be an integer that is equal or bigger than 1
+    oauth.token_provider_listener_format: 'must be false or respond to #on_oauthbearer_token_refresh'

   message:
     missing: must be present
data/docker-compose.yml
CHANGED

data/lib/waterdrop/clients/rdkafka.rb
CHANGED
@@ -11,20 +11,50 @@ module WaterDrop
       # @param producer [WaterDrop::Producer] producer instance with its config, etc
       # @note We overwrite this that way, because we do not care
       def new(producer)
-
+        kafka_config = producer.config.kafka.to_h
+        monitor = producer.config.monitor

-        client = ::Rdkafka::Config.new(
+        client = ::Rdkafka::Config.new(kafka_config).producer(native_kafka_auto_start: false)
+
+        # Register statistics runner for this particular type of callbacks
+        ::Karafka::Core::Instrumentation.statistics_callbacks.add(
+          producer.id,
+          Instrumentation::Callbacks::Statistics.new(producer.id, client.name, monitor)
+        )
+
+        # Register error tracking callback
+        ::Karafka::Core::Instrumentation.error_callbacks.add(
+          producer.id,
+          Instrumentation::Callbacks::Error.new(producer.id, client.name, monitor)
+        )
+
+        # Register oauth bearer refresh for this particular type of callbacks
+        ::Karafka::Core::Instrumentation.oauthbearer_token_refresh_callbacks.add(
+          producer.id,
+          Instrumentation::Callbacks::OauthbearerTokenRefresh.new(client, monitor)
+        )

        # This callback is not global and is per client, thus we do not have to wrap it with a
        # callbacks manager to make it work
        client.delivery_callback = Instrumentation::Callbacks::Delivery.new(
          producer.id,
          producer.transactional?,
-
+          monitor
        )

+        oauth_listener = producer.config.oauth.token_provider_listener
+        # We need to subscribe the oauth listener here because we want it to be ready before
+        # any producer callbacks run. In theory, because the WaterDrop rdkafka producer is lazy
+        # loaded, there would be enough time for the user to subscribe it themselves, but then it
+        # would not cooperate with the auto-configuration coming from Karafka. The way it is done
+        # below, if it is configured it will be subscribed, and if not, the user can always
+        # subscribe it themselves as long as it is done prior to first usage
+        monitor.subscribe(oauth_listener) if oauth_listener
+
+        client.start
+
        # Switch to the transactional mode if user provided the transactional id
-        client.init_transactions if
+        client.init_transactions if kafka_config.key?(:'transactional.id')

        client
      end
data/lib/waterdrop/config.rb
CHANGED
@@ -12,7 +12,12 @@ module WaterDrop
       'client.id': 'waterdrop',
       # emit librdkafka statistics every five seconds. This is used in instrumentation.
       # When disabled, part of metrics will not be published and available.
-      'statistics.interval.ms': 5_000
+      'statistics.interval.ms': 5_000,
+      # We set it to a value that is lower than `max_wait_timeout` to have a final verdict upon
+      # sync delivery
+      'message.timeout.ms': 50_000,
+      # Must be more or equal to `message.timeout.ms` defaults
+      'transaction.timeout.ms': 55_000
     }.freeze

     private_constant :KAFKA_DEFAULTS
@@ -44,12 +49,8 @@ module WaterDrop
     # option [Integer] max payload size allowed for delivery to Kafka
     setting :max_payload_size, default: 1_000_012
     # option [Integer] Wait that long for the delivery report or raise an error if this takes
-    # longer than the timeout.
-    setting :max_wait_timeout, default:
-    # option [Numeric] how long should we wait between re-checks on the availability of the
-    # delivery report. In a really robust systems, this describes the min-delivery time
-    # for a single sync message when produced in isolation
-    setting :wait_timeout, default: 0.005 # 5 milliseconds
+    # longer than the timeout ms.
+    setting :max_wait_timeout, default: 60_000
     # option [Boolean] should we upon detecting full librdkafka queue backoff and retry or should
     # we raise an exception.
     # When this is set to `true`, upon full queue, we won't raise an error. There will be error
@@ -60,12 +61,14 @@ module WaterDrop
     # option [Integer] how long (in seconds) should we backoff before a retry when queue is full
     # The retry will happen with the same message and backoff should give us some time to
     # dispatch previously buffered messages.
-    setting :wait_backoff_on_queue_full, default:
-    # option [Numeric] how many
+    setting :wait_backoff_on_queue_full, default: 100
+    # option [Numeric] how many ms should we wait with the backoff on queue having space for
     # more messages before re-raising the error.
-    setting :wait_timeout_on_queue_full, default:
+    setting :wait_timeout_on_queue_full, default: 10_000
+    # option [Boolean] should we instrument non-critical, retryable queue full errors
+    setting :instrument_on_wait_queue_full, default: true
     # option [Numeric] How long to wait before retrying a retryable transaction related error
-    setting :wait_backoff_on_transaction_command, default:
+    setting :wait_backoff_on_transaction_command, default: 500
     # option [Numeric] How many times to retry a retryable transaction related error before
     # giving up
     setting :max_attempts_on_transaction_command, default: 5
@@ -86,6 +89,14 @@ module WaterDrop
       constructor: ->(middleware) { middleware || WaterDrop::Middleware.new }
     )

+    # Namespace for oauth related configuration
+    setting :oauth do
+      # option [false, #call] Listener for using oauth bearer. This listener will be able to
+      # get the client name to decide whether to use a single multi-client token refreshing
+      # or have separate tokens per instance.
+      setting :token_provider_listener, default: false
+    end
+
     # Configuration method
     # @yield Runs a block of code providing a config singleton instance to it
     # @yieldparam [WaterDrop::Config] WaterDrop config instance
data/lib/waterdrop/contracts/config.rb
CHANGED
@@ -14,14 +14,24 @@ module WaterDrop

       required(:id) { |val| val.is_a?(String) && !val.empty? }
       required(:logger) { |val| !val.nil? }
+      required(:monitor) { |val| !val.nil? }
       required(:deliver) { |val| [true, false].include?(val) }
       required(:max_payload_size) { |val| val.is_a?(Integer) && val >= 1 }
       required(:max_wait_timeout) { |val| val.is_a?(Numeric) && val >= 0 }
-      required(:
+      required(:client_class) { |val| !val.nil? }
       required(:kafka) { |val| val.is_a?(Hash) && !val.empty? }
       required(:wait_on_queue_full) { |val| [true, false].include?(val) }
+      required(:instrument_on_wait_queue_full) { |val| [true, false].include?(val) }
       required(:wait_backoff_on_queue_full) { |val| val.is_a?(Numeric) && val >= 0 }
       required(:wait_timeout_on_queue_full) { |val| val.is_a?(Numeric) && val >= 0 }
+      required(:wait_backoff_on_transaction_command) { |val| val.is_a?(Numeric) && val >= 0 }
+      required(:max_attempts_on_transaction_command) { |val| val.is_a?(Integer) && val >= 1 }
+
+      nested(:oauth) do
+        required(:token_provider_listener) do |val|
+          val == false || val.respond_to?(:on_oauthbearer_token_refresh)
+        end
+      end

       # rdkafka allows both symbols and strings as keys for config but then casts them to strings
       # This can be confusing, so we expect all keys to be symbolized
data/lib/waterdrop/instrumentation/callbacks/delivery.rb
CHANGED
@@ -44,6 +44,17 @@ module WaterDrop
         else
           instrument_error(delivery_report)
         end
+      # This runs from the rdkafka thread, thus we want to safe-guard it and prevent absolute
+      # crashes even if the instrumentation code fails. If it would bubble-up, it could crash
+      # the rdkafka background thread
+      rescue StandardError => e
+        @monitor.instrument(
+          'error.occurred',
+          caller: self,
+          error: e,
+          producer_id: @producer_id,
+          type: 'callbacks.delivery.error'
+        )
       end

       private
data/lib/waterdrop/instrumentation/callbacks/error.rb
CHANGED
@@ -32,6 +32,17 @@ module WaterDrop
           producer_id: @producer_id,
           type: 'librdkafka.error'
         )
+      # This runs from the rdkafka thread, thus we want to safe-guard it and prevent absolute
+      # crashes even if the instrumentation code fails. If it would bubble-up, it could crash
+      # the rdkafka background thread
+      rescue StandardError => e
+        @monitor.instrument(
+          'error.occurred',
+          caller: self,
+          error: e,
+          producer_id: @producer_id,
+          type: 'callbacks.error.error'
+        )
       end
     end
   end
data/lib/waterdrop/instrumentation/callbacks/oauthbearer_token_refresh.rb
CHANGED
@@ -0,0 +1,49 @@
+# frozen_string_literal: true
+
+module WaterDrop
+  module Instrumentation
+    module Callbacks
+      # Callback that is triggered when oauth token needs to be refreshed.
+      class OauthbearerTokenRefresh
+        # @param bearer [Rdkafka::Producer] given rdkafka instance. It is needed as
+        #   we need to have a reference to call `#oauthbearer_set_token` or
+        #   `#oauthbearer_set_token_failure` upon the event.
+        # @param monitor [WaterDrop::Instrumentation::Monitor] monitor we are using
+        def initialize(bearer, monitor)
+          @bearer = bearer
+          @monitor = monitor
+        end
+
+        # Upon receiving this event, the user is required to invoke either `#oauthbearer_set_token`
+        # or `#oauthbearer_set_token_failure` on the `event[:bearer]` depending on whether token
+        # obtaining was successful or not.
+        #
+        # Please refer to WaterDrop and Karafka documentation or `Rdkafka::Helpers::OAuth`
+        # documentation directly for exact parameters of those methods.
+        #
+        # @param _rd_config [Rdkafka::Config]
+        # @param bearer_name [String] name of the bearer for which we refresh
+        def call(_rd_config, bearer_name)
+          return unless @bearer.name == bearer_name
+
+          @monitor.instrument(
+            'oauthbearer.token_refresh',
+            bearer: @bearer,
+            caller: self
+          )
+        # This runs from the rdkafka thread, thus we want to safe-guard it and prevent absolute
+        # crashes even if the instrumentation code fails. If it would bubble-up, it could crash
+        # the rdkafka background thread
+        rescue StandardError => e
+          @monitor.instrument(
+            'error.occurred',
+            caller: self,
+            error: e,
+            producer_id: @producer_id,
+            type: 'callbacks.oauthbearer_token_refresh.error'
+          )
+        end
+      end
+    end
+  end
+end
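For reference, a listener compatible with the contract above only has to respond to `#on_oauthbearer_token_refresh` and set a token (or a failure) on `event[:bearer]`. A minimal sketch, assuming the `Rdkafka::Helpers::OAuth` setters accept `token:`, `lifetime_ms:` and `principal_name:` keyword arguments; the token values and kafka settings below are placeholders:

```ruby
class OauthTokenListener
  def on_oauthbearer_token_refresh(event)
    bearer = event[:bearer]

    # Placeholder: obtain a real token from your OAuth provider here
    token = 'opaque-oauth-token'
    lifetime_ms = 60 * 60 * 1_000

    bearer.oauthbearer_set_token(
      token: token,
      lifetime_ms: lifetime_ms,
      principal_name: 'waterdrop'
    )
  rescue StandardError => e
    bearer.oauthbearer_set_token_failure(e.message)
  end
end

producer = WaterDrop::Producer.new do |config|
  config.kafka = {
    'bootstrap.servers': 'localhost:9092',
    'security.protocol': 'sasl_ssl',
    'sasl.mechanisms': 'OAUTHBEARER'
  }
  config.oauth.token_provider_listener = OauthTokenListener.new
end
```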
data/lib/waterdrop/instrumentation/callbacks/statistics.rb
CHANGED
@@ -34,6 +34,17 @@ module WaterDrop
           producer_id: @producer_id,
           statistics: @statistics_decorator.call(statistics)
         )
+      # This runs from the rdkafka thread, thus we want to safe-guard it and prevent absolute
+      # crashes even if the instrumentation code fails. If it would bubble-up, it could crash
+      # the rdkafka background thread
+      rescue StandardError => e
+        @monitor.instrument(
+          'error.occurred',
+          caller: self,
+          error: e,
+          producer_id: @producer_id,
+          type: 'callbacks.statistics.error'
+        )
       end
     end
   end
data/lib/waterdrop/instrumentation/vendors/datadog/metrics_listener.rb
CHANGED
@@ -36,7 +36,7 @@ module WaterDrop
     setting :rd_kafka_metrics, default: [
       # Client metrics
       RdKafkaMetric.new(:count, :root, 'calls', 'tx_d'),
-      RdKafkaMetric.new(:histogram, :root, 'queue.size', '
+      RdKafkaMetric.new(:histogram, :root, 'queue.size', 'msg_cnt'),

       # Broker metrics
       RdKafkaMetric.new(:count, :brokers, 'deliver.attempts', 'txretries_d'),
data/lib/waterdrop/producer/buffer.rb
CHANGED
@@ -12,9 +12,6 @@ module WaterDrop
     def buffer(message)
       ensure_active!

-      message = middleware.run(message)
-      validate_message!(message)
-
       @monitor.instrument(
         'message.buffered',
         producer_id: id,
@@ -32,9 +29,6 @@ module WaterDrop
     def buffer_many(messages)
       ensure_active!

-      messages = middleware.run_many(messages)
-      messages.each { |message| validate_message!(message) }
-
       @monitor.instrument(
         'messages.buffered',
         producer_id: id,
data/lib/waterdrop/producer/sync.rb
CHANGED
@@ -52,8 +52,8 @@ module WaterDrop
     # @return [Array<Rdkafka::Producer::DeliveryReport>] delivery reports
     #
     # @raise [Rdkafka::RdkafkaError] When adding the messages to rdkafka's queue failed
-    # @raise [Rdkafka::Producer::WaitTimeoutError] When the timeout has been reached and
-    #
+    # @raise [Rdkafka::Producer::WaitTimeoutError] When the timeout has been reached and some
+    #   handles are still pending
     # @raise [Errors::MessageInvalidError] When any of the provided messages details are invalid
     #   and the message could not be sent to Kafka
     def produce_many_sync(messages)
data/lib/waterdrop/producer/transactions.rb
CHANGED
@@ -89,6 +89,11 @@ module WaterDrop
         end
       end

+      # @return [Boolean] true if we are in an active transaction
+      def transaction?
+        @transaction_mutex.owned?
+      end
+
       # @return [Boolean] Is this producer a transactional one
       def transactional?
         return @transactional if instance_variable_defined?(:'@transactional')
@@ -132,8 +137,7 @@ module WaterDrop
           client.send_offsets_to_transaction(
             consumer,
             tpl,
-
-            @config.max_wait_timeout * 1_000
+            @config.max_wait_timeout
           )
         end
       end
@@ -197,7 +201,7 @@ module WaterDrop

         if do_retry
           # Backoff more and more before retries
-          sleep(config.wait_backoff_on_transaction_command * attempt)
+          sleep((config.wait_backoff_on_transaction_command / 1_000.0) * attempt)

           retry
         end
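The first hunk above introduces `#transaction?`. A short usage sketch; the `transactional.id`, broker address and topic name are placeholders:

```ruby
producer = WaterDrop::Producer.new do |config|
  config.kafka = {
    'bootstrap.servers': 'localhost:9092',
    'transactional.id': 'waterdrop-example'
  }
end

producer.transaction?   # => false, nothing is running yet

producer.transaction do
  producer.produce_async(topic: 'events', payload: 'inside')
  producer.transaction? # => true only while the transaction block is active
end

producer.transaction?   # => false again once the block finishes
```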
data/lib/waterdrop/producer.rb
CHANGED
@@ -104,18 +104,6 @@ module WaterDrop
       @pid = Process.pid
       @client = Builder.new.call(self, @config)

-      # Register statistics runner for this particular type of callbacks
-      ::Karafka::Core::Instrumentation.statistics_callbacks.add(
-        @id,
-        Instrumentation::Callbacks::Statistics.new(@id, @client.name, @config.monitor)
-      )
-
-      # Register error tracking callback
-      ::Karafka::Core::Instrumentation.error_callbacks.add(
-        @id,
-        Instrumentation::Callbacks::Error.new(@id, @client.name, @config.monitor)
-      )
-
       @status.connected!
       @monitor.instrument('producer.connected', producer_id: id)
     end
@@ -145,7 +133,12 @@ module WaterDrop
         @messages = []
       end

-
+      # We should not purge if there is no client initialized
+      # It may not be initialized if we created a new producer that never connected to kafka,
+      # we used buffer and purged. In cases like this client won't exist
+      @connecting_mutex.synchronize do
+        @client&.purge
+      end
     end
   end

@@ -188,8 +181,7 @@ module WaterDrop
       # The linger.ms time will be ignored for the duration of the call,
       # queued messages will be sent to the broker as soon as possible.
       begin
-
-        @client.flush(@config.max_wait_timeout * 1_000) unless @client.closed?
+        @client.flush(@config.max_wait_timeout) unless @client.closed?
       # We can safely ignore timeouts here because any left outstanding requests
       # will anyhow force wait on close if not forced.
       # If forced, we will purge the queue and just close
@@ -210,6 +202,7 @@ module WaterDrop
       # Remove callbacks runners that were registered
       ::Karafka::Core::Instrumentation.statistics_callbacks.delete(@id)
       ::Karafka::Core::Instrumentation.error_callbacks.delete(@id)
+      ::Karafka::Core::Instrumentation.oauthbearer_token_refresh_callbacks.delete(@id)

       @status.closed!
     end
@@ -250,8 +243,8 @@ module WaterDrop
     # @param handler [Rdkafka::Producer::DeliveryHandle]
     def wait(handler)
       handler.wait(
-        max_wait_timeout
-
+        # rdkafka max_wait_timeout is in seconds and we use ms
+        max_wait_timeout: @config.max_wait_timeout / 1_000.0
       )
     end

@@ -286,7 +279,7 @@ module WaterDrop
       # If we're running for longer than the timeout, we need to re-raise the queue full.
       # This will prevent from situation where cluster is down forever and we just retry and retry
       # in an infinite loop, effectively hanging the processing
-      raise unless monotonic_now - produce_time < @config.wait_timeout_on_queue_full
+      raise unless monotonic_now - produce_time < @config.wait_timeout_on_queue_full

       label = caller_locations(2, 1)[0].label.split(' ').last

@@ -297,22 +290,28 @@ module WaterDrop
       begin
         raise Errors::ProduceError, e.inspect
       rescue Errors::ProduceError => e
-        #
-        #
-        #
-        #
-
-
-
-
-
-
-
-
+        # Users can configure this because in pipe-like flows with high throughput, queue full with
+        # retry may be used as a throttling system that will backoff and wait.
+        # In such scenarios this error notification can be removed and until queue full is
+        # retryable, it will not be raised as an error.
+        if @config.instrument_on_wait_queue_full
+          # We want to instrument on this event even when we restart it.
+          # The reason is simple: instrumentation and visibility.
+          # We can recover from this, but despite that we should be able to instrument this.
+          # If this type of event happens too often, it may indicate that the buffer settings are
+          # not well configured.
+          @monitor.instrument(
+            'error.occurred',
+            producer_id: id,
+            message: message,
+            error: e,
+            type: "message.#{label}"
+          )
+        end

        # We do not poll the producer because polling happens in a background thread
        # It also should not be a frequent case (queue full), hence it's ok to just throttle.
-        sleep @config.wait_backoff_on_queue_full
+        sleep @config.wait_backoff_on_queue_full / 1_000.0
       end

       @operations_in_progress.decrement
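The queue-full hunk above ties the `error.occurred` notification to the new `instrument_on_wait_queue_full` setting. A sketch of a producer that uses queue-full backoff as a quiet throttle; the broker address is a placeholder and all values are in ms as of 2.7.0:

```ruby
producer = WaterDrop::Producer.new do |config|
  config.kafka = { 'bootstrap.servers': 'localhost:9092' }

  # Retry instead of raising when the librdkafka queue is full
  config.wait_on_queue_full = true
  # Back off 100 ms between attempts, for at most 10 seconds in total
  config.wait_backoff_on_queue_full = 100
  config.wait_timeout_on_queue_full = 10_000
  # Do not emit error.occurred for these retryable, expected queue-full states
  config.instrument_on_wait_queue_full = false
end
```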
data/lib/waterdrop/version.rb
CHANGED
data/waterdrop.gemspec
CHANGED
@@ -16,9 +16,11 @@ Gem::Specification.new do |spec|
   spec.description = spec.summary
   spec.license = 'MIT'

-  spec.add_dependency 'karafka-core', '>= 2.
+  spec.add_dependency 'karafka-core', '>= 2.4.0', '< 3.0.0'
   spec.add_dependency 'zeitwerk', '~> 2.3'

+  spec.required_ruby_version = '>= 3.0.0'
+
   if $PROGRAM_NAME.end_with?('gem')
     spec.signing_key = File.expand_path('~/.ssh/gem-private_key.pem')
   end
data.tar.gz.sig
CHANGED
Binary file
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: waterdrop
 version: !ruby/object:Gem::Version
-  version: 2.6.14
+  version: 2.7.0
 platform: ruby
 authors:
 - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
   AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
   msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
   -----END CERTIFICATE-----
-date: 2024-
+date: 2024-04-26 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: karafka-core
@@ -43,7 +43,7 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 2.
+        version: 2.4.0
     - - "<"
       - !ruby/object:Gem::Version
         version: 3.0.0
@@ -53,7 +53,7 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 2.
+        version: 2.4.0
     - - "<"
      - !ruby/object:Gem::Version
        version: 3.0.0
@@ -107,6 +107,7 @@ files:
 - lib/waterdrop/helpers/counter.rb
 - lib/waterdrop/instrumentation/callbacks/delivery.rb
 - lib/waterdrop/instrumentation/callbacks/error.rb
+- lib/waterdrop/instrumentation/callbacks/oauthbearer_token_refresh.rb
 - lib/waterdrop/instrumentation/callbacks/statistics.rb
 - lib/waterdrop/instrumentation/logger_listener.rb
 - lib/waterdrop/instrumentation/monitor.rb
@@ -144,14 +145,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version:
+      version: 3.0.0
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.5.
+rubygems_version: 3.5.9
 signing_key:
 specification_version: 4
 summary: Kafka messaging made easy!
metadata.gz.sig
CHANGED
Binary file