waterdrop 2.8.11 → 2.8.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Gemfile.lock +1 -1
- data/config/locales/errors.yml +5 -0
- data/lib/waterdrop/config.rb +21 -1
- data/lib/waterdrop/connection_pool.rb +8 -12
- data/lib/waterdrop/contracts/config.rb +12 -11
- data/lib/waterdrop/contracts/message.rb +3 -5
- data/lib/waterdrop/contracts/transactional_offset.rb +2 -4
- data/lib/waterdrop/contracts/variant.rb +22 -32
- data/lib/waterdrop/instrumentation/class_monitor.rb +1 -1
- data/lib/waterdrop/instrumentation/monitor.rb +1 -1
- data/lib/waterdrop/instrumentation/vendors/datadog/metrics_listener.rb +33 -3
- data/lib/waterdrop/producer/idempotence.rb +73 -0
- data/lib/waterdrop/producer/status.rb +8 -0
- data/lib/waterdrop/producer/transactions.rb +28 -16
- data/lib/waterdrop/producer/variant.rb +9 -0
- data/lib/waterdrop/producer.rb +56 -19
- data/lib/waterdrop/version.rb +1 -1
- data/waterdrop.gemspec +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a0890fcd73147d293340b55cca58c975d81726208c609be1f338ce56d87b4d18
|
4
|
+
data.tar.gz: 7a2743c068af9936186fa852d6e663c85c172aa93de76b16aaf09e2c1de24a25
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 66136311cb010a1648a0a5440cd930fcb96bb75fe30575d5c14effb8550a7cea4295dda98828790ac567ec9f3346dde7901c11228f6ca251fdf454a5354fad9d
|
7
|
+
data.tar.gz: a701a63fe3a171084b63ac46ca9f66d75572b7efbfed0b794884df0fdb2b272404a6594ba239f16956cdc9c5fbecb31dbb5574fc0e5c1f12225ce61514c776a3
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,21 @@
|
|
1
1
|
# WaterDrop changelog
|
2
2
|
|
3
|
+
## 2.8.12 (2025-10-10)
|
4
|
+
- [Enhancement] Introduce `reload_on_idempotent_fatal_error` to automatically reload librdkafka producer after fatal errors on idempotent (non-transactional) producers.
|
5
|
+
- [Enhancement] Add configurable backoff and retry limits for fatal error recovery to prevent infinite reload loops:
|
6
|
+
- `wait_backoff_on_idempotent_fatal_error` (default: 5000ms) - backoff before retrying after idempotent fatal error reload
|
7
|
+
- `max_attempts_on_idempotent_fatal_error` (default: 5) - max reload attempts for idempotent fatal errors
|
8
|
+
- `wait_backoff_on_transaction_fatal_error` (default: 1000ms) - backoff after transactional fatal error reload
|
9
|
+
- `max_attempts_on_transaction_fatal_error` (default: 10) - max reload attempts for transactional fatal errors
|
10
|
+
- [Enhancement] Ensure `error.occurred` is instrumented before idempotent fatal error reload for visibility.
|
11
|
+
- [Enhancement] Automatically reset fatal error reload attempts counter on successful produce/transaction to allow recovery.
|
12
|
+
- [Refactor] Extract idempotence-related logic into separate `WaterDrop::Producer::Idempotence` module.
|
13
|
+
- [Refactor] Initialize `@idempotent` and `@transactional` instance variables in Producer#initialize for consistent Ruby object shapes optimization.
|
14
|
+
- [Refactor] Add `idempotent_reloadable?` and `idempotent_retryable?` methods to encapsulate idempotent fatal error reload checks.
|
15
|
+
- [Refactor] Add `transactional_retryable?` method to encapsulate transactional fatal error reload retry checks.
|
16
|
+
- [Fix] Fix errors raised when `config.kafka` is set to a frozen hash.
|
17
|
+
- [Fix] `Producer#transactional?` method now correctly computes transactional status when `@transactional` is initialized to nil.
|
18
|
+
|
3
19
|
## 2.8.11 (2025-09-27)
|
4
20
|
- [Enhancement] Provide fast-track for middleware-less flows (20% faster for a single message, 5000x faster for batches).
|
5
21
|
- [Enhancement] Optimize middlewares application by around 20%.
|
data/Gemfile.lock
CHANGED
data/config/locales/errors.yml
CHANGED
@@ -17,7 +17,12 @@ en:
|
|
17
17
|
wait_timeout_on_queue_full_format: must be a numeric that is equal or bigger to 0
|
18
18
|
wait_backoff_on_transaction_command_format: must be a numeric that is equal or bigger to 0
|
19
19
|
max_attempts_on_transaction_command_format: must be an integer that is equal or bigger than 1
|
20
|
+
reload_on_idempotent_fatal_error_format: must be boolean
|
21
|
+
wait_backoff_on_idempotent_fatal_error_format: must be a numeric that is equal or bigger to 0
|
22
|
+
max_attempts_on_idempotent_fatal_error_format: must be an integer that is equal or bigger than 1
|
20
23
|
reload_on_transaction_fatal_error_format: must be boolean
|
24
|
+
wait_backoff_on_transaction_fatal_error_format: must be a numeric that is equal or bigger to 0
|
25
|
+
max_attempts_on_transaction_fatal_error_format: must be an integer that is equal or bigger than 1
|
21
26
|
oauth.token_provider_listener_format: 'must be false or respond to #on_oauthbearer_token_refresh'
|
22
27
|
idle_disconnect_timeout_format: 'must be an integer that is equal to 0 or bigger than 30 000 (30 seconds)'
|
23
28
|
|
data/lib/waterdrop/config.rb
CHANGED
@@ -61,7 +61,7 @@ module WaterDrop
|
|
61
61
|
# recoverable, in a high number it still may mean issues.
|
62
62
|
# Waiting is one of the recommended strategies.
|
63
63
|
setting :wait_on_queue_full, default: true
|
64
|
-
# option [Integer] how long (in
|
64
|
+
# option [Integer] how long (in ms) should we backoff before a retry when queue is full
|
65
65
|
# The retry will happen with the same message and backoff should give us some time to
|
66
66
|
# dispatch previously buffered messages.
|
67
67
|
setting :wait_backoff_on_queue_full, default: 100
|
@@ -79,6 +79,22 @@ module WaterDrop
|
|
79
79
|
# to keep going or should we stop. Since we will open a new instance and the failed transaction
|
80
80
|
# anyhow rolls back, we should be able to safely reload.
|
81
81
|
setting :reload_on_transaction_fatal_error, default: true
|
82
|
+
# option [Boolean] When a fatal error occurs on idempotent producer, should we automatically
|
83
|
+
# close and recreate the underlying producer to recover and continue sending messages. This
|
84
|
+
# allows automatic recovery from fatal librdkafka errors in idempotent producers.
|
85
|
+
setting :reload_on_idempotent_fatal_error, default: false
|
86
|
+
# option [Numeric] How long to wait (in ms) before retrying after reloading on idempotent
|
87
|
+
# fatal error. This backoff prevents rapid reload loops from overloading the system.
|
88
|
+
setting :wait_backoff_on_idempotent_fatal_error, default: 5_000
|
89
|
+
# option [Integer] How many times to attempt reloading on idempotent fatal error before giving
|
90
|
+
# up. This prevents infinite reload loops if the producer never recovers.
|
91
|
+
setting :max_attempts_on_idempotent_fatal_error, default: 5
|
92
|
+
# option [Numeric] How long to wait (in ms) before continuing after reloading on transactional
|
93
|
+
# fatal error. This backoff prevents rapid reload loops from overloading the system.
|
94
|
+
setting :wait_backoff_on_transaction_fatal_error, default: 1_000
|
95
|
+
# option [Integer] How many times to attempt reloading on transactional fatal error before
|
96
|
+
# giving up. This prevents infinite reload loops if the producer never recovers.
|
97
|
+
setting :max_attempts_on_transaction_fatal_error, default: 10
|
82
98
|
# option [Integer] Idle disconnect timeout in milliseconds. When set to 0, idle disconnection
|
83
99
|
# is disabled. When set to a positive value, WaterDrop will automatically disconnect
|
84
100
|
# producers that haven't sent any messages for the specified time period. This helps preserve
|
@@ -134,6 +150,10 @@ module WaterDrop
|
|
134
150
|
# to overwrite the whole hash if they want to
|
135
151
|
# @param config [Karafka::Core::Configurable::Node] config of this producer
|
136
152
|
def merge_kafka_defaults!(config)
|
153
|
+
# Duplicate the kafka config so we can mutate it internally if needed without polluting the
|
154
|
+
# user provided object
|
155
|
+
config.kafka = config.kafka.dup
|
156
|
+
|
137
157
|
KAFKA_DEFAULTS.each do |key, value|
|
138
158
|
next if config.kafka.key?(key)
|
139
159
|
|
@@ -84,7 +84,6 @@ module WaterDrop
|
|
84
84
|
|
85
85
|
# Executes a block with a producer from the global pool
|
86
86
|
#
|
87
|
-
# @param block [Proc] Block to execute with a producer
|
88
87
|
# @yield [producer] Producer from the global pool
|
89
88
|
# @return [Object] Result of the block
|
90
89
|
# @raise [RuntimeError] If no global pool is configured
|
@@ -93,10 +92,10 @@ module WaterDrop
|
|
93
92
|
# WaterDrop::ConnectionPool.with do |producer|
|
94
93
|
# producer.produce_sync(topic: 'events', payload: 'data')
|
95
94
|
# end
|
96
|
-
def with(
|
95
|
+
def with(...)
|
97
96
|
raise 'No global connection pool configured. Call setup first.' unless @default_pool
|
98
97
|
|
99
|
-
@default_pool.with(
|
98
|
+
@default_pool.with(...)
|
100
99
|
end
|
101
100
|
|
102
101
|
# Get statistics about the global pool
|
@@ -154,7 +153,6 @@ module WaterDrop
|
|
154
153
|
# Execute a transaction with a producer from the global connection pool
|
155
154
|
# Only available when connection pool is configured
|
156
155
|
#
|
157
|
-
# @param block [Proc] Block to execute within a transaction
|
158
156
|
# @yield [producer] Producer from the global pool with an active transaction
|
159
157
|
# @return [Object] Result of the block
|
160
158
|
# @raise [RuntimeError] If no global pool is configured
|
@@ -164,10 +162,10 @@ module WaterDrop
|
|
164
162
|
# producer.produce(topic: 'events', payload: 'data1')
|
165
163
|
# producer.produce(topic: 'events', payload: 'data2')
|
166
164
|
# end
|
167
|
-
def transaction(
|
165
|
+
def transaction(...)
|
168
166
|
raise 'No global connection pool configured. Call setup first.' unless @default_pool
|
169
167
|
|
170
|
-
@default_pool.transaction(
|
168
|
+
@default_pool.transaction(...)
|
171
169
|
end
|
172
170
|
|
173
171
|
private
|
@@ -298,7 +296,6 @@ module WaterDrop
|
|
298
296
|
# Execute a block with a producer from the global connection pool
|
299
297
|
# Only available when connection pool is configured
|
300
298
|
#
|
301
|
-
# @param block [Proc] Block to execute with a producer
|
302
299
|
# @yield [producer] Producer from the global pool
|
303
300
|
# @return [Object] Result of the block
|
304
301
|
#
|
@@ -306,14 +303,13 @@ module WaterDrop
|
|
306
303
|
# WaterDrop.with do |producer|
|
307
304
|
# producer.produce_sync(topic: 'events', payload: 'data')
|
308
305
|
# end
|
309
|
-
def with(
|
310
|
-
ConnectionPool.with(
|
306
|
+
def with(...)
|
307
|
+
ConnectionPool.with(...)
|
311
308
|
end
|
312
309
|
|
313
310
|
# Execute a transaction with a producer from the global connection pool
|
314
311
|
# Only available when connection pool is configured
|
315
312
|
#
|
316
|
-
# @param block [Proc] Block to execute within a transaction
|
317
313
|
# @yield [producer] Producer from the global pool with an active transaction
|
318
314
|
# @return [Object] Result of the block
|
319
315
|
#
|
@@ -322,8 +318,8 @@ module WaterDrop
|
|
322
318
|
# producer.produce(topic: 'events', payload: 'data1')
|
323
319
|
# producer.produce(topic: 'events', payload: 'data2')
|
324
320
|
# end
|
325
|
-
def transaction(
|
326
|
-
ConnectionPool.transaction(
|
321
|
+
def transaction(...)
|
322
|
+
ConnectionPool.transaction(...)
|
327
323
|
end
|
328
324
|
|
329
325
|
# Access the global connection pool
|
@@ -5,10 +5,8 @@ module WaterDrop
|
|
5
5
|
# Contract with validation rules for WaterDrop configuration details
|
6
6
|
class Config < ::Karafka::Core::Contractable::Contract
|
7
7
|
configure do |config|
|
8
|
-
config.error_messages = YAML.
|
9
|
-
File.
|
10
|
-
File.join(WaterDrop.gem_root, 'config', 'locales', 'errors.yml')
|
11
|
-
)
|
8
|
+
config.error_messages = YAML.safe_load_file(
|
9
|
+
File.join(WaterDrop.gem_root, 'config', 'locales', 'errors.yml')
|
12
10
|
).fetch('en').fetch('validations').fetch('config')
|
13
11
|
end
|
14
12
|
|
@@ -27,6 +25,11 @@ module WaterDrop
|
|
27
25
|
required(:wait_backoff_on_transaction_command) { |val| val.is_a?(Numeric) && val >= 0 }
|
28
26
|
required(:max_attempts_on_transaction_command) { |val| val.is_a?(Integer) && val >= 1 }
|
29
27
|
required(:reload_on_transaction_fatal_error) { |val| [true, false].include?(val) }
|
28
|
+
required(:reload_on_idempotent_fatal_error) { |val| [true, false].include?(val) }
|
29
|
+
required(:wait_backoff_on_idempotent_fatal_error) { |val| val.is_a?(Numeric) && val >= 0 }
|
30
|
+
required(:max_attempts_on_idempotent_fatal_error) { |val| val.is_a?(Integer) && val >= 1 }
|
31
|
+
required(:wait_backoff_on_transaction_fatal_error) { |val| val.is_a?(Numeric) && val >= 0 }
|
32
|
+
required(:max_attempts_on_transaction_fatal_error) { |val| val.is_a?(Integer) && val >= 1 }
|
30
33
|
required(:idle_disconnect_timeout) do |val|
|
31
34
|
val.is_a?(Integer) && (val.zero? || val >= 30_000)
|
32
35
|
end
|
@@ -42,13 +45,11 @@ module WaterDrop
|
|
42
45
|
virtual do |config, errors|
|
43
46
|
next true unless errors.empty?
|
44
47
|
|
45
|
-
errors =
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
.reject { |key| key.is_a?(Symbol) }
|
51
|
-
.each { |key| errors << [[:kafka, key], :kafka_key_must_be_a_symbol] }
|
48
|
+
errors = config
|
49
|
+
.fetch(:kafka)
|
50
|
+
.keys
|
51
|
+
.reject { |key| key.is_a?(Symbol) }
|
52
|
+
.map { |key| [[:kafka, key], :kafka_key_must_be_a_symbol] }
|
52
53
|
|
53
54
|
errors
|
54
55
|
end
|
@@ -6,10 +6,8 @@ module WaterDrop
|
|
6
6
|
# we provide to producer are valid and usable
|
7
7
|
class Message < ::Karafka::Core::Contractable::Contract
|
8
8
|
configure do |config|
|
9
|
-
config.error_messages = YAML.
|
10
|
-
File.
|
11
|
-
File.join(WaterDrop.gem_root, 'config', 'locales', 'errors.yml')
|
12
|
-
)
|
9
|
+
config.error_messages = YAML.safe_load_file(
|
10
|
+
File.join(WaterDrop.gem_root, 'config', 'locales', 'errors.yml')
|
13
11
|
).fetch('en').fetch('validations').fetch('message')
|
14
12
|
end
|
15
13
|
|
@@ -49,7 +47,7 @@ module WaterDrop
|
|
49
47
|
|
50
48
|
# Headers can be either strings or arrays of strings
|
51
49
|
next if value.is_a?(String)
|
52
|
-
next if value.is_a?(Array) && value.all?
|
50
|
+
next if value.is_a?(Array) && value.all?(String)
|
53
51
|
|
54
52
|
errors << [%i[headers], :invalid_value_type]
|
55
53
|
end
|
@@ -6,10 +6,8 @@ module WaterDrop
|
|
6
6
|
# and match our expectations
|
7
7
|
class TransactionalOffset < ::Karafka::Core::Contractable::Contract
|
8
8
|
configure do |config|
|
9
|
-
config.error_messages = YAML.
|
10
|
-
File.
|
11
|
-
File.join(WaterDrop.gem_root, 'config', 'locales', 'errors.yml')
|
12
|
-
)
|
9
|
+
config.error_messages = YAML.safe_load_file(
|
10
|
+
File.join(WaterDrop.gem_root, 'config', 'locales', 'errors.yml')
|
13
11
|
).fetch('en').fetch('validations').fetch('transactional_offset')
|
14
12
|
end
|
15
13
|
|
@@ -27,10 +27,8 @@ module WaterDrop
|
|
27
27
|
private_constant :TOPIC_CONFIG_KEYS, :BOOLEANS
|
28
28
|
|
29
29
|
configure do |config|
|
30
|
-
config.error_messages = YAML.
|
31
|
-
File.
|
32
|
-
File.join(WaterDrop.gem_root, 'config', 'locales', 'errors.yml')
|
33
|
-
)
|
30
|
+
config.error_messages = YAML.safe_load_file(
|
31
|
+
File.join(WaterDrop.gem_root, 'config', 'locales', 'errors.yml')
|
34
32
|
).fetch('en').fetch('validations').fetch('variant')
|
35
33
|
end
|
36
34
|
|
@@ -41,13 +39,11 @@ module WaterDrop
|
|
41
39
|
virtual do |config, errors|
|
42
40
|
next true unless errors.empty?
|
43
41
|
|
44
|
-
errors =
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
.reject { |key| key.is_a?(Symbol) }
|
50
|
-
.each { |key| errors << [[:kafka, key], :kafka_key_must_be_a_symbol] }
|
42
|
+
errors = config
|
43
|
+
.fetch(:topic_config)
|
44
|
+
.keys
|
45
|
+
.reject { |key| key.is_a?(Symbol) }
|
46
|
+
.map { |key| [[:kafka, key], :kafka_key_must_be_a_symbol] }
|
51
47
|
|
52
48
|
errors
|
53
49
|
end
|
@@ -56,13 +52,11 @@ module WaterDrop
|
|
56
52
|
virtual do |config, errors|
|
57
53
|
next true unless errors.empty?
|
58
54
|
|
59
|
-
errors =
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
.reject { |key| TOPIC_CONFIG_KEYS.include?(key) }
|
65
|
-
.each { |key| errors << [[:kafka, key], :kafka_key_not_per_topic] }
|
55
|
+
errors = config
|
56
|
+
.fetch(:topic_config)
|
57
|
+
.keys
|
58
|
+
.reject { |key| TOPIC_CONFIG_KEYS.include?(key) }
|
59
|
+
.map { |key| [[:kafka, key], :kafka_key_not_per_topic] }
|
66
60
|
|
67
61
|
errors
|
68
62
|
end
|
@@ -75,13 +69,11 @@ module WaterDrop
|
|
75
69
|
# Relevant only for the transactional producer
|
76
70
|
next true unless config.fetch(:transactional)
|
77
71
|
|
78
|
-
errors =
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
.select { |key| key.to_s.include?('acks') }
|
84
|
-
.each { |key| errors << [[:kafka, key], :kafka_key_acks_not_changeable] }
|
72
|
+
errors = config
|
73
|
+
.fetch(:topic_config)
|
74
|
+
.keys
|
75
|
+
.select { |key| key.to_s.include?('acks') }
|
76
|
+
.map { |key| [[:kafka, key], :kafka_key_acks_not_changeable] }
|
85
77
|
|
86
78
|
errors
|
87
79
|
end
|
@@ -92,13 +84,11 @@ module WaterDrop
|
|
92
84
|
# Relevant only for the idempotent producer
|
93
85
|
next true unless config.fetch(:idempotent)
|
94
86
|
|
95
|
-
errors =
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
.select { |key| key.to_s.include?('acks') }
|
101
|
-
.each { |key| errors << [[:kafka, key], :kafka_key_acks_not_changeable] }
|
87
|
+
errors = config
|
88
|
+
.fetch(:topic_config)
|
89
|
+
.keys
|
90
|
+
.select { |key| key.to_s.include?('acks') }
|
91
|
+
.map { |key| [[:kafka, key], :kafka_key_acks_not_changeable] }
|
102
92
|
|
103
93
|
errors
|
104
94
|
end
|
@@ -58,10 +58,9 @@ module WaterDrop
|
|
58
58
|
setup(&block) if block
|
59
59
|
end
|
60
60
|
|
61
|
-
# @param block [Proc] configuration block
|
62
61
|
# @note We define this alias to be consistent with `WaterDrop#setup`
|
63
|
-
def setup(
|
64
|
-
configure(
|
62
|
+
def setup(...)
|
63
|
+
configure(...)
|
65
64
|
end
|
66
65
|
|
67
66
|
# Hooks up to WaterDrop instrumentation for emitted statistics
|
@@ -92,6 +91,16 @@ module WaterDrop
|
|
92
91
|
produced_sync
|
93
92
|
produced_async
|
94
93
|
].each do |event_scope|
|
94
|
+
# @example
|
95
|
+
# def on_message_produced_sync(event)
|
96
|
+
# report_message(event[:message][:topic], :produced_sync)
|
97
|
+
# end
|
98
|
+
#
|
99
|
+
# def on_messages_produced_sync(event)
|
100
|
+
# event[:messages].each do |message|
|
101
|
+
# report_message(message[:topic], :produced_sync)
|
102
|
+
# end
|
103
|
+
# end
|
95
104
|
class_eval <<~METHODS, __FILE__, __LINE__ + 1
|
96
105
|
# @param event [Karafka::Core::Monitoring::Event]
|
97
106
|
def on_message_#{event_scope}(event)
|
@@ -112,6 +121,14 @@ module WaterDrop
|
|
112
121
|
message_buffered
|
113
122
|
messages_buffered
|
114
123
|
].each do |event_scope|
|
124
|
+
# @example
|
125
|
+
# def on_message_buffered(event)
|
126
|
+
# histogram(
|
127
|
+
# 'buffer.size',
|
128
|
+
# event[:buffer].size,
|
129
|
+
# tags: default_tags
|
130
|
+
# )
|
131
|
+
# end
|
115
132
|
class_eval <<~METHODS, __FILE__, __LINE__ + 1
|
116
133
|
# @param event [Karafka::Core::Monitoring::Event]
|
117
134
|
def on_#{event_scope}(event)
|
@@ -130,6 +147,12 @@ module WaterDrop
|
|
130
147
|
flushed_sync
|
131
148
|
flushed_async
|
132
149
|
].each do |event_scope|
|
150
|
+
# @example
|
151
|
+
# def on_buffer_flushed_sync(event)
|
152
|
+
# event[:messages].each do |message|
|
153
|
+
# report_message(message[:topic], :flushed_sync)
|
154
|
+
# end
|
155
|
+
# end
|
133
156
|
class_eval <<~METHODS, __FILE__, __LINE__ + 1
|
134
157
|
# @param event [Karafka::Core::Monitoring::Event]
|
135
158
|
def on_buffer_#{event_scope}(event)
|
@@ -149,6 +172,13 @@ module WaterDrop
|
|
149
172
|
increment
|
150
173
|
decrement
|
151
174
|
].each do |metric_type|
|
175
|
+
# @example
|
176
|
+
# def count(key, *args)
|
177
|
+
# client.count(
|
178
|
+
# namespaced_metric(key),
|
179
|
+
# *args
|
180
|
+
# )
|
181
|
+
# end
|
152
182
|
class_eval <<~METHODS, __FILE__, __LINE__ + 1
|
153
183
|
def #{metric_type}(key, *args)
|
154
184
|
client.#{metric_type}(
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module WaterDrop
|
4
|
+
class Producer
|
5
|
+
# Component for idempotent producer operations and error recovery
|
6
|
+
module Idempotence
|
7
|
+
# @return [Boolean] true if current producer is idempotent
|
8
|
+
def idempotent?
|
9
|
+
# Every transactional producer is idempotent by default always
|
10
|
+
return true if transactional?
|
11
|
+
return @idempotent unless @idempotent.nil?
|
12
|
+
|
13
|
+
@idempotent = config.kafka.to_h.fetch(:'enable.idempotence', false)
|
14
|
+
end
|
15
|
+
|
16
|
+
# Checks if the given error should trigger an idempotent producer reload
|
17
|
+
#
|
18
|
+
# @param error [Rdkafka::RdkafkaError] the error to check
|
19
|
+
# @return [Boolean] true if the error should trigger a reload
|
20
|
+
#
|
21
|
+
# @note Returns true only if all of the following conditions are met:
|
22
|
+
# - Error is fatal
|
23
|
+
# - Producer is idempotent
|
24
|
+
# - Producer is not transactional
|
25
|
+
# - reload_on_idempotent_fatal_error config is enabled
|
26
|
+
# - Error is not in the NON_RELOADABLE_FATAL_ERRORS list
|
27
|
+
def idempotent_reloadable?(error)
|
28
|
+
return false unless error.fatal?
|
29
|
+
return false unless idempotent?
|
30
|
+
return false if transactional?
|
31
|
+
return false unless config.reload_on_idempotent_fatal_error
|
32
|
+
return false if NON_RELOADABLE_FATAL_ERRORS.include?(error.code)
|
33
|
+
|
34
|
+
true
|
35
|
+
end
|
36
|
+
|
37
|
+
# Checks if we can still retry reloading after an idempotent fatal error
|
38
|
+
#
|
39
|
+
# @return [Boolean] true if we haven't exceeded the max reload attempts yet
|
40
|
+
def idempotent_retryable?
|
41
|
+
@idempotent_fatal_error_attempts < config.max_attempts_on_idempotent_fatal_error
|
42
|
+
end
|
43
|
+
|
44
|
+
private
|
45
|
+
|
46
|
+
# Reloads the underlying client instance when a fatal error occurs on an idempotent producer
|
47
|
+
#
|
48
|
+
# This method handles fatal errors that can occur in idempotent (non-transactional) producers.
|
49
|
+
# When a fatal error is detected, it will flush pending messages, purge the queue, close the
|
50
|
+
# old client, and create a new client instance to continue operations.
|
51
|
+
#
|
52
|
+
# @param attempt [Integer] the current reload attempt number
|
53
|
+
#
|
54
|
+
# @note This is only called for idempotent, non-transactional producers when
|
55
|
+
# `reload_on_idempotent_fatal_error` is enabled
|
56
|
+
# @note After reload, the producer will automatically retry the failed operation
|
57
|
+
def idempotent_reload_client_on_fatal_error(attempt)
|
58
|
+
@operating_mutex.synchronize do
|
59
|
+
@monitor.instrument(
|
60
|
+
'producer.reloaded',
|
61
|
+
producer_id: id,
|
62
|
+
attempt: attempt
|
63
|
+
) do
|
64
|
+
@client.flush(current_variant.max_wait_timeout)
|
65
|
+
purge
|
66
|
+
@client.close
|
67
|
+
@client = Builder.new.call(self, @config)
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
@@ -38,6 +38,14 @@ module WaterDrop
|
|
38
38
|
end
|
39
39
|
|
40
40
|
LIFECYCLE.each do |state|
|
41
|
+
# @example
|
42
|
+
# def initial?
|
43
|
+
# @current == :initial
|
44
|
+
# end
|
45
|
+
#
|
46
|
+
# def initial!
|
47
|
+
# @current = :initial
|
48
|
+
# end
|
41
49
|
module_eval <<-RUBY, __FILE__, __LINE__ + 1
|
42
50
|
# @return [Boolean] true if current status is as we want, otherwise false
|
43
51
|
def #{state}?
|
@@ -4,16 +4,10 @@ module WaterDrop
|
|
4
4
|
class Producer
|
5
5
|
# Transactions related producer functionalities
|
6
6
|
module Transactions
|
7
|
-
# We should never reload producer if it was fenced, otherwise we could end up with some sort
|
8
|
-
# of weird race-conditions
|
9
|
-
NON_RELOADABLE_ERRORS = %i[
|
10
|
-
fenced
|
11
|
-
].freeze
|
12
|
-
|
13
7
|
# Contract to validate that input for transactional offset storage is correct
|
14
8
|
CONTRACT = Contracts::TransactionalOffset.new
|
15
9
|
|
16
|
-
private_constant :CONTRACT
|
10
|
+
private_constant :CONTRACT
|
17
11
|
|
18
12
|
# Creates a transaction.
|
19
13
|
#
|
@@ -99,6 +93,9 @@ module WaterDrop
|
|
99
93
|
transactional_instrument(:committed) { client.commit_transaction }
|
100
94
|
end
|
101
95
|
|
96
|
+
# Reset attempts counter on successful transaction commit
|
97
|
+
@transaction_fatal_error_attempts = 0
|
98
|
+
|
102
99
|
result
|
103
100
|
# We need to handle any interrupt including critical in order not to have the transaction
|
104
101
|
# running. This will also handle things like `IRB::Abort`
|
@@ -135,11 +132,18 @@ module WaterDrop
|
|
135
132
|
|
136
133
|
# @return [Boolean] Is this producer a transactional one
|
137
134
|
def transactional?
|
138
|
-
return @transactional
|
135
|
+
return @transactional unless @transactional.nil?
|
139
136
|
|
140
137
|
@transactional = config.kafka.to_h.key?(:'transactional.id')
|
141
138
|
end
|
142
139
|
|
140
|
+
# Checks if we can still retry reloading after a transactional fatal error
|
141
|
+
#
|
142
|
+
# @return [Boolean] true if we haven't exceeded the max reload attempts yet
|
143
|
+
def transactional_retryable?
|
144
|
+
@transaction_fatal_error_attempts < config.max_attempts_on_transaction_fatal_error
|
145
|
+
end
|
146
|
+
|
143
147
|
# Marks given message as consumed inside of a transaction.
|
144
148
|
#
|
145
149
|
# @param consumer [#consumer_group_metadata_pointer] any consumer from which we can obtain
|
@@ -188,18 +192,16 @@ module WaterDrop
|
|
188
192
|
# This allows us to simplify the async and sync batch dispatchers because we can ensure that
|
189
193
|
# their internal dispatches will be wrapped only with a single transaction and not
|
190
194
|
# a transaction per message
|
191
|
-
|
192
|
-
|
193
|
-
transactional? ? transaction(&block) : yield
|
195
|
+
def with_transaction_if_transactional(...)
|
196
|
+
transactional? ? transaction(...) : yield
|
194
197
|
end
|
195
198
|
|
196
199
|
# Instruments the transactional operation with producer id
|
197
200
|
#
|
198
201
|
# @param key [Symbol] transaction operation key
|
199
202
|
# @param details [Hash] additional instrumentation details
|
200
|
-
|
201
|
-
|
202
|
-
@monitor.instrument("transaction.#{key}", details.merge(producer_id: id), &block)
|
203
|
+
def transactional_instrument(key, details = EMPTY_HASH, ...)
|
204
|
+
@monitor.instrument("transaction.#{key}", details.merge(producer_id: id), ...)
|
203
205
|
end
|
204
206
|
|
205
207
|
# Error handling for transactional operations is a bit special. There are three types of
|
@@ -279,12 +281,19 @@ module WaterDrop
|
|
279
281
|
|
280
282
|
return unless rd_error.is_a?(Rdkafka::RdkafkaError)
|
281
283
|
return unless config.reload_on_transaction_fatal_error
|
282
|
-
return if
|
284
|
+
return if NON_RELOADABLE_FATAL_ERRORS.include?(rd_error.code)
|
285
|
+
|
286
|
+
# Check if we've exceeded max reload attempts
|
287
|
+
return unless transactional_retryable?
|
288
|
+
|
289
|
+
# Increment attempts before reload
|
290
|
+
@transaction_fatal_error_attempts += 1
|
283
291
|
|
284
292
|
@operating_mutex.synchronize do
|
285
293
|
@monitor.instrument(
|
286
294
|
'producer.reloaded',
|
287
|
-
producer_id: id
|
295
|
+
producer_id: id,
|
296
|
+
attempt: @transaction_fatal_error_attempts
|
288
297
|
) do
|
289
298
|
@client.flush(current_variant.max_wait_timeout)
|
290
299
|
purge
|
@@ -292,6 +301,9 @@ module WaterDrop
|
|
292
301
|
@client = Builder.new.call(self, @config)
|
293
302
|
end
|
294
303
|
end
|
304
|
+
|
305
|
+
# Wait before continuing to avoid rapid reload loops
|
306
|
+
sleep(config.wait_backoff_on_transaction_fatal_error / 1_000.0)
|
295
307
|
end
|
296
308
|
end
|
297
309
|
end
|
@@ -74,6 +74,15 @@ module WaterDrop
|
|
74
74
|
Transactions
|
75
75
|
].each do |scope|
|
76
76
|
scope.instance_methods(false).each do |method_name|
|
77
|
+
# @example
|
78
|
+
# def produce_async(*args, &block)
|
79
|
+
# ref = Fiber.current.waterdrop_clients ||= {}
|
80
|
+
# ref[@producer.id] = self
|
81
|
+
#
|
82
|
+
# @producer.produce_async(*args, &block)
|
83
|
+
# ensure
|
84
|
+
# ref[@producer.id] = nil
|
85
|
+
# end
|
77
86
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
78
87
|
def #{method_name}(*args, &block)
|
79
88
|
ref = Fiber.current.waterdrop_clients ||= {}
|
data/lib/waterdrop/producer.rb
CHANGED
@@ -8,6 +8,7 @@ module WaterDrop
|
|
8
8
|
include Async
|
9
9
|
include Buffer
|
10
10
|
include Transactions
|
11
|
+
include Idempotence
|
11
12
|
include ClassMonitor
|
12
13
|
include ::Karafka::Core::Helpers::Time
|
13
14
|
include ::Karafka::Core::Taggable
|
@@ -21,13 +22,21 @@ module WaterDrop
|
|
21
22
|
Rdkafka::Producer::DeliveryHandle::WaitTimeoutError
|
22
23
|
].freeze
|
23
24
|
|
25
|
+
# We should never reload producer on certain fatal errors as they may indicate state that
|
26
|
+
# cannot be recovered by simply recreating the client
|
27
|
+
NON_RELOADABLE_FATAL_ERRORS = %i[
|
28
|
+
fenced
|
29
|
+
].freeze
|
30
|
+
|
24
31
|
# Empty hash to save on memory allocations
|
25
32
|
EMPTY_HASH = {}.freeze
|
26
33
|
|
27
34
|
# Empty array to save on memory allocations
|
28
35
|
EMPTY_ARRAY = [].freeze
|
29
36
|
|
30
|
-
private_constant
|
37
|
+
private_constant(
|
38
|
+
:SUPPORTED_FLOW_ERRORS, :NON_RELOADABLE_FATAL_ERRORS, :EMPTY_HASH, :EMPTY_ARRAY
|
39
|
+
)
|
31
40
|
|
32
41
|
def_delegators :config
|
33
42
|
|
@@ -57,6 +66,10 @@ module WaterDrop
|
|
57
66
|
@default_variant = nil
|
58
67
|
@client = nil
|
59
68
|
@closing_thread_id = nil
|
69
|
+
@idempotent = nil
|
70
|
+
@transactional = nil
|
71
|
+
@idempotent_fatal_error_attempts = 0
|
72
|
+
@transaction_fatal_error_attempts = 0
|
60
73
|
|
61
74
|
@status = Status.new
|
62
75
|
@messages = []
|
@@ -74,13 +87,12 @@ module WaterDrop
|
|
74
87
|
end
|
75
88
|
|
76
89
|
# Sets up the whole configuration and initializes all that is needed
|
77
|
-
|
78
|
-
def setup(&block)
|
90
|
+
def setup(...)
|
79
91
|
raise Errors::ProducerAlreadyConfiguredError, id unless @status.initial?
|
80
92
|
|
81
93
|
@config = Config
|
82
94
|
.new
|
83
|
-
.setup(
|
95
|
+
.setup(...)
|
84
96
|
.config
|
85
97
|
|
86
98
|
@id = @config.id
|
@@ -209,15 +221,6 @@ module WaterDrop
|
|
209
221
|
|
210
222
|
alias variant with
|
211
223
|
|
212
|
-
# @return [Boolean] true if current producer is idempotent
|
213
|
-
def idempotent?
|
214
|
-
# Every transactional producer is idempotent by default always
|
215
|
-
return true if transactional?
|
216
|
-
return @idempotent if instance_variable_defined?(:'@idempotent')
|
217
|
-
|
218
|
-
@idempotent = config.kafka.to_h.fetch(:'enable.idempotence', false)
|
219
|
-
end
|
220
|
-
|
221
224
|
# Returns and caches the middleware object that may be used
|
222
225
|
# @return [WaterDrop::Producer::Middleware]
|
223
226
|
def middleware
|
@@ -476,12 +479,46 @@ module WaterDrop
|
|
476
479
|
message[:topic_config] = current_variant.topic_config
|
477
480
|
end
|
478
481
|
|
479
|
-
if transactional?
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
482
|
+
result = if transactional?
|
483
|
+
transaction { client.produce(**message) }
|
484
|
+
else
|
485
|
+
client.produce(**message)
|
486
|
+
end
|
487
|
+
|
488
|
+
# Reset attempts counter on successful produce
|
489
|
+
@idempotent_fatal_error_attempts = 0
|
490
|
+
|
491
|
+
result
|
484
492
|
rescue SUPPORTED_FLOW_ERRORS.first => e
|
493
|
+
# Check if this is a fatal error on an idempotent producer and we should reload
|
494
|
+
if idempotent_reloadable?(e)
|
495
|
+
# Check if we've exceeded max reload attempts
|
496
|
+
raise unless idempotent_retryable?
|
497
|
+
|
498
|
+
# Increment attempts before reload
|
499
|
+
@idempotent_fatal_error_attempts += 1
|
500
|
+
|
501
|
+
# Instrument error.occurred before attempting reload for visibility
|
502
|
+
@monitor.instrument(
|
503
|
+
'error.occurred',
|
504
|
+
producer_id: id,
|
505
|
+
error: e,
|
506
|
+
type: 'librdkafka.idempotent_fatal_error',
|
507
|
+
attempt: @idempotent_fatal_error_attempts
|
508
|
+
)
|
509
|
+
|
510
|
+
# Attempt to reload the producer
|
511
|
+
idempotent_reload_client_on_fatal_error(@idempotent_fatal_error_attempts)
|
512
|
+
|
513
|
+
# Wait before retrying to avoid rapid reload loops
|
514
|
+
sleep(@config.wait_backoff_on_idempotent_fatal_error / 1_000.0)
|
515
|
+
|
516
|
+
# After reload, retry the produce operation
|
517
|
+
@operations_in_progress.decrement
|
518
|
+
|
519
|
+
retry
|
520
|
+
end
|
521
|
+
|
485
522
|
# Unless we want to wait and retry and it's a full queue, we raise normally
|
486
523
|
raise unless @config.wait_on_queue_full
|
487
524
|
raise unless e.code == :queue_full
|
@@ -490,7 +527,7 @@ module WaterDrop
|
|
490
527
|
# in an infinite loop, effectively hanging the processing
|
491
528
|
raise unless monotonic_now - produce_time < @config.wait_timeout_on_queue_full
|
492
529
|
|
493
|
-
label = caller_locations(2, 1)[0].label.split
|
530
|
+
label = caller_locations(2, 1)[0].label.split.last.split('#').last
|
494
531
|
|
495
532
|
# We use this syntax here because we want to preserve the original `#cause` when we
|
496
533
|
# instrument the error and there is no way to manually assign `#cause` value. We want to keep
|
data/lib/waterdrop/version.rb
CHANGED
data/waterdrop.gemspec
CHANGED
@@ -7,7 +7,7 @@ require 'waterdrop/version'
|
|
7
7
|
|
8
8
|
Gem::Specification.new do |spec|
|
9
9
|
spec.name = 'waterdrop'
|
10
|
-
spec.version =
|
10
|
+
spec.version = WaterDrop::VERSION
|
11
11
|
spec.platform = Gem::Platform::RUBY
|
12
12
|
spec.authors = ['Maciej Mensfeld']
|
13
13
|
spec.email = %w[contact@karafka.io]
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: waterdrop
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.8.
|
4
|
+
version: 2.8.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maciej Mensfeld
|
@@ -120,6 +120,7 @@ files:
|
|
120
120
|
- lib/waterdrop/producer/buffer.rb
|
121
121
|
- lib/waterdrop/producer/builder.rb
|
122
122
|
- lib/waterdrop/producer/class_monitor.rb
|
123
|
+
- lib/waterdrop/producer/idempotence.rb
|
123
124
|
- lib/waterdrop/producer/status.rb
|
124
125
|
- lib/waterdrop/producer/sync.rb
|
125
126
|
- lib/waterdrop/producer/transactions.rb
|