waterdrop 2.8.16 → 2.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +4 -1
- data/.rubocop.yml +2 -41
- data/.ruby-version +1 -1
- data/.yard-lint.yml +1 -1
- data/CHANGELOG.md +17 -0
- data/Gemfile +2 -1
- data/Gemfile.lint +2 -2
- data/Gemfile.lint.lock +11 -26
- data/Gemfile.lock +43 -66
- data/Rakefile +12 -0
- data/bin/integrations +5 -3
- data/bin/verify_kafka_warnings +1 -0
- data/config/locales/errors.yml +6 -0
- data/docker-compose.oauth.yml +3 -3
- data/docker-compose.sasl.yml +26 -0
- data/docker-compose.yml +1 -1
- data/lib/waterdrop/clients/rdkafka.rb +153 -31
- data/lib/waterdrop/config.rb +23 -6
- data/lib/waterdrop/contracts/config.rb +3 -0
- data/lib/waterdrop/contracts/tombstone.rb +21 -0
- data/lib/waterdrop/errors.rb +7 -0
- data/lib/waterdrop/instrumentation/callbacks/statistics.rb +4 -2
- data/lib/waterdrop/instrumentation/monitor.rb +61 -0
- data/lib/waterdrop/polling/latch.rb +1 -0
- data/lib/waterdrop/polling/poller.rb +4 -0
- data/lib/waterdrop/producer/tombstone.rb +78 -0
- data/lib/waterdrop/producer/variant.rb +3 -2
- data/lib/waterdrop/producer.rb +1 -0
- data/lib/waterdrop/version.rb +1 -1
- data/package-lock.json +3 -3
- data/renovate.json +6 -3
- data/waterdrop.gemspec +2 -2
- metadata +7 -14
- data/.github/CODEOWNERS +0 -3
- data/.github/FUNDING.yml +0 -1
- data/.github/ISSUE_TEMPLATE/bug_report.md +0 -43
- data/.github/ISSUE_TEMPLATE/feature_request.md +0 -20
- data/.github/workflows/ci.yml +0 -322
- data/.github/workflows/push.yml +0 -35
- data/.github/workflows/trigger-wiki-refresh.yml +0 -30
- data/.github/workflows/verify-action-pins.yml +0 -16
- data/.rspec +0 -2
- data/log/.gitkeep +0 -0
|
@@ -11,63 +11,185 @@ module WaterDrop
|
|
|
11
11
|
# @param producer [WaterDrop::Producer] producer instance with its config, etc
|
|
12
12
|
# @note We overwrite this that way, because we do not care
|
|
13
13
|
def new(producer)
|
|
14
|
-
kafka_config = producer.config.kafka.to_h
|
|
15
14
|
monitor = producer.config.monitor
|
|
15
|
+
kafka_config, statistics_enabled = prepare_statistics(
|
|
16
|
+
producer.config.kafka.to_h,
|
|
17
|
+
monitor
|
|
18
|
+
)
|
|
16
19
|
|
|
17
|
-
|
|
18
|
-
# and use our own Ruby-based poller instead
|
|
19
|
-
producer_options = { native_kafka_auto_start: false }
|
|
20
|
-
|
|
21
|
-
producer_options[:run_polling_thread] = false if producer.fd_polling?
|
|
20
|
+
client = build_rdkafka_client(producer, kafka_config)
|
|
22
21
|
|
|
23
|
-
|
|
22
|
+
register_instrumentation_callbacks(
|
|
23
|
+
producer,
|
|
24
|
+
client,
|
|
25
|
+
monitor,
|
|
26
|
+
statistics_enabled: statistics_enabled
|
|
27
|
+
)
|
|
24
28
|
|
|
25
|
-
#
|
|
26
|
-
|
|
29
|
+
# This callback is not global and is per client, thus we do not have to wrap it with a
|
|
30
|
+
# callbacks manager to make it work
|
|
31
|
+
client.delivery_callback = Instrumentation::Callbacks::Delivery.new(
|
|
27
32
|
producer.id,
|
|
28
|
-
|
|
33
|
+
producer.transactional?,
|
|
34
|
+
monitor
|
|
29
35
|
)
|
|
30
36
|
|
|
31
|
-
|
|
37
|
+
subscribe_oauth_listener(producer, monitor)
|
|
38
|
+
activate_client(producer, client, kafka_config)
|
|
39
|
+
|
|
40
|
+
client
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
private
|
|
44
|
+
|
|
45
|
+
# Decides whether librdkafka statistics should be enabled for this client and returns
|
|
46
|
+
# the (possibly mutated) kafka config together with the decision.
|
|
47
|
+
#
|
|
48
|
+
# When no one is subscribed to `statistics.emitted` at the time the underlying rdkafka
|
|
49
|
+
# client is being built, we force `statistics.interval.ms` to 0 regardless of user
|
|
50
|
+
# configuration. This prevents librdkafka from computing statistics periodically and
|
|
51
|
+
# saves a significant number of allocations on the Ruby side (no JSON parsing, no
|
|
52
|
+
# statistics hash materialization, no decorator work). Any listener subscribed after
|
|
53
|
+
# the client has been built will not receive `statistics.emitted` events because
|
|
54
|
+
# librdkafka never emits them in the first place — to use statistics, subscribe a
|
|
55
|
+
# listener BEFORE the first producer use.
|
|
56
|
+
#
|
|
57
|
+
# When statistics end up disabled (either because the user explicitly set the interval
|
|
58
|
+
# to 0, or because we forced it to 0 here), we freeze the statistics listener slot on
|
|
59
|
+
# the monitor. Any later subscription attempt raises instead of silently being a no-op,
|
|
60
|
+
# surfacing the timing mistake to the user immediately.
|
|
61
|
+
#
|
|
62
|
+
# @param kafka_config [Hash] kafka config hash taken from the producer config
|
|
63
|
+
# @param monitor [WaterDrop::Instrumentation::Monitor] per-producer monitor
|
|
64
|
+
# @return [Array] two-element array `[kafka_config, statistics_enabled]`. The returned
|
|
65
|
+
# hash is a duped copy when we need to mutate the interval, so the producer's own
|
|
66
|
+
# config hash is never touched.
|
|
67
|
+
def prepare_statistics(kafka_config, monitor)
|
|
68
|
+
statistics_enabled = kafka_config[:"statistics.interval.ms"].to_i.positive?
|
|
69
|
+
|
|
70
|
+
if statistics_enabled && !statistics_listener?(monitor)
|
|
71
|
+
kafka_config = kafka_config.dup
|
|
72
|
+
kafka_config[:"statistics.interval.ms"] = 0
|
|
73
|
+
statistics_enabled = false
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
monitor.freeze_statistics_listeners! unless statistics_enabled
|
|
77
|
+
|
|
78
|
+
[kafka_config, statistics_enabled]
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
# Instantiates the underlying rdkafka producer with the correct polling options. When
|
|
82
|
+
# FD polling is enabled, we disable librdkafka's native background polling thread and
|
|
83
|
+
# use our own Ruby-based poller instead.
|
|
84
|
+
#
|
|
85
|
+
# @param producer [WaterDrop::Producer]
|
|
86
|
+
# @param kafka_config [Hash]
|
|
87
|
+
# @return [::Rdkafka::Producer]
|
|
88
|
+
def build_rdkafka_client(producer, kafka_config)
|
|
89
|
+
producer_options = { native_kafka_auto_start: false }
|
|
90
|
+
producer_options[:run_polling_thread] = false if producer.fd_polling?
|
|
91
|
+
|
|
92
|
+
::Rdkafka::Config.new(kafka_config).producer(**producer_options)
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
# Registers the global callbacks (statistics, error, oauth refresh) for this producer
|
|
96
|
+
# on the shared `Karafka::Core::Instrumentation` managers. The statistics callback is
|
|
97
|
+
# only registered when librdkafka is actually going to emit statistics — otherwise it
|
|
98
|
+
# would never fire and would only waste memory and a manager slot.
|
|
99
|
+
#
|
|
100
|
+
# @param producer [WaterDrop::Producer]
|
|
101
|
+
# @param client [::Rdkafka::Producer]
|
|
102
|
+
# @param monitor [WaterDrop::Instrumentation::Monitor]
|
|
103
|
+
# @param statistics_enabled [Boolean]
|
|
104
|
+
def register_instrumentation_callbacks(producer, client, monitor, statistics_enabled:)
|
|
105
|
+
if statistics_enabled
|
|
106
|
+
::Karafka::Core::Instrumentation.statistics_callbacks.add(
|
|
107
|
+
producer.id,
|
|
108
|
+
Instrumentation::Callbacks::Statistics.new(
|
|
109
|
+
producer.id,
|
|
110
|
+
client.name,
|
|
111
|
+
monitor,
|
|
112
|
+
producer.config.statistics_decorator
|
|
113
|
+
)
|
|
114
|
+
)
|
|
115
|
+
end
|
|
116
|
+
|
|
32
117
|
::Karafka::Core::Instrumentation.error_callbacks.add(
|
|
33
118
|
producer.id,
|
|
34
119
|
Instrumentation::Callbacks::Error.new(producer.id, client.name, monitor)
|
|
35
120
|
)
|
|
36
121
|
|
|
37
|
-
# Register oauth bearer refresh for this particular type of callbacks
|
|
38
122
|
::Karafka::Core::Instrumentation.oauthbearer_token_refresh_callbacks.add(
|
|
39
123
|
producer.id,
|
|
40
124
|
Instrumentation::Callbacks::OauthbearerTokenRefresh.new(client, monitor)
|
|
41
125
|
)
|
|
126
|
+
end
|
|
42
127
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
128
|
+
# Subscribes the oauth bearer token refresh listener to the monitor if one is configured.
|
|
129
|
+
#
|
|
130
|
+
# We need to subscribe it here because we want it to be ready before any producer
|
|
131
|
+
# callbacks run. In theory because the WaterDrop rdkafka producer is lazy loaded, the
|
|
132
|
+
# user would have enough time to subscribe it himself, but then it would not coop with
|
|
133
|
+
# auto-configuration coming from Karafka. The way it is done here, if it is configured
|
|
134
|
+
# it will be subscribed and if not, the user always can subscribe it himself as long as
|
|
135
|
+
# it is done prior to first usage.
|
|
136
|
+
#
|
|
137
|
+
# @param producer [WaterDrop::Producer]
|
|
138
|
+
# @param monitor [WaterDrop::Instrumentation::Monitor]
|
|
139
|
+
def subscribe_oauth_listener(producer, monitor)
|
|
51
140
|
oauth_listener = producer.config.oauth.token_provider_listener
|
|
52
|
-
# We need to subscribe the oauth listener here because we want it to be ready before
|
|
53
|
-
# any producer callbacks run. In theory because WaterDrop rdkafka producer is lazy loaded
|
|
54
|
-
# we would have enough time to make user subscribe it himself, but then it would not
|
|
55
|
-
# coop with auto-configuration coming from Karafka. The way it is done below, if it is
|
|
56
|
-
# configured it will be subscribed and if not, user always can subscribe it himself as
|
|
57
|
-
# long as it is done prior to first usage
|
|
58
141
|
monitor.subscribe(oauth_listener) if oauth_listener
|
|
142
|
+
end
|
|
59
143
|
|
|
144
|
+
# Transitions the freshly built client into an active state: starts the native side,
|
|
145
|
+
# registers it with our FD poller (when FD polling is enabled), and initializes
|
|
146
|
+
# transactions if the user configured a transactional id. Must run last so all
|
|
147
|
+
# callbacks are already wired up before the client goes live.
|
|
148
|
+
#
|
|
149
|
+
# If any step after `client.start` fails (most commonly `init_transactions` timing
|
|
150
|
+
# out when Kafka is unreachable), we must clean up everything that was already set up:
|
|
151
|
+
# unregister from the poller, remove the global instrumentation callbacks, and close
|
|
152
|
+
# the native client. Without this, each failed attempt leaks native threads, pipe
|
|
153
|
+
# file descriptors, and callback registry entries permanently.
|
|
154
|
+
#
|
|
155
|
+
# @param producer [WaterDrop::Producer]
|
|
156
|
+
# @param client [::Rdkafka::Producer]
|
|
157
|
+
# @param kafka_config [Hash]
|
|
158
|
+
def activate_client(producer, client, kafka_config)
|
|
60
159
|
client.start
|
|
61
160
|
|
|
62
|
-
# Register with poller if FD polling is enabled
|
|
63
|
-
#
|
|
64
|
-
#
|
|
161
|
+
# Register with poller if FD polling is enabled. Uses the producer's configured poller
|
|
162
|
+
# (custom or global singleton). This must happen after client.start to ensure the
|
|
163
|
+
# client is ready.
|
|
65
164
|
producer.poller.register(producer, client) if producer.fd_polling?
|
|
66
165
|
|
|
67
|
-
# Switch to
|
|
166
|
+
# Switch to transactional mode if user provided a transactional id
|
|
68
167
|
client.init_transactions if kafka_config.key?(:"transactional.id")
|
|
168
|
+
rescue
|
|
169
|
+
# Unwind everything we set up before re-raising:
|
|
170
|
+
# 1. Unregister from poller (if we registered)
|
|
171
|
+
producer.poller.unregister(producer) if producer.fd_polling?
|
|
69
172
|
|
|
70
|
-
|
|
173
|
+
# 2. Remove global instrumentation callbacks so they don't accumulate
|
|
174
|
+
::Karafka::Core::Instrumentation.statistics_callbacks.delete(producer.id)
|
|
175
|
+
::Karafka::Core::Instrumentation.error_callbacks.delete(producer.id)
|
|
176
|
+
::Karafka::Core::Instrumentation.oauthbearer_token_refresh_callbacks.delete(producer.id)
|
|
177
|
+
|
|
178
|
+
# 3. Close the native client to join its threads and release pipe FDs
|
|
179
|
+
client.close
|
|
180
|
+
|
|
181
|
+
raise
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
# Checks whether there is at least one subscriber to the `statistics.emitted` event on
|
|
185
|
+
# the per-producer monitor. We use this at client build time to decide whether to enable
|
|
186
|
+
# librdkafka statistics at all.
|
|
187
|
+
#
|
|
188
|
+
# @param monitor [WaterDrop::Instrumentation::Monitor] per-producer monitor
|
|
189
|
+
# @return [Boolean] true if any listener is registered for `statistics.emitted`
|
|
190
|
+
def statistics_listener?(monitor)
|
|
191
|
+
listeners = monitor.listeners["statistics.emitted"]
|
|
192
|
+
listeners && !listeners.empty?
|
|
71
193
|
end
|
|
72
194
|
end
|
|
73
195
|
end
|
data/lib/waterdrop/config.rb
CHANGED
|
@@ -15,9 +15,9 @@ module WaterDrop
|
|
|
15
15
|
"statistics.interval.ms": 5_000,
|
|
16
16
|
# We set it to a value that is lower than `max_wait_timeout` to have a final verdict upon
|
|
17
17
|
# sync delivery
|
|
18
|
-
"message.timeout.ms":
|
|
18
|
+
"message.timeout.ms": 150_000,
|
|
19
19
|
# Must be more or equal to `message.timeout.ms` defaults
|
|
20
|
-
"transaction.timeout.ms":
|
|
20
|
+
"transaction.timeout.ms": 165_000,
|
|
21
21
|
# Lowers latency. Default in newer librdkafka but we want to make sure it is shipped to
|
|
22
22
|
# users despite what librdkafka they run on
|
|
23
23
|
"socket.nagle.disable": true
|
|
@@ -53,7 +53,7 @@ module WaterDrop
|
|
|
53
53
|
setting :max_payload_size, default: 1_000_012
|
|
54
54
|
# option [Integer] Wait that long for the delivery report or raise an error if this takes
|
|
55
55
|
# longer than the timeout ms.
|
|
56
|
-
setting :max_wait_timeout, default:
|
|
56
|
+
setting :max_wait_timeout, default: 180_000
|
|
57
57
|
# option [Boolean] should we upon detecting full librdkafka queue backoff and retry or should
|
|
58
58
|
# we raise an exception.
|
|
59
59
|
# When this is set to `true`, upon full queue, we won't raise an error. There will be error
|
|
@@ -116,6 +116,23 @@ module WaterDrop
|
|
|
116
116
|
# prevent overly aggressive disconnections.
|
|
117
117
|
setting :idle_disconnect_timeout, default: 0
|
|
118
118
|
|
|
119
|
+
# option [Karafka::Core::Monitoring::StatisticsDecorator] decorator instance used to compute
|
|
120
|
+
# deltas (_d) and freeze durations (_fd) on raw librdkafka statistics. The default is
|
|
121
|
+
# pre-configured with `only_keys` covering keys used by the built-in Datadog metrics
|
|
122
|
+
# listener and `excluded_keys` skipping subtrees not needed by producers (topics, broker
|
|
123
|
+
# window stats). Users who need additional decorated keys or full decoration can provide
|
|
124
|
+
# a custom decorator instance.
|
|
125
|
+
setting(
|
|
126
|
+
:statistics_decorator,
|
|
127
|
+
default: false,
|
|
128
|
+
constructor: lambda { |decorator|
|
|
129
|
+
decorator || ::Karafka::Core::Monitoring::StatisticsDecorator.new(
|
|
130
|
+
only_keys: %w[tx txretries txerrs rxerrs],
|
|
131
|
+
excluded_keys: %w[int_latency outbuf_latency rtt throttle req toppars topics]
|
|
132
|
+
)
|
|
133
|
+
}
|
|
134
|
+
)
|
|
135
|
+
|
|
119
136
|
# option [Boolean] should we send messages. Setting this to false can be really useful when
|
|
120
137
|
# testing and or developing because when set to false, won't actually ping Kafka but will
|
|
121
138
|
# run all the validations, etc
|
|
@@ -143,9 +160,9 @@ module WaterDrop
|
|
|
143
160
|
# Namespace for polling configuration
|
|
144
161
|
setting :polling do
|
|
145
162
|
# option [Symbol] Polling mode for handling producer callbacks
|
|
146
|
-
# :
|
|
147
|
-
# :
|
|
148
|
-
setting :mode, default: :
|
|
163
|
+
# :fd - uses a single global Ruby thread with IO.select-based multiplexing (default)
|
|
164
|
+
# :thread - uses librdkafka's native background polling threads
|
|
165
|
+
setting :mode, default: :fd
|
|
149
166
|
|
|
150
167
|
# option [WaterDrop::Polling::Poller, nil] Custom poller instance for isolation
|
|
151
168
|
# When nil (default), uses the global singleton poller shared by all FD-mode producers
|
|
@@ -33,10 +33,13 @@ module WaterDrop
|
|
|
33
33
|
required(:non_reloadable_errors) do |val|
|
|
34
34
|
val.is_a?(Array) && val.all?(Symbol)
|
|
35
35
|
end
|
|
36
|
+
|
|
36
37
|
required(:idle_disconnect_timeout) do |val|
|
|
37
38
|
val.is_a?(Integer) && (val.zero? || val >= 30_000)
|
|
38
39
|
end
|
|
39
40
|
|
|
41
|
+
required(:statistics_decorator) { |val| val.respond_to?(:call) }
|
|
42
|
+
|
|
40
43
|
nested(:oauth) do
|
|
41
44
|
required(:token_provider_listener) do |val|
|
|
42
45
|
val == false || val.respond_to?(:on_oauthbearer_token_refresh)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module WaterDrop
|
|
4
|
+
module Contracts
|
|
5
|
+
# Contract for validating tombstone-specific message requirements.
|
|
6
|
+
# Tombstones require a non-nil key and an explicit partition.
|
|
7
|
+
#
|
|
8
|
+
# @note Topic, headers, and other standard message attributes are validated separately
|
|
9
|
+
# by the {Message} contract during the produce delegation flow.
|
|
10
|
+
class Tombstone < ::Karafka::Core::Contractable::Contract
|
|
11
|
+
configure do |config|
|
|
12
|
+
config.error_messages = YAML.safe_load_file(
|
|
13
|
+
File.join(WaterDrop.gem_root, "config", "locales", "errors.yml")
|
|
14
|
+
).fetch("en").fetch("validations").fetch("tombstone")
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
required(:key) { |val| val.is_a?(String) && !val.empty? }
|
|
18
|
+
required(:partition) { |val| val.is_a?(Integer) && val >= 0 }
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
data/lib/waterdrop/errors.rb
CHANGED
|
@@ -59,6 +59,13 @@ module WaterDrop
|
|
|
59
59
|
# Raised when an error occurs in the polling loop
|
|
60
60
|
PollerError = Class.new(BaseError)
|
|
61
61
|
|
|
62
|
+
# Raised when trying to subscribe to `statistics.emitted` after the underlying rdkafka client
|
|
63
|
+
# has been built without any listener present at build time. In that case, librdkafka
|
|
64
|
+
# statistics are disabled entirely for performance, and late subscriptions would silently
|
|
65
|
+
# receive nothing. To fix: subscribe the listener BEFORE first producer use (i.e. before the
|
|
66
|
+
# underlying client is lazily initialized).
|
|
67
|
+
StatisticsNotEnabledError = Class.new(BaseError)
|
|
68
|
+
|
|
62
69
|
# Raised when during messages producing something bad happened inline
|
|
63
70
|
class ProduceManyError < ProduceError
|
|
64
71
|
attr_reader :dispatched
|
|
@@ -13,11 +13,13 @@ module WaterDrop
|
|
|
13
13
|
# @param producer_id [String]
|
|
14
14
|
# @param client_name [String] rdkafka client name
|
|
15
15
|
# @param monitor [WaterDrop::Instrumentation::Monitor] monitor we are using
|
|
16
|
-
|
|
16
|
+
# @param statistics_decorator [Karafka::Core::Monitoring::StatisticsDecorator] decorator
|
|
17
|
+
# instance to use for computing deltas and freeze durations on raw librdkafka statistics
|
|
18
|
+
def initialize(producer_id, client_name, monitor, statistics_decorator)
|
|
17
19
|
@producer_id = producer_id
|
|
18
20
|
@client_name = client_name
|
|
19
21
|
@monitor = monitor
|
|
20
|
-
@statistics_decorator =
|
|
22
|
+
@statistics_decorator = statistics_decorator
|
|
21
23
|
end
|
|
22
24
|
|
|
23
25
|
# Emits decorated statistics to the monitor
|
|
@@ -6,6 +6,15 @@ module WaterDrop
|
|
|
6
6
|
# By default uses our internal notifications bus but can be used with
|
|
7
7
|
# `ActiveSupport::Notifications` as well
|
|
8
8
|
class Monitor < ::Karafka::Core::Monitoring::Monitor
|
|
9
|
+
# Event name for librdkafka statistics emissions
|
|
10
|
+
STATISTICS_EVENT = "statistics.emitted"
|
|
11
|
+
|
|
12
|
+
# Method name a listener object must implement in order to receive
|
|
13
|
+
# `statistics.emitted` events via object-based subscription
|
|
14
|
+
STATISTICS_LISTENER_METHOD = :on_statistics_emitted
|
|
15
|
+
|
|
16
|
+
private_constant :STATISTICS_EVENT, :STATISTICS_LISTENER_METHOD
|
|
17
|
+
|
|
9
18
|
# @param notifications_bus [Object] either our internal notifications bus or
|
|
10
19
|
# `ActiveSupport::Notifications`
|
|
11
20
|
# @param namespace [String, nil] namespace for events or nil if no namespace
|
|
@@ -14,6 +23,58 @@ module WaterDrop
|
|
|
14
23
|
namespace = nil
|
|
15
24
|
)
|
|
16
25
|
super
|
|
26
|
+
@statistics_listeners_frozen = false
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
# Marks this monitor as no longer accepting new subscriptions to `statistics.emitted`.
|
|
30
|
+
# Called by the rdkafka client builder when it decides to leave librdkafka statistics
|
|
31
|
+
# disabled (because no listener was present at build time). Any subsequent attempt to
|
|
32
|
+
# subscribe to `statistics.emitted` — either via a block or via a listener object that
|
|
33
|
+
# responds to `on_statistics_emitted` — will raise
|
|
34
|
+
# `WaterDrop::Errors::StatisticsNotEnabledError` instead of silently doing nothing.
|
|
35
|
+
def freeze_statistics_listeners!
|
|
36
|
+
@statistics_listeners_frozen = true
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
# Subscribes to the notifications bus, raising if the user tries to subscribe to
|
|
40
|
+
# `statistics.emitted` after statistics have been disabled at client build time. This
|
|
41
|
+
# prevents the "silent nothing" pitfall where a user expects statistics but no events
|
|
42
|
+
# ever arrive because librdkafka statistics were turned off entirely.
|
|
43
|
+
#
|
|
44
|
+
# @param event_id_or_listener [String, Symbol, Object] event id (with block) or listener
|
|
45
|
+
# @param block [Proc, nil] handler block when subscribing to a named event
|
|
46
|
+
# @raise [WaterDrop::Errors::StatisticsNotEnabledError] when the subscription targets
|
|
47
|
+
# `statistics.emitted` and this monitor has been frozen for statistics
|
|
48
|
+
def subscribe(event_id_or_listener, &block)
|
|
49
|
+
if @statistics_listeners_frozen && targets_statistics?(event_id_or_listener, block)
|
|
50
|
+
raise Errors::StatisticsNotEnabledError, <<~MSG.tr("\n", " ").strip
|
|
51
|
+
Cannot subscribe to `statistics.emitted` after the producer has been connected.
|
|
52
|
+
Statistics are disabled because no listener was subscribed before the underlying
|
|
53
|
+
rdkafka client was built, so librdkafka is not emitting statistics at all.
|
|
54
|
+
Subscribe your listener BEFORE the first producer use (before the underlying
|
|
55
|
+
client is lazily initialized), or explicitly keep statistics enabled by leaving
|
|
56
|
+
a listener in place at build time.
|
|
57
|
+
MSG
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
super
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
private
|
|
64
|
+
|
|
65
|
+
# Determines whether a subscription call targets `statistics.emitted`. Handles both
|
|
66
|
+
# block-based subscription (where the first argument is the event id string) and
|
|
67
|
+
# listener-object subscription (where the listener responds to `on_statistics_emitted`).
|
|
68
|
+
#
|
|
69
|
+
# @param event_id_or_listener [String, Symbol, Object]
|
|
70
|
+
# @param block [Proc, nil]
|
|
71
|
+
# @return [Boolean]
|
|
72
|
+
def targets_statistics?(event_id_or_listener, block)
|
|
73
|
+
if block
|
|
74
|
+
event_id_or_listener.to_s == STATISTICS_EVENT
|
|
75
|
+
else
|
|
76
|
+
event_id_or_listener.respond_to?(STATISTICS_LISTENER_METHOD)
|
|
77
|
+
end
|
|
17
78
|
end
|
|
18
79
|
end
|
|
19
80
|
end
|
|
@@ -18,6 +18,7 @@ module WaterDrop
|
|
|
18
18
|
# This ensures the producer is fully drained and removed from the poller
|
|
19
19
|
# before returning control to the caller, preventing race conditions.
|
|
20
20
|
class Latch
|
|
21
|
+
# Initializes a new latch in the unreleased state.
|
|
21
22
|
def initialize
|
|
22
23
|
@mutex = Mutex.new
|
|
23
24
|
@cv = ConditionVariable.new
|
|
@@ -47,6 +47,8 @@ module WaterDrop
|
|
|
47
47
|
# @return [Integer] unique identifier for this poller instance
|
|
48
48
|
attr_reader :id
|
|
49
49
|
|
|
50
|
+
# Initializes an empty poller with no registered producers. The background thread is
|
|
51
|
+
# not started until the first producer is registered.
|
|
50
52
|
def initialize
|
|
51
53
|
@id = self.class.next_id
|
|
52
54
|
@mutex = Mutex.new
|
|
@@ -142,6 +144,8 @@ module WaterDrop
|
|
|
142
144
|
# This matches the threaded polling behavior which drains without timeout
|
|
143
145
|
# @param producer [WaterDrop::Producer] the producer instance
|
|
144
146
|
def unregister(producer)
|
|
147
|
+
ensure_same_process!
|
|
148
|
+
|
|
145
149
|
state, thread = @mutex.synchronize { [@producers[producer.id], @thread] }
|
|
146
150
|
|
|
147
151
|
return unless state
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module WaterDrop
|
|
4
|
+
class Producer
|
|
5
|
+
# Component for tombstone producer operations
|
|
6
|
+
#
|
|
7
|
+
# Tombstone records are Kafka messages with a nil payload, used to signal deletion of a key
|
|
8
|
+
# in compacted topics. This module provides a dedicated API so users don't have to manually
|
|
9
|
+
# construct `produce_*(topic:, key:, payload: nil, ...)` calls.
|
|
10
|
+
module Tombstone
|
|
11
|
+
# Produces a tombstone message to Kafka and waits for it to be delivered
|
|
12
|
+
#
|
|
13
|
+
# @param message [Hash] hash with at least `:topic`, `:key`, and `:partition` keys.
|
|
14
|
+
# `:payload` is not accepted — it will be silently removed if present.
|
|
15
|
+
#
|
|
16
|
+
# @return [Rdkafka::Producer::DeliveryReport] delivery report
|
|
17
|
+
#
|
|
18
|
+
# @raise [Errors::MessageInvalidError] When `:key` or `:partition` is missing
|
|
19
|
+
def tombstone_sync(message)
|
|
20
|
+
produce_sync(prepare_tombstone(message))
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# Produces a tombstone message to Kafka and does not wait for results
|
|
24
|
+
#
|
|
25
|
+
# @param message [Hash] hash with at least `:topic`, `:key`, and `:partition` keys.
|
|
26
|
+
# `:payload` is not accepted — it will be silently removed if present.
|
|
27
|
+
#
|
|
28
|
+
# @return [Rdkafka::Producer::DeliveryHandle] delivery handle
|
|
29
|
+
#
|
|
30
|
+
# @raise [Errors::MessageInvalidError] When `:key` or `:partition` is missing
|
|
31
|
+
def tombstone_async(message)
|
|
32
|
+
produce_async(prepare_tombstone(message))
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# Produces many tombstone messages to Kafka and waits for them to be delivered
|
|
36
|
+
#
|
|
37
|
+
# @param messages [Array<Hash>] array of hashes, each with `:topic`, `:key`, and
|
|
38
|
+
# `:partition` keys
|
|
39
|
+
#
|
|
40
|
+
# @return [Array<Rdkafka::Producer::DeliveryHandle>] delivery handles
|
|
41
|
+
#
|
|
42
|
+
# @raise [Errors::MessageInvalidError] When any message is missing `:key` or `:partition`
|
|
43
|
+
def tombstone_many_sync(messages)
|
|
44
|
+
produce_many_sync(messages.map { |message| prepare_tombstone(message) })
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
# Produces many tombstone messages to Kafka and does not wait for them to be delivered
|
|
48
|
+
#
|
|
49
|
+
# @param messages [Array<Hash>] array of hashes, each with `:topic`, `:key`, and
|
|
50
|
+
# `:partition` keys
|
|
51
|
+
#
|
|
52
|
+
# @return [Array<Rdkafka::Producer::DeliveryHandle>] delivery handles
|
|
53
|
+
#
|
|
54
|
+
# @raise [Errors::MessageInvalidError] When any message is missing `:key` or `:partition`
|
|
55
|
+
def tombstone_many_async(messages)
|
|
56
|
+
produce_many_async(messages.map { |message| prepare_tombstone(message) })
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
private
|
|
60
|
+
|
|
61
|
+
# Validates and prepares a tombstone message by ensuring required keys are present
|
|
62
|
+
# and setting payload to nil
|
|
63
|
+
#
|
|
64
|
+
# @param message [Hash] the original message hash
|
|
65
|
+
# @return [Hash] a new message hash with payload set to nil
|
|
66
|
+
# @raise [Errors::MessageInvalidError] when key or partition is missing
|
|
67
|
+
def prepare_tombstone(message)
|
|
68
|
+
message = message.dup
|
|
69
|
+
message.delete(:payload)
|
|
70
|
+
message[:payload] = nil
|
|
71
|
+
|
|
72
|
+
Contracts::Tombstone.new.validate!(message, Errors::MessageInvalidError)
|
|
73
|
+
|
|
74
|
+
message
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
end
|
|
@@ -71,6 +71,7 @@ module WaterDrop
|
|
|
71
71
|
Async,
|
|
72
72
|
Buffer,
|
|
73
73
|
Sync,
|
|
74
|
+
Tombstone,
|
|
74
75
|
Transactions
|
|
75
76
|
].each do |scope|
|
|
76
77
|
scope.instance_methods(false).each do |method_name|
|
|
@@ -81,7 +82,7 @@ module WaterDrop
|
|
|
81
82
|
#
|
|
82
83
|
# @producer.produce_async(*args, &block)
|
|
83
84
|
# ensure
|
|
84
|
-
# ref
|
|
85
|
+
# ref.delete(@producer.id)
|
|
85
86
|
# end
|
|
86
87
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
|
87
88
|
def #{method_name}(*args, &block)
|
|
@@ -90,7 +91,7 @@ module WaterDrop
|
|
|
90
91
|
|
|
91
92
|
@producer.#{method_name}(*args, &block)
|
|
92
93
|
ensure
|
|
93
|
-
ref
|
|
94
|
+
ref.delete(@producer.id)
|
|
94
95
|
end
|
|
95
96
|
RUBY
|
|
96
97
|
end
|
data/lib/waterdrop/producer.rb
CHANGED
data/lib/waterdrop/version.rb
CHANGED
data/package-lock.json
CHANGED
|
@@ -217,9 +217,9 @@
|
|
|
217
217
|
}
|
|
218
218
|
},
|
|
219
219
|
"node_modules/picomatch": {
|
|
220
|
-
"version": "2.3.
|
|
221
|
-
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.
|
|
222
|
-
"integrity": "sha512-
|
|
220
|
+
"version": "2.3.2",
|
|
221
|
+
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
|
|
222
|
+
"integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
|
|
223
223
|
"dev": true,
|
|
224
224
|
"license": "MIT",
|
|
225
225
|
"engines": {
|
data/renovate.json
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
"Gemfile",
|
|
9
9
|
"Gemfile.lint",
|
|
10
10
|
"waterdrop.gemspec",
|
|
11
|
-
"
|
|
11
|
+
"test/integrations/**/Gemfile",
|
|
12
12
|
".github/workflows/**",
|
|
13
13
|
"docker-compose*.yml",
|
|
14
14
|
"package.json"
|
|
@@ -28,7 +28,7 @@
|
|
|
28
28
|
},
|
|
29
29
|
{
|
|
30
30
|
"matchFileNames": [
|
|
31
|
-
"
|
|
31
|
+
"test/integrations/**/Gemfile"
|
|
32
32
|
],
|
|
33
33
|
"groupName": "integration test dependencies",
|
|
34
34
|
"commitMessageTopic": "integration test dependencies"
|
|
@@ -45,5 +45,8 @@
|
|
|
45
45
|
"minimumReleaseAge": "7 days",
|
|
46
46
|
"labels": [
|
|
47
47
|
"dependencies"
|
|
48
|
-
]
|
|
48
|
+
],
|
|
49
|
+
"lockFileMaintenance": {
|
|
50
|
+
"enabled": true
|
|
51
|
+
}
|
|
49
52
|
}
|
data/waterdrop.gemspec
CHANGED
|
@@ -16,13 +16,13 @@ Gem::Specification.new do |spec|
|
|
|
16
16
|
spec.description = spec.summary
|
|
17
17
|
spec.licenses = %w[LGPL-3.0-only Commercial]
|
|
18
18
|
|
|
19
|
-
spec.add_dependency "karafka-core", ">= 2.
|
|
19
|
+
spec.add_dependency "karafka-core", ">= 2.5.12", "< 3.0.0"
|
|
20
20
|
spec.add_dependency "karafka-rdkafka", ">= 0.24.0"
|
|
21
21
|
spec.add_dependency "zeitwerk", "~> 2.3"
|
|
22
22
|
|
|
23
23
|
spec.required_ruby_version = ">= 3.2.0"
|
|
24
24
|
|
|
25
|
-
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(spec)/}) }
|
|
25
|
+
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match?(%r{^(spec|test|\.github|log)/}) }
|
|
26
26
|
spec.executables = []
|
|
27
27
|
spec.require_paths = %w[lib]
|
|
28
28
|
|