waterdrop 2.8.14 → 2.8.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +215 -36
- data/.github/workflows/push.yml +3 -3
- data/.github/workflows/trigger-wiki-refresh.yml +1 -1
- data/.github/workflows/verify-action-pins.yml +1 -1
- data/.gitignore +0 -1
- data/.rubocop.yml +87 -0
- data/.ruby-version +1 -1
- data/.yard-lint.yml +172 -72
- data/CHANGELOG.md +13 -0
- data/Gemfile +8 -9
- data/Gemfile.lint +14 -0
- data/Gemfile.lint.lock +123 -0
- data/Gemfile.lock +27 -28
- data/README.md +1 -1
- data/Rakefile +2 -2
- data/bin/integrations +28 -29
- data/bin/verify_topics_naming +8 -8
- data/config/locales/errors.yml +12 -0
- data/docker-compose.oauth.yml +56 -0
- data/docker-compose.yml +1 -1
- data/lib/waterdrop/clients/dummy.rb +9 -0
- data/lib/waterdrop/clients/rdkafka.rb +13 -2
- data/lib/waterdrop/config.rb +32 -5
- data/lib/waterdrop/connection_pool.rb +13 -11
- data/lib/waterdrop/contracts/config.rb +30 -6
- data/lib/waterdrop/contracts/message.rb +2 -2
- data/lib/waterdrop/contracts/poller_config.rb +26 -0
- data/lib/waterdrop/contracts/transactional_offset.rb +2 -2
- data/lib/waterdrop/contracts/variant.rb +18 -18
- data/lib/waterdrop/errors.rb +3 -0
- data/lib/waterdrop/instrumentation/callbacks/delivery.rb +8 -8
- data/lib/waterdrop/instrumentation/callbacks/error.rb +5 -5
- data/lib/waterdrop/instrumentation/callbacks/oauthbearer_token_refresh.rb +4 -4
- data/lib/waterdrop/instrumentation/callbacks/statistics.rb +18 -5
- data/lib/waterdrop/instrumentation/idle_disconnector_listener.rb +4 -4
- data/lib/waterdrop/instrumentation/logger_listener.rb +10 -10
- data/lib/waterdrop/instrumentation/notifications.rb +3 -0
- data/lib/waterdrop/instrumentation/vendors/datadog/metrics_listener.rb +19 -19
- data/lib/waterdrop/polling/config.rb +52 -0
- data/lib/waterdrop/polling/latch.rb +49 -0
- data/lib/waterdrop/polling/poller.rb +415 -0
- data/lib/waterdrop/polling/queue_pipe.rb +63 -0
- data/lib/waterdrop/polling/state.rb +151 -0
- data/lib/waterdrop/polling.rb +22 -0
- data/lib/waterdrop/producer/async.rb +6 -6
- data/lib/waterdrop/producer/buffer.rb +8 -8
- data/lib/waterdrop/producer/idempotence.rb +3 -3
- data/lib/waterdrop/producer/sync.rb +15 -8
- data/lib/waterdrop/producer/testing.rb +1 -1
- data/lib/waterdrop/producer/transactions.rb +6 -6
- data/lib/waterdrop/producer.rb +113 -30
- data/lib/waterdrop/version.rb +1 -1
- data/lib/waterdrop.rb +15 -10
- data/package-lock.json +331 -0
- data/package.json +9 -0
- data/renovate.json +25 -6
- data/waterdrop.gemspec +23 -23
- metadata +17 -5
- data/.coditsu/ci.yml +0 -3
|
@@ -0,0 +1,415 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module WaterDrop
  # Namespace for FD-based polling components
  # Contains the global Poller singleton and State class for managing producer polling
  module Polling
    # Global poller singleton that manages a single polling thread for all FD-mode producers
    # This replaces librdkafka's native background polling threads with a single Ruby thread
    # that uses IO.select for efficient multiplexing
    #
    # Spawning one thread per producer is acceptable for 1-2 producers but in case of a system
    # with several (transactional for example) the cost becomes bigger and bigger.
    #
    # This implementation handles things by being event-driven instead of GVL releasing blocking.
    #
    # @note Newly registered producers may experience up to 1 second delay before their first
    #   poll cycle, as the poller thread only rebuilds its IO list when IO.select times out.
    #   This is acceptable because producers are expected to be long-lived and the initial
    #   connection overhead to Kafka typically exceeds this delay anyway.
    class Poller
      include Singleton
      include ::Karafka::Core::Helpers::Time

      # Make new public so users can create dedicated poller instances for isolation
      # The singleton instance remains available via Poller.instance for the default behavior
      public_class_method :new

      # Mutex for thread-safe ID generation - initialized at class load time
      # to avoid race conditions with lazy initialization
      ID_MUTEX = Mutex.new

      # Counter for generating unique poller IDs
      @id_counter = 0

      class << self
        # Generates incremental IDs for poller instances (starting from 0)
        # @return [Integer] next poller ID
        def next_id
          ID_MUTEX.synchronize do
            id = @id_counter
            @id_counter += 1
            id
          end
        end
      end

      # @return [Integer] unique identifier for this poller instance
      attr_reader :id

      # Creates a poller with no registered producers and no running thread
      # The polling thread is started lazily on first register
      def initialize
        @id = self.class.next_id
        @mutex = Mutex.new
        @producers = {}
        @thread = nil
        @shutdown = false
        @pid = Process.pid

        # Cached collections - rebuilt only when producers change
        @cached_ios = []
        @cached_io_to_state = {}
        @cached_states = []
        @cached_result = nil
        @ios_dirty = true
      end

      # Checks if the current thread is the poller thread
      # Used to detect when close is called from within a callback to avoid deadlock
      # @return [Boolean] true if current thread is the poller thread
      def in_poller_thread?
        Thread.current == @thread
      end

      # Checks if the poller thread is alive
      # @return [Boolean] true if the poller thread is running
      def alive?
        @thread&.alive? || false
      end

      # Returns the number of registered producers
      # @return [Integer] number of producers
      def count
        @mutex.synchronize { @producers.size }
      end

      # Shuts down the poller and resets state
      # @note This is primarily for testing to reset singleton state between tests
      def shutdown!
        @mutex.synchronize { @shutdown = true }

        thread = @thread
        if thread&.alive?
          # Give the thread a bounded grace period before forcing termination
          thread.join(5)
          thread.kill if thread.alive?
        end

        @mutex.synchronize do
          @producers.each_value { |state| state.close unless state.closed? }
          @producers.clear
          @thread = nil
          @shutdown = false
          @ios_dirty = true
          @cached_ios = []
          @cached_io_to_state = {}
          @cached_states = []
          @cached_result = nil
          @poll_timeout_s = nil
        end
      end

      # Registers a producer with the poller
      # Starts the polling thread if it is not already running
      # @param producer [WaterDrop::Producer] the producer instance
      # @param client [Rdkafka::Producer] the rdkafka client
      def register(producer, client)
        ensure_same_process!

        state = State.new(
          producer.id,
          client,
          producer.monitor,
          producer.config.polling.fd.max_time,
          producer.config.polling.fd.periodic_poll_interval
        )

        @mutex.synchronize do
          @producers[producer.id] = state
          @ios_dirty = true
          # Reset shutdown flag in case thread is exiting but hasn't yet
          # This prevents race where new producer is closed by exiting thread
          @shutdown = false
          ensure_thread_running!
        end

        producer.monitor.instrument(
          "poller.producer_registered",
          producer_id: producer.id
        )
      end

      # Unregisters a producer from the poller
      # This method blocks until the producer is fully removed from the poller
      # to prevent race conditions when disconnect/reconnect happens in quick succession
      # This matches the threaded polling behavior which drains without timeout
      # @param producer [WaterDrop::Producer] the producer instance
      def unregister(producer)
        state, thread = @mutex.synchronize { [@producers[producer.id], @thread] }

        return unless state

        # Signal the poller thread to handle removal
        state.signal_close

        # Wait for the state to be fully closed by the poller thread
        # This prevents race conditions where a new registration with the same
        # producer_id could be deleted by a pending close signal
        # Skip waiting if called from within the poller thread itself (e.g., from a callback)
        # to avoid deadlock - the poller thread can't wait for itself
        # The cleanup will happen after the callback returns
        state.wait_for_close unless Thread.current == thread

        producer.monitor.instrument(
          "poller.producer_unregistered",
          producer_id: producer.id
        )
      end

      private

      # Ensures we're in the same process (for fork safety)
      # If a fork is detected, all inherited state is discarded since the parent's
      # thread and pipe FDs are not valid in the child
      def ensure_same_process!
        return if @pid == Process.pid

        # Reset state after fork - parent's thread and producers are not valid in child
        @mutex = Mutex.new
        @producers = {}
        @thread = nil
        @shutdown = false
        @pid = Process.pid
        @cached_ios = []
        @cached_io_to_state = {}
        @cached_states = []
        @cached_result = nil
        @ios_dirty = true
      end

      # Ensures the polling thread is running
      # Must be called within @mutex.synchronize
      def ensure_thread_running!
        return if @thread&.alive?

        @shutdown = false
        @thread = Thread.new { polling_loop }
        @thread.name = "waterdrop.poller##{@id}"
        @thread.priority = Config.config.thread_priority
      end

      # Main polling loop that runs in a dedicated thread
      # Exits when shutdown is requested or no producers remain; cleanup in the
      # ensure clause releases any waiters regardless of how the loop terminated
      def polling_loop
        backoff_ms = 0

        loop do
          break if @shutdown

          # Apply backoff from previous error
          if backoff_ms > 0
            sleep(backoff_ms / 1_000.0)
            backoff_ms = 0
          end

          # Collect readable IOs (queue FDs)
          readable_ios, io_to_state = collect_readable_ios

          # Exit when no producers registered
          # New registrations will start a fresh thread via ensure_thread_running!
          break if readable_ios.empty?

          poll_with_select(readable_ios, io_to_state)
        rescue => e
          # Report error and apply exponential backoff to prevent spam
          broadcast_error("poller.polling_loop", e)
          backoff_ms =
            if backoff_ms.zero?
              Config.config.backoff_min
            else
              [backoff_ms * 2, Config.config.backoff_max].min
            end
        end
      ensure
        # Clear thread reference first so new registrations will start a fresh thread
        # This prevents race where register sees old thread as alive during cleanup
        @mutex.synchronize { @thread = nil }

        # When the poller thread exits (error or clean shutdown), close all remaining states
        # This releases any latches that might be waiting in unregister calls
        close_all_states
      end

      # Broadcasts an error to all registered producers' monitors
      # @param type [String] error type identifier
      # @param error [Exception] the error to report
      def broadcast_error(type, error)
        @cached_states.each do |state|
          state.monitor.instrument(
            "error.occurred",
            type: type,
            error: error,
            producer_id: state.producer_id
          )
        end
      end

      # Collects all IOs to monitor and builds a mapping from IO to State
      # Uses cached arrays when possible to avoid allocations in the hot path
      # @return [Array<Array<IO>, Hash{IO => State}, Array<State>>] tuple of ios, io-to-state map, states
      def collect_readable_ios
        # Fast path: return cached result if not dirty (no mutex needed)
        # Safe because @cached_result is frozen and assigned atomically
        return @cached_result unless @ios_dirty

        @mutex.synchronize do
          @cached_ios = []
          @cached_io_to_state = {}
          @cached_states = []

          @producers.each_value do |state|
            io = state.io
            @cached_ios << io
            @cached_io_to_state[io] = state
            @cached_states << state
          end

          @cached_result = [@cached_ios, @cached_io_to_state, @cached_states].freeze
          @ios_dirty = false
        end

        @cached_result
      end

      # Poll producers using IO.select for efficient multiplexing
      # @param readable_ios [Array<IO>] IOs to monitor
      # @param io_to_state [Hash{IO => State}] mapping from IO to state
      def poll_with_select(readable_ios, io_to_state)
        begin
          ready = IO.select(readable_ios, nil, nil, poll_timeout_s)
        rescue IOError, Errno::EBADF
          # An IO was closed - mark dirty to rebuild on next iteration
          @ios_dirty = true
          return
        end

        if ready.nil?
          # Timeout: poll ALL producers to ensure OAuth/stats fire
          poll_all_producers
        else
          # FDs ready: handle close signals and poll active producers
          any_polled = false

          ready[0].each do |io|
            state = io_to_state[io]
            next unless state

            # Drain the pipe first (clears librdkafka signals + our signals)
            state.drain

            # Check if this producer is closing (flag set before signal)
            if state.closing?
              handle_close_signal(state)
            else
              poll_producer(state)
              # Check if callback signaled close while we were polling
              # (e.g., user code closed producer from within delivery callback)
              if state.closing?
                handle_close_signal(state)
              else
                any_polled = true
              end
            end
          end

          # Check for stale producers when actively polling
          # Skip when single producer (most common case) - no other producers to become stale
          # (ensures OAuth/stats fire for idle producers when others are busy)
          poll_stale_producers if any_polled && @cached_states.size > 1
        end
      end

      # Polls all registered producers
      # Called when IO.select times out to ensure periodic polling happens
      # This ensures OAuth token refresh and statistics callbacks fire for all producers
      def poll_all_producers
        @cached_states.each { |state| poll_producer(state) }
      end

      # Polls producers that haven't been polled recently
      # Called when processing continue signals to prevent starvation of idle producers
      # when one producer is very busy
      # Each State internally throttles the check to avoid excessive overhead
      def poll_stale_producers
        @cached_states.each do |state|
          poll_producer(state) if state.needs_periodic_poll?
        end
      end

      # Drains the producer's event queue by polling until empty or time quanta exceeded
      # @param state [State] the producer state
      def poll_producer(state)
        # state.poll returns:
        # - true when queue is empty (fully drained)
        # - false when timeout hit (more events may remain)
        drained = state.poll
        state.mark_polled!

        # Hit time limit but still have events - signal to continue polling
        state.signal_continue unless drained
      rescue Rdkafka::ClosedProducerError
        # Producer was closed, will be cleaned up
      end

      # Handles a close signal from a producer
      # @param state [State] the producer state
      def handle_close_signal(state)
        # Drain remaining events before closing
        # This matches rdkafka's native polling thread behavior: keep polling until outq_len is zero
        drain_producer_queue(state)

        # Remove producer from registry and clean up
        # If this was the last producer, signal shutdown to stop the thread immediately
        @mutex.synchronize do
          @producers.delete(state.producer_id)
          @ios_dirty = true

          # Stop thread immediately when last producer unregisters to prevent resource leakage
          @shutdown = true if @producers.empty?
        end

        state.close
      end

      # Closes all remaining producer states
      # Called when the poller thread exits to release any pending latches
      # This prevents deadlocks if producers are waiting in unregister
      def close_all_states
        states = @mutex.synchronize do
          to_close = @producers.values.dup
          @producers.clear
          @ios_dirty = true
          to_close
        end

        states.each do |state|
          state.close unless state.closed?
        rescue
          # Ignore errors during cleanup
        end
      end

      # Drains the producer's event queue completely before closing
      # Matches rdkafka's native polling thread behavior: keep polling until queue is empty
      # @param state [State] the producer state
      def drain_producer_queue(state)
        loop do
          break if state.queue_empty?

          state.poll
        end
      rescue Rdkafka::ClosedProducerError
        # Producer was already closed, nothing more to drain
      end

      # @return [Float] poll_timeout converted to seconds (cached)
      def poll_timeout_s
        @poll_timeout_s ||= Config.config.poll_timeout / 1_000.0
      end
    end
  end
end
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module WaterDrop
  module Polling
    # Pipe hooked into librdkafka's queue event notification mechanism
    # librdkafka writes a byte whenever events (delivery reports, statistics)
    # land on the main queue, so IO.select wakes immediately
    #
    # WaterDrop reuses the very same pipe for its own wake-ups:
    # - Continue: a poll cycle ran out of time budget with events still queued
    # - Close: the producer is shutting down (paired with the State closing flag)
    #
    # Sharing one pipe keeps the file descriptor count and the IO.select set small
    class QueuePipe
      # Single-read drain size; pipe buffers are typically 64KB so one
      # syscall is enough to empty the pipe
      DRAIN_BYTES = 1_048_576

      private_constant :DRAIN_BYTES

      # @return [IO] the readable end of the pipe for use with IO.select
      attr_reader :reader

      # Builds the pipe and wires its write end into the client's event queue
      # @param client [Rdkafka::Producer] the rdkafka client
      # @raise [StandardError] if enable_queue_io_events fails
      def initialize(client)
        @reader, @writer = IO.pipe

        # Ask librdkafka to notify us through the write end on queue activity
        client.enable_queue_io_events(@writer.fileno)
      end

      # Wakes IO.select by pushing one byte into the pipe
      # Used for continue/close signals; thread-safe, non-blocking and
      # silently tolerant of an already-closed pipe
      def signal
        @writer.write_nonblock("W", exception: false)
      rescue IOError, Errno::EBADF
        # Pipe closed
      end

      # Clears every pending byte from the pipe after IO.select returns
      # A single oversized non-blocking read empties the buffer in one syscall
      def drain
        @reader.read_nonblock(DRAIN_BYTES, exception: false)
      rescue IOError, Errno::EBADF
        # Pipe closed during drain
      end

      # Closes both pipe ends, ignoring ends that are already closed
      def close
        [@reader, @writer].each { |pipe_end| safely_close(pipe_end) }
        nil
      end

      private

      # Closes one IO, swallowing already-closed errors
      # @param io [IO] the IO to close
      def safely_close(io)
        io.close
      rescue IOError, Errno::EBADF
        # Already closed, ignore
      end
    end
  end
end
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module WaterDrop
  module Polling
    # Holds the state for a registered producer in the poller
    # Each producer has its own State instance that tracks:
    # - The producer ID and client reference
    # - Queue pipe for IO.select monitoring (shared with librdkafka for efficiency)
    # - Configuration (max poll time)
    # - Last poll time for staleness detection
    # - Closing flag for shutdown signaling
    class State
      include ::Karafka::Core::Helpers::Time

      # @return [String] producer ID
      attr_reader :producer_id

      # @return [IO] the queue pipe reader for IO.select monitoring
      attr_reader :io

      # @return [Object] the producer's monitor for instrumentation
      attr_reader :monitor

      # Creates a new state for a producer
      # @param producer_id [String] unique producer ID
      # @param client [Rdkafka::Producer] the rdkafka producer client
      # @param monitor [Object] the producer's monitor for error reporting
      # @param max_poll_time [Integer] max time in ms to poll per cycle
      # @param periodic_poll_interval [Integer] max time in ms before this producer needs periodic poll
      # @raise [StandardError] if queue pipe setup fails (FD mode requires this to work)
      def initialize(producer_id, client, monitor, max_poll_time, periodic_poll_interval)
        @producer_id = producer_id
        @client = client
        @monitor = monitor
        @max_poll_time = max_poll_time
        @periodic_poll_interval = periodic_poll_interval
        # Initialize to 0 so first check always triggers (no nil handling needed)
        @last_poll_time = 0
        @last_stale_check = 0
        @last_stale_result = false

        # Closing flag - set by signal_close, checked by poller
        @closing = false

        # Latch for synchronizing close operations
        @close_latch = Latch.new

        # Queue pipe for all signaling (librdkafka events + continue + close)
        # Reusing one pipe reduces FDs and IO.select overhead
        @queue_pipe = QueuePipe.new(@client)

        # Cache reader reference for hot path performance
        @io = @queue_pipe.reader
      end

      # Drains the queue pipe
      # Called before polling to clear any pending signals
      def drain
        @queue_pipe.drain
      end

      # Polls the producer's event queue until it is empty or the per-cycle
      # time budget (max_poll_time) is exhausted
      # @return [Boolean] true if no more events to process, false if stopped due to time limit
      def poll
        drained = true
        deadline = monotonic_now + @max_poll_time

        @client.events_poll_nb_each do |count|
          if count.zero?
            :stop
          elsif monotonic_now >= deadline
            drained = false
            :stop
          end
        end

        drained
      end

      # Checks if the producer's event queue is empty
      # @return [Boolean] true if queue is empty
      def queue_empty?
        @client.queue_size.zero?
      end

      # Minimum interval between stale checks to avoid excessive overhead
      STALE_CHECK_THROTTLE_MS = 100

      private_constant :STALE_CHECK_THROTTLE_MS

      # Marks this producer as having been polled
      # Called after polling to track staleness
      def mark_polled!
        @last_poll_time = monotonic_now
      end

      # Checks if this producer needs a periodic poll
      # Used to ensure OAuth/stats callbacks fire even when another producer is busy
      # Includes internal throttling to avoid excessive checks
      # @return [Boolean] true if the producer needs a periodic poll
      def needs_periodic_poll?
        now = monotonic_now

        # Throttle: return cached result if checked recently
        return @last_stale_result if (now - @last_stale_check) < STALE_CHECK_THROTTLE_MS

        @last_stale_check = now
        @last_stale_result = (now - @last_poll_time) >= @periodic_poll_interval
      end

      # Signals the poller to remove this producer
      # Called from any thread when the producer is being closed
      # Sets closing flag BEFORE signaling to ensure poller sees it
      def signal_close
        @closing = true
        @queue_pipe.signal
      end

      # Signals that there's more work to do (hit time limit but queue not empty)
      # This wakes up IO.select immediately instead of waiting for timeout
      def signal_continue
        @queue_pipe.signal
      end

      # @return [Boolean] whether this producer is being closed
      def closing?
        @closing
      end

      # Closes all resources and signals any waiters
      # Idempotent - subsequent calls are no-ops once the latch is released
      def close
        return if closed?

        @queue_pipe.close
        @close_latch.release!
      end

      # Waits for this state to be closed
      # Used by unregister to ensure synchronous cleanup before returning
      # This matches the threaded polling behavior which drains without timeout
      def wait_for_close
        @close_latch.wait
      end

      # @return [Boolean] whether this state has been closed
      def closed?
        @close_latch.released?
      end
    end
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# WaterDrop main module
|
|
4
|
+
module WaterDrop
  # Namespace for FD-based polling components
  # Contains the global Poller singleton and State class for managing producer polling
  module Polling
    # Configures the global FD poller settings
    # @param block [Proc] Configuration block
    # @yieldparam config [Karafka::Core::Configurable::Node] config node
    # @example Configure before creating any producers
    #   WaterDrop::Polling.setup do |config|
    #     config.thread_priority = -1
    #     config.poll_timeout = 500
    #   end
    def self.setup(&block)
      Config.setup(&block)
    end
  end
end
|
|
@@ -18,7 +18,7 @@ module WaterDrop
|
|
|
18
18
|
validate_message!(message)
|
|
19
19
|
|
|
20
20
|
@monitor.instrument(
|
|
21
|
-
|
|
21
|
+
"message.produced_async",
|
|
22
22
|
producer_id: id,
|
|
23
23
|
message: message
|
|
24
24
|
) { produce(message) }
|
|
@@ -29,11 +29,11 @@ module WaterDrop
|
|
|
29
29
|
raise Errors::ProduceError, e.inspect
|
|
30
30
|
rescue Errors::ProduceError => ex
|
|
31
31
|
@monitor.instrument(
|
|
32
|
-
|
|
32
|
+
"error.occurred",
|
|
33
33
|
producer_id: id,
|
|
34
34
|
message: message,
|
|
35
35
|
error: ex,
|
|
36
|
-
type:
|
|
36
|
+
type: "message.produce_async"
|
|
37
37
|
)
|
|
38
38
|
|
|
39
39
|
raise ex
|
|
@@ -56,7 +56,7 @@ module WaterDrop
|
|
|
56
56
|
messages.each { |message| validate_message!(message) }
|
|
57
57
|
|
|
58
58
|
@monitor.instrument(
|
|
59
|
-
|
|
59
|
+
"messages.produced_async",
|
|
60
60
|
producer_id: id,
|
|
61
61
|
messages: messages
|
|
62
62
|
) do
|
|
@@ -72,12 +72,12 @@ module WaterDrop
|
|
|
72
72
|
re_raised = Errors::ProduceManyError.new(dispatched, e.inspect)
|
|
73
73
|
|
|
74
74
|
@monitor.instrument(
|
|
75
|
-
|
|
75
|
+
"error.occurred",
|
|
76
76
|
producer_id: id,
|
|
77
77
|
messages: messages,
|
|
78
78
|
dispatched: dispatched,
|
|
79
79
|
error: re_raised,
|
|
80
|
-
type:
|
|
80
|
+
type: "messages.produce_many_async"
|
|
81
81
|
)
|
|
82
82
|
|
|
83
83
|
raise re_raised
|