quonfig 0.0.14 → 0.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/README.md +55 -11
- data/lib/quonfig/client.rb +398 -22
- data/lib/quonfig/datadir.rb +8 -3
- data/lib/quonfig/sse_config_client.rb +550 -93
- data/lib/quonfig/version.rb +1 -1
- data/lib/quonfig/worker_supervisor.rb +186 -0
- data/lib/quonfig.rb +2 -1
- data/quonfig.gemspec +0 -1
- metadata +3 -16
|
@@ -2,154 +2,611 @@
|
|
|
2
2
|
|
|
3
3
|
require 'base64'
|
|
4
4
|
require 'json'
|
|
5
|
+
require 'net/http'
|
|
6
|
+
require 'uri'
|
|
5
7
|
|
|
6
8
|
module Quonfig
|
|
9
|
+
# Event delivered to on_envelope. +id+ mirrors the SSE +id:+ field and is
|
|
10
|
+
# consumed by callers that want the server cursor (tests + last-event-id
|
|
11
|
+
# resume). +data+ is the raw +data:+ payload string. +envelope+ is the
|
|
12
|
+
# parsed Quonfig::ConfigEnvelope.
|
|
13
|
+
StreamEvent = Struct.new(:envelope, :id, :data)
|
|
14
|
+
|
|
15
|
+
# SSE client for real-time config delivery from api-delivery-sse.
|
|
16
|
+
#
|
|
17
|
+
# Owns its reconnect loop end-to-end. sdk-go, sdk-python, and sdk-node all
|
|
18
|
+
# reached the same conclusion: the wire format we consume (plain JSON
|
|
19
|
+
# envelopes in single-line +data:+ frames, no named events, no retry
|
|
20
|
+
# directives) is simple enough that an SDK-owned loop is clearer than a
|
|
21
|
+
# library wrapper, and the operator-facing reconnect counter becomes
|
|
22
|
+
# trivially correct because there is exactly one place that increments it
|
|
23
|
+
# (qfg-35sm; replaces the ld-eventsource integration from qfg-ie49 +
|
|
24
|
+
# qfg-cf52, which required log-line scraping and a raise-proof logger
|
|
25
|
+
# wrapper to observe reconnects through the upstream library).
|
|
7
26
|
class SSEConfigClient
|
|
8
27
|
class Options
  # Seconds of silence tolerated on the stream. Default 90s = 3x the 30s
  # server heartbeat, so a silent socket stall trips within one
  # missed-heartbeat window instead of waiting for the OS TCP idle timeout.
  attr_reader :sse_read_timeout

  # Seconds allowed for opening the connection.
  attr_reader :sse_connect_timeout

  # Lower backoff bound (Float seconds). The reconnect delay returns to this
  # value after a successful event delivery.
  attr_reader :sse_initial_reconnect_delay

  # Upper backoff bound (Float seconds). Failed reconnects double the delay
  # until it reaches this cap.
  attr_reader :sse_max_reconnect_delay

  # Both reconnect delays are coerced with +to_f+; the two timeouts are
  # stored exactly as given.
  def initialize(sse_read_timeout: 90,
                 sse_connect_timeout: 10,
                 sse_initial_reconnect_delay: 1.0,
                 sse_max_reconnect_delay: 30.0)
    @sse_read_timeout = sse_read_timeout
    @sse_connect_timeout = sse_connect_timeout
    @sse_initial_reconnect_delay, @sse_max_reconnect_delay =
      [sse_initial_reconnect_delay, sse_max_reconnect_delay].map(&:to_f)
  end
end
|
|
25
51
|
|
|
26
52
|
LOG = Quonfig::InternalLogger.new(self)
|
|
27
53
|
|
|
28
|
-
|
|
54
|
+
# qfg-i5xv: HTTP status codes the SDK classifies as terminal — these will
|
|
55
|
+
# not heal by retrying (bad key, revoked permission, missing endpoint).
|
|
56
|
+
# Anything else (5xx, 429, network errors) stays on the transient path.
|
|
57
|
+
TERMINAL_HTTP_CODES = [401, 403, 404].freeze
|
|
58
|
+
|
|
59
|
+
# Builds an idle client; nothing connects until #start is called.
#
# +prefab_options+ supplies +sse_api_urls+ and +sdk_key+. +config_loader+ is
# consulted for a resume cursor. +options+ falls back to Options.new and
# +logger+ falls back to LOG when not given.
#
# +on_error+: optional callable invoked on every SSE error edge. The parent
# Quonfig::Client wires this to drive @sse_state -> :error so that
# +connection_state+ reflects the disconnect (qfg-47c2.27).
def initialize(prefab_options, config_loader, options = nil, logger = nil, on_error: nil)
  # Collaborators handed in by the caller.
  @prefab_options = prefab_options
  @config_loader = config_loader
  @options = options || Options.new
  @logger = logger || LOG
  @on_error = on_error

  # Lifecycle flag shared between the caller's thread and the worker.
  @stopped = Concurrent::AtomicBoolean.new(false)

  # Counters, each guarded by its own mutex.
  @restart_mutex = Mutex.new
  @restart_total = 0
  @on_envelope_error_mutex = Mutex.new
  @on_envelope_error_total = 0

  # Connection currently in flight (nil when none), guarded by @conn_mutex.
  @conn_mutex = Mutex.new
  @active_http = nil

  # URL rotation position (-1 so the first pick is index 0) and resume cursor.
  @source_index = -1
  @last_event_id = nil
end
|
|
82
|
+
|
|
83
|
+
# Layer 1 (SSE) reconnect counter, read under its mutex. It is bumped once
# per reconnect attempt -- not per error edge and not per envelope. Read by
# Quonfig::Client#worker_restart_total(layer: '1') and asserted by chaos
# scenario 09 (>= 5 after 5 proxy flaps in 30s).
def restart_total
  @restart_mutex.synchronize do
    @restart_total
  end
end
|
|
35
90
|
|
|
91
|
+
# qfg-m3lk: number of user-supplied on_envelope callback invocations that
# raised, read under its mutex. Surfaced for operator visibility: a non-zero
# value while restart_total stays stable points at a caller-side listener
# bug rather than a transport problem. (Before that fix such raises reached
# run_loop's rescue, looked like transport errors, and caused reconnect
# storms.)
def on_envelope_error_total
  @on_envelope_error_mutex.synchronize do
    @on_envelope_error_total
  end
end
|
|
99
|
+
|
|
100
|
+
# Spawns the background worker that runs the reconnect loop, handing every
# delivered event to +on_envelope+.
#
# Returns nil without starting anything when no SSE URLs are configured.
# Returns the worker Thread otherwise. Calling #start again while that
# worker is still alive returns the existing thread instead of spawning a
# second stream: a duplicate worker would deliver every envelope twice and
# the earlier thread would no longer be reachable through @worker for
# #close to join. A worker that has already exited is replaced.
def start(&on_envelope)
  urls = @prefab_options.sse_api_urls
  return if urls.nil? || urls.empty?
  return @worker if @worker&.alive?

  @worker = Thread.new { run_loop(&on_envelope) }
end
|
|
105
|
+
|
|
106
|
+
# Shuts the client down. Marks it stopped, finishes the in-flight
# connection (if any) from the calling thread so the worker can observe the
# failure, see @stopped set, and leave its loop, then waits up to two
# seconds for the worker. Safe to call more than once. Returns nil.
def close
  @stopped.make_true

  @conn_mutex.synchronize do
    begin
      @active_http.finish unless @active_http.nil?
    rescue StandardError
      # already closed / never started -- idempotent
    end
    @active_http = nil
  end

  worker = @worker
  worker.join(2) unless worker.nil?
  @worker = nil
end
|
|
122
|
+
|
|
123
|
+
# Request headers for the SSE GET. Public so tests can assert their shape;
# the request body is always empty, so this is everything api-delivery-sse
# sees. +Last-Event-Id+ is added only when #current_cursor yields a value.
def headers
  credentials = Base64.strict_encode64("1:#{@prefab_options.sdk_key}")
  base = {
    'Authorization' => "Basic #{credentials}",
    'Accept' => 'text/event-stream',
    'Cache-Control' => 'no-cache',
    'X-Quonfig-SDK-Version' => "ruby-#{Quonfig::VERSION}"
  }

  resume_from = current_cursor
  return base unless resume_from

  base['Last-Event-Id'] = resume_from
  base
end
|
|
40
138
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
139
|
+
# Picks the Last-Event-ID for the next request. Sources, highest priority
# first:
#   1. @last_event_id                -- id of the most recent event processed
#   2. config_loader.version         -- non-empty String only
#   3. config_loader.highwater_mark  -- positive Numeric (stringified) or
#                                       non-empty String
# The loader is probed with respond_to?, so either accessor may be absent.
# Returns nil when none of the sources has a usable value.
def current_cursor
  sticky = @last_event_id
  return sticky if sticky && !sticky.empty?

  if @config_loader.respond_to?(:version)
    etag = @config_loader.version
    return etag if etag.is_a?(String) && !etag.empty?
  end

  if @config_loader.respond_to?(:highwater_mark)
    mark = @config_loader.highwater_mark
    case mark
    when Numeric
      return mark.to_s if mark.positive?
    when String
      return mark unless mark.empty?
    end
  end

  nil
end
|
|
50
161
|
|
|
51
|
-
|
|
52
|
-
loop do
|
|
53
|
-
sleep @options.sleep_delay_for_new_connection_check
|
|
162
|
+
private
|
|
54
163
|
|
|
55
|
-
|
|
164
|
+
# Long-lived reconnect loop; one pass of the +until+ body is one connect
# attempt. restart_total is bumped before every retry, so the counter
# answers "how many times have we reconnected after a drop" rather than
# "how many connect attempts have occurred" -- the first attempt is not a
# restart.
#
# qfg-tj18: everything runs under
# +Thread.handle_interrupt(SSEReadDeadlineExceeded => :on_blocking)+ so a
# watchdog raise that has already been queued lands only at a blocking
# checkpoint. stream_once re-enables +:immediate+ around +read_body+, where
# the raise is supposed to wake the read. The outer per-iteration rescue is
# a backstop for a raise that lands outside the inner +rescue
# StandardError+ (e.g. inside +interruptible_sleep+ between attempts), so
# the worker thread does not die silently.
def run_loop(&on_envelope)
  Thread.handle_interrupt(SSEReadDeadlineExceeded => :on_blocking) do
    backoff = @options.sse_initial_reconnect_delay
    is_retry = false

    until @stopped.value
      begin
        if is_retry
          increment_restart!
          interruptible_sleep(jittered(backoff))
          break if @stopped.value
        end
        is_retry = true

        delivered = false
        begin
          stream_once do |event|
            delivered = true
            # Record the cursor before the listener runs: the event did
            # reach us, so a raising listener must not hold back the
            # Last-Event-Id used by the next reconnect.
            @last_event_id = event.id if event.id
            # qfg-m3lk: listener exceptions are isolated so a buggy
            # callback cannot pose as a transport error and force a
            # reconnect.
            invoke_on_envelope_safely(on_envelope, event)
            # A connection good enough to deliver an envelope earns a
            # backoff reset. During a sustained outage no event arrives,
            # so exponential growth still applies.
            backoff = @options.sse_initial_reconnect_delay
          end
        rescue StandardError => e
          handle_error(e) unless @stopped.value
        end

        # Grow the backoff only when this attempt delivered nothing. A
        # session that delivered events and then ended (normal LB
        # recycling) keeps the current delay -- mirrors sdk-go's
        # `connectedOK` distinction.
        backoff = [backoff * 2, @options.sse_max_reconnect_delay].min unless delivered
      rescue SSEReadDeadlineExceeded => e
        # Backstop (qfg-tj18): a watchdog raise that landed outside
        # stream_once -- typically in interruptible_sleep -- is logged
        # loudly and the loop carries on.
        @logger.error "SSE watchdog late-raise contained: #{e.inspect}; resuming loop"
      end
    end
  end
ensure
  register_active(nil)
end
|
|
67
235
|
|
|
68
|
-
|
|
69
|
-
|
|
236
|
+
# Opens one SSE request and yields each parsed event to +block+ until the
# stream ends (clean FIN, error, or stop). Raises on transport errors and on
# non-200 responses so the caller can apply backoff; a clean FIN returns
# without raising.
#
# A watchdog thread interrupts the read if no chunk arrives within
# +sse_read_timeout+. Net::HTTP#read_timeout is NOT reliable for the
# streaming +read_body do |chunk|+ form (a silent server stall can block
# past the configured deadline), so the watchdog is reset on every chunk
# (chaos scenario 02 -- sse_silent_stall).
#
# Error reporting:
# * 401/403/404 (TERMINAL_HTTP_CODES) mark the client stopped, report
#   on_error here, then raise SSEHTTPTerminalError. Because @stopped is
#   already set, the caller's +handle_error ... unless @stopped.value+
#   guard does not report it a second time.
# * Any other non-200 is logged with its URL and raised as
#   SSEHTTPStatusError. on_error is deliberately NOT invoked here: the
#   caller's handle_error reports it, so each failure produces exactly one
#   on_error edge.
def stream_once(&block)
  url = "#{current_url}/api/v2/sse/config"
  cursor = current_cursor
  @logger.debug "SSE Streaming Connect to #{url} start_at #{cursor.inspect}"

  uri = URI(url)
  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = (uri.scheme == 'https')
  http.open_timeout = @options.sse_connect_timeout
  # Net::HTTP's read_timeout stays as a backstop for the header read; the
  # watchdog covers the body path.
  http.read_timeout = @options.sse_read_timeout

  req = Net::HTTP::Get.new(uri.request_uri, headers)

  watchdog = nil
  begin
    # Connection and watchdog setup live inside the protected region so a
    # failure part-way through still runs the cleanup below.
    http.start
    register_active(http)

    watchdog = ReadDeadlineWatchdog.new(
      worker: Thread.current, deadline_s: @options.sse_read_timeout,
      stopped: @stopped, logger: @logger
    )
    watchdog.start

    http.request(req) do |resp|
      code = resp.code.to_i
      if TERMINAL_HTTP_CODES.include?(code)
        # qfg-i5xv: these will not heal by retrying. Mark stopped *before*
        # invoking on_error so the terminal state is already locked in if
        # the parent callback inspects it.
        err = SSEHTTPTerminalError.new(code)
        @logger.error "SSE Streaming Terminal Error: HTTP #{code} for url #{url}; will not retry"
        @stopped.make_true
        invoke_on_error(err)
        raise err
      end
      if code != 200
        @logger.error "SSE Streaming Error: HTTP #{code} for url #{url}"
        raise SSEHTTPStatusError.new(code)
      end

      # Hand the injected logger to the parser so malformed-payload
      # messages go to the same sink as the rest of this client's logs.
      parser = EventParser.new(logger: @logger)
      # qfg-tj18: be explicit that the watchdog raise may land anywhere
      # inside the body read.
      Thread.handle_interrupt(SSEReadDeadlineExceeded => :immediate) do
        resp.read_body do |chunk|
          watchdog.reset!
          break if @stopped.value

          parser.feed(chunk, &block)
        end
      end
      # read_body returned without raising; the caller reconnects and bumps
      # restart_total on its next iteration.
      @logger.debug "SSE stream ended for url #{url}"
    end
  ensure
    # Defer a late watchdog raise until cleanup has finished; otherwise it
    # could abort this ensure half-way and leave the connection registered
    # and open.
    Thread.handle_interrupt(SSEReadDeadlineExceeded => :never) do
      watchdog&.stop
      register_active(nil)
      begin
        http.finish if http.started?
      rescue StandardError
        # already closed
      end
    end
  end
end
|
|
93
323
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
324
|
+
# Records +http+ (or nil) as the connection currently in flight so #close
# can reach it from another thread. The write happens under @conn_mutex.
def register_active(http)
  @conn_mutex.synchronize do
    @active_http = http
  end
end
|
|
329
|
+
|
|
330
|
+
# Adds one to the reconnect counter under @restart_mutex and returns the
# new total.
def increment_restart!
  @restart_mutex.synchronize do
    @restart_total = @restart_total + 1
  end
end
|
|
333
|
+
|
|
334
|
+
# Logs a streaming failure at error level, then forwards it to the
# on_error hook via invoke_on_error.
def handle_error(error)
  @logger.error("SSE Streaming Error: #{error.inspect}")
  invoke_on_error(error)
end
|
|
338
|
+
|
|
339
|
+
# Calls the user listener with (envelope, event, :sse) and contains any
# StandardError it raises: the failure is counted in
# @on_envelope_error_total and logged with the first five backtrace frames.
#
# qfg-m3lk: StandardError (NOT Exception) is the boundary, so SystemExit /
# Interrupt / SignalException still escape -- Ctrl-C inside a customer
# callback must still kill the process.
def invoke_on_envelope_safely(on_envelope, event)
  begin
    on_envelope.call(event.envelope, event, :sse)
  rescue StandardError => e
    @on_envelope_error_mutex.synchronize { @on_envelope_error_total += 1 }
    frames = e.backtrace || []
    trace = frames.first(5).join("\n ")
    @logger.error "SSE on_envelope callback raised: #{e.class}: #{e.message}\n #{trace}"
  end
end
|
|
350
|
+
|
|
351
|
+
# Forwards +error+ to the optional on_error hook. A hook that itself raises
# a StandardError is logged and swallowed. Does nothing when no hook was
# supplied.
def invoke_on_error(error)
  hook = @on_error
  return unless hook

  hook.call(error)
rescue StandardError => e
  @logger.error "SSE on_error callback raised: #{e.inspect}"
end
|
|
360
|
+
|
|
361
|
+
# Applies jitter to a backoff delay: the result is uniformly distributed in
# [delay / 2, delay), i.e. the nominal delay shortened by up to 50%. This
# caps thundering-herd amplitude after a partition heals.
#
# The random part is computed as +rand * half+ on purpose. Kernel#rand(max)
# truncates a Float +max+ to an Integer, so +rand(delay / 2.0)+ returned
# whole numbers only (no jitter at all for delay = 2.0) and, for a max
# below 1.0, fell back to a float in [0, 1) that could push the result
# above +delay+.
#
# @param delay [Numeric] nominal delay in seconds
# @return [Float] jittered delay in seconds; 0.0 when +delay+ is 0
def jittered(delay)
  half = delay / 2.0
  half + (rand * half)
end
|
|
368
|
+
|
|
369
|
+
# Sleeps for up to +seconds+, waking at least every 0.1s to re-check
# @stopped, so #close during a long backoff does not hold up shutdown for
# tens of seconds. Uses the monotonic clock. Returns nil.
def interruptible_sleep(seconds)
  clock = -> { Process.clock_gettime(Process::CLOCK_MONOTONIC) }
  wake_at = clock.call + seconds

  loop do
    break if @stopped.value

    left = wake_at - clock.call
    break if left <= 0

    sleep([left, 0.1].min)
  end
end
|
|
380
|
+
|
|
381
|
+
# Advances the round-robin position over the configured SSE URLs and
# returns the URL at the new position. Same rotation rule as the previous
# implementation, kept so multi-region failover behavior is unchanged.
def current_url
  candidates = @prefab_options.sse_api_urls
  @source_index = @source_index.succ % candidates.size
  candidates[@source_index]
end
|
|
389
|
+
|
|
390
|
+
# Internal: sentinel error for non-200 SSE responses. The status code is
# available via #status_code and appears in #message ("HTTP <code>"), so
# parent on_error callbacks can log meaningfully without depending on
# ld-eventsource's error hierarchy.
class SSEHTTPStatusError < StandardError
  attr_reader :status_code

  def initialize(status_code)
    super("HTTP #{status_code}")
    @status_code = status_code
  end
end

# qfg-i5xv: terminal HTTP failures the SDK will not retry. 401 = bad key,
# 403 = revoked workspace permission, 404 = wrong endpoint / missing
# workspace. Subclasses SSEHTTPStatusError so on_error callbacks that only
# check `is_a?(SSEHTTPStatusError)` keep working, while callers that want
# to distinguish (alerting, OpenFeature provider error events) can
# dispatch on the subclass.
SSEHTTPTerminalError = Class.new(SSEHTTPStatusError)

# Raised by the watchdog into the worker thread when the per-chunk read
# deadline elapses. Being a StandardError, run_loop's rescue treats it like
# any other transport error for backoff/restart purposes.
SSEReadDeadlineExceeded = Class.new(StandardError)
|
|
414
|
+
|
|
415
|
+
# Background watchdog that interrupts the worker thread when no chunk has
# arrived for +deadline_s+ seconds. It uses Thread#raise: per the original
# authors this is the only reliable cross-platform way to unblock a Ruby
# thread parked in Net::HTTP's body read (closing the socket from another
# thread does not wake the reader on macOS). Thread#raise is essentially
# what Timeout.timeout does internally; using it directly avoids
# Timeout.timeout's problems around ensure blocks.
class ReadDeadlineWatchdog
  # Seconds between deadline checks.
  POLL_INTERVAL = 0.25

  def initialize(worker:, deadline_s:, stopped:, logger:)
    @worker = worker
    @deadline_s = deadline_s
    @stopped = stopped
    @logger = logger
    # The mutex covers @active AND the decision to call Thread#raise. #stop
    # flips @active under the same mutex, so a stop that arrives during a
    # deadline check cannot race with @worker.raise (which would inject a
    # spurious SSEReadDeadlineExceeded right after a clean read_body
    # return).
    @mutex = Mutex.new
    @active = true
    @last_read_at = Concurrent::AtomicReference.new(monotonic_now)
  end

  # Starts the polling thread and returns it.
  def start
    @thread = Thread.new { watch }
  end

  # Marks "a chunk just arrived"; called by the worker for every chunk.
  def reset!
    @last_read_at.set(monotonic_now)
  end

  # Disarms the watchdog and waits up to one second for its thread.
  def stop
    @mutex.synchronize { @active = false }
    @thread&.join(1)
    @thread = nil
  end

  private

  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end

  # Poll loop: exits once disarmed or stopped, or after firing once.
  def watch
    loop do
      sleep POLL_INTERVAL
      break unless @mutex.synchronize { @active } && !@stopped.value

      idle = monotonic_now - @last_read_at.value
      next if idle < @deadline_s

      break if fire(idle)
    end
  rescue StandardError => e
    # The watchdog must never crash the SDK. Worst case we silently fall
    # back to Net::HTTP's own (unreliable) read_timeout.
    @logger.debug "SSE watchdog error: #{e.inspect}"
  end

  # Re-checks the armed state under the mutex and, if still armed, raises
  # into the worker. Returns true when the raise was issued.
  def fire(idle)
    @mutex.synchronize do
      if @active && !@stopped.value
        @logger.debug "SSE read deadline exceeded (#{idle.round(1)}s idle >= #{@deadline_s}s); interrupting worker"
        @worker.raise(SSEReadDeadlineExceeded.new("SSE read deadline #{@deadline_s}s exceeded"))
        true
      else
        false
      end
    end
  end
end
|
|
100
484
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
485
|
+
# Streaming SSE parser. Accepts byte chunks and yields one
# Quonfig::StreamEvent per complete event. Tolerates:
# - chunks that split a line (or a multi-byte character) -- line assembly
#   is delegated to LineReader
# - +data:+ and +data: + (one optional leading space is stripped)
# - +:comment+ lines (keepalives -- ignored)
# - multi-line +data:+ (joined with +\n+)
# Only +data+ and +id+ fields are interpreted; +event:+, +retry:+ and lines
# without a colon are ignored.
#
# A payload that is not valid JSON, or is valid JSON but not an object
# (+null+, an array, a bare number or string), is logged and skipped; one
# bad event does not tear down the stream.
class EventParser
  # +logger+ receives malformed-payload errors; falls back to LOG when nil.
  def initialize(logger: nil)
    @logger = logger
    @reader = LineReader.new
    @data = +''
    @have_data = false
    @id = nil
  end

  # Feeds one chunk and yields every event completed by it.
  def feed(chunk)
    @reader.feed(chunk) do |line|
      if line.empty?
        # A blank line terminates the current event.
        event = flush
        yield event if event
      elsif line.start_with?(':')
        # comment / keepalive -- ignore
      else
        process_field(line)
      end
    end
  end

  private

  # Splits "name: value" and accumulates +data+ / +id+.
  def process_field(line)
    idx = line.index(':')
    return unless idx

    name = line[0...idx]
    rest = line[(idx + 1)..]
    rest = rest[1..] if rest.start_with?(' ')

    case name
    when 'data'
      if @have_data
        @data << "\n" << rest
      else
        @data = rest
        @have_data = true
      end
    when 'id'
      # An id containing NUL is ignored.
      @id = rest unless rest.include?("\x00")
      # event: / retry: are intentionally ignored
    end
  end

  # Builds the event for the accumulated data, or returns nil when there is
  # no data or the payload is unusable.
  def flush
    return nil unless @have_data

    data = @data
    id = @id
    @data = +''
    @have_data = false
    # NB: @id persists across events -- the last event id is sticky until
    # overwritten.

    parsed = parse_payload(data)
    return nil unless parsed

    envelope = Quonfig::ConfigEnvelope.new(
      configs: parsed['configs'] || [],
      meta: parsed['meta'] || {}
    )
    StreamEvent.new(envelope, id, data)
  end

  # Parses +data+ as JSON and returns the Hash, or logs and returns nil.
  # Without the Hash check a payload such as +null+ or +[1]+ would raise
  # NoMethodError / TypeError on +parsed['configs']+, escape #feed, and be
  # treated by the caller as a failure of the whole stream.
  def parse_payload(data)
    parsed = JSON.parse(data)
    return parsed if parsed.is_a?(Hash)

    (@logger || LOG).error "SSE Streaming Error: malformed JSON: expected an object, got #{parsed.class}"
    nil
  rescue JSON::ParserError => e
    (@logger || LOG).error "SSE Streaming Error: malformed JSON: #{e.message}"
    nil
  end
end
|
|
134
568
|
|
|
135
|
-
#
|
|
136
|
-
#
|
|
137
|
-
#
|
|
138
|
-
#
|
|
139
|
-
#
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
569
|
+
# Byte-level line reader. Accepts arbitrary chunks and yields each complete
# line, terminator stripped, as a UTF-8-tagged String. CR, LF and CRLF are
# all accepted as terminators, including a CRLF pair split across two
# chunks. The buffer is scanned as raw bytes and a line is tagged UTF-8
# only once it has been sliced out whole, so a multi-byte character
# spanning two chunks does not raise Encoding::CompatibilityError.
class LineReader
  CR = 13
  LF = 10

  def initialize
    @buffer = String.new(encoding: Encoding::BINARY)
    @last_was_cr = false
  end

  def feed(chunk)
    @buffer << chunk.b

    while (pos = @buffer.index(/[\r\n]/))
      byte = @buffer.getbyte(pos)
      pending_cr = @last_was_cr
      @last_was_cr = false

      if pending_cr && pos.zero? && byte == LF
        # Second half of a CRLF pair whose CR ended the previous chunk; the
        # line was already yielded, so just drop the LF.
        @buffer.slice!(0, 1)
        next
      end

      line = @buffer.byteslice(0, pos).force_encoding('UTF-8')
      taken = pos + 1
      if byte == CR
        if taken == @buffer.bytesize
          # CR is the last buffered byte -- a matching LF may still arrive
          # in the next chunk.
          @last_was_cr = true
        elsif @buffer.getbyte(taken) == LF
          taken += 1
        end
      end

      @buffer.slice!(0, taken)
      yield line
    end
  end
end
|
|
154
611
|
end
|
|
155
612
|
end
|