quonfig 0.0.14 → 0.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,154 +2,611 @@
2
2
 
3
3
  require 'base64'
4
4
  require 'json'
5
+ require 'net/http'
6
+ require 'uri'
5
7
 
6
8
  module Quonfig
9
+ # Event delivered to on_envelope. +id+ mirrors the SSE +id:+ field and is
10
+ # consumed by callers that want the server cursor (tests + last-event-id
11
+ # resume). +data+ is the raw +data:+ payload string. +envelope+ is the
12
+ # parsed Quonfig::ConfigEnvelope.
13
+ StreamEvent = Struct.new(:envelope, :id, :data)
14
+
15
+ # SSE client for real-time config delivery from api-delivery-sse.
16
+ #
17
+ # Owns its reconnect loop end-to-end. sdk-go, sdk-python, and sdk-node all
18
+ # reached the same conclusion: the wire format we consume (plain JSON
19
+ # envelopes in single-line +data:+ frames, no named events, no retry
20
+ # directives) is simple enough that an SDK-owned loop is clearer than a
21
+ # library wrapper, and the operator-facing reconnect counter becomes
22
+ # trivially correct because there is exactly one place that increments it
23
+ # (qfg-35sm; replaces the ld-eventsource integration from qfg-ie49 +
24
+ # qfg-cf52, which required log-line scraping and a raise-proof logger
25
+ # wrapper to observe reconnects through the upstream library).
7
26
  class SSEConfigClient
8
27
  class Options
9
- attr_reader :sse_read_timeout, :seconds_between_new_connection,
10
- :sse_default_reconnect_time, :sleep_delay_for_new_connection_check,
11
- :errors_to_close_connection
12
-
13
- def initialize(sse_read_timeout: 300,
14
- seconds_between_new_connection: 5,
15
- sleep_delay_for_new_connection_check: 1,
16
- sse_default_reconnect_time: SSE::Client::DEFAULT_RECONNECT_TIME,
17
- errors_to_close_connection: [HTTP::ConnectionError])
28
+ attr_reader :sse_read_timeout, :sse_connect_timeout,
29
+ :sse_initial_reconnect_delay, :sse_max_reconnect_delay
30
+
31
+ # sse_read_timeout: 90s = 3x the 30s server heartbeat. A silent socket
32
+ # stall trips within one missed-heartbeat window rather than the OS
33
+ # TCP idle (often hours).
34
+ #
35
+ # sse_initial_reconnect_delay / sse_max_reconnect_delay: backoff bounds.
36
+ # Each failed reconnect doubles the delay (with +/-50% jitter) up to the
37
+ # max. A successful event delivery resets the delay to the initial
38
+ # value — matches sdk-python's policy. A clean server-initiated FIN is
39
+ # treated as "not a failure for backoff purposes" because LBs recycling
40
+ # connections is normal; the reconnect counter still increments.
41
+ def initialize(sse_read_timeout: 90,
42
+ sse_connect_timeout: 10,
43
+ sse_initial_reconnect_delay: 1.0,
44
+ sse_max_reconnect_delay: 30.0)
18
45
  @sse_read_timeout = sse_read_timeout
19
- @seconds_between_new_connection = seconds_between_new_connection
20
- @sse_default_reconnect_time = sse_default_reconnect_time
21
- @sleep_delay_for_new_connection_check = sleep_delay_for_new_connection_check
22
- @errors_to_close_connection = errors_to_close_connection
46
+ @sse_connect_timeout = sse_connect_timeout
47
+ @sse_initial_reconnect_delay = sse_initial_reconnect_delay.to_f
48
+ @sse_max_reconnect_delay = sse_max_reconnect_delay.to_f
23
49
  end
24
50
  end
25
51
 
26
52
  LOG = Quonfig::InternalLogger.new(self)
27
53
 
28
- def initialize(prefab_options, config_loader, options = nil, logger = nil)
54
+ # qfg-i5xv: HTTP status codes the SDK classifies as terminal — these will
55
+ # not heal by retrying (bad key, revoked permission, missing endpoint).
56
+ # Anything else (5xx, 429, network errors) stays on the transient path.
57
+ TERMINAL_HTTP_CODES = [401, 403, 404].freeze
58
+
59
+ # +on_error+: optional callable invoked on every SSE error edge. Parent
60
+ # Quonfig::Client wires this to drive @sse_state -> :error so that
61
+ # +connection_state+ reflects the disconnect (qfg-47c2.27).
62
def initialize(prefab_options, config_loader, options = nil, logger = nil, on_error: nil)
  # Collaborators supplied by the parent client.
  @prefab_options = prefab_options
  @config_loader = config_loader
  @options = options || Options.new
  @logger = logger || LOG
  @on_error = on_error

  # Shutdown flag shared with the worker thread.
  @stopped = Concurrent::AtomicBoolean.new(false)

  # Counters, each guarded by its own mutex so readers on other threads
  # see a consistent value.
  @restart_mutex = Mutex.new
  @restart_total = 0
  @on_envelope_error_mutex = Mutex.new
  @on_envelope_error_total = 0

  # Connection currently in use, tracked so #close can reach it.
  @conn_mutex = Mutex.new
  @active_http = nil

  # -1 so that the first #current_url call lands on index 0.
  @source_index = -1
  # No event seen yet; #current_cursor falls back to the config loader.
  @last_event_id = nil
end
82
+
83
+ # Layer 1 (SSE) reconnect counter. Bumped exactly once per reconnect
84
+ # attempt — never per error edge, never per envelope. Read by
85
+ # Quonfig::Client#worker_restart_total(layer: '1') and asserted by chaos
86
+ # scenario 09 (>= 5 after 5 proxy flaps in 30s).
87
def restart_total
  # Read under the same mutex the worker uses to bump the counter.
  @restart_mutex.synchronize do
    @restart_total
  end
end
35
90
 
91
+ # qfg-m3lk: count of user-supplied on_envelope callback invocations that
92
+ # raised. Surfaced for operator visibility — a non-zero value here with
93
+ # restart_total stable means a caller-side listener bug, not a transport
94
+ # problem. (Pre-fix, those raises propagated into run_loop's rescue and
95
+ # masqueraded as transport errors, causing reconnect storms.)
96
def on_envelope_error_total
  # Read under the same mutex the worker uses when a listener raises.
  @on_envelope_error_mutex.synchronize do
    @on_envelope_error_total
  end
end
99
+
100
def start(&on_envelope)
  # Nothing to stream from: return nil without spawning a worker.
  urls = @prefab_options.sse_api_urls
  return if urls.nil? || urls.empty?

  # The reconnect loop runs on its own thread; the block is forwarded to it
  # and receives every delivered envelope.
  @worker = Thread.new { run_loop(&on_envelope) }
end
105
+
106
+ # Shut down. Interrupts the in-flight stream by closing the underlying
107
+ # socket from this thread — the worker thread observes the resulting
108
+ # IOError, sees @stopped == true, and exits cleanly.
36
109
  def close
37
- @retry_thread&.kill
38
- @client&.close
110
+ @stopped.make_true
111
+ @conn_mutex.synchronize do
112
+ begin
113
+ @active_http&.finish
114
+ rescue StandardError
115
+ # already closed / never started — idempotent
116
+ end
117
+ @active_http = nil
118
+ end
119
+ @worker&.join(2)
120
+ @worker = nil
121
+ end
122
+
123
+ # Public so tests can assert the headers shape. Body of the request is
124
+ # always empty; this is the full set api-delivery-sse sees.
125
def headers
  # HTTP Basic credentials: fixed user "1", SDK key as the password.
  encoded_key = Base64.strict_encode64("1:#{@prefab_options.sdk_key}")

  request_headers = {
    'Authorization' => "Basic #{encoded_key}",
    'Accept' => 'text/event-stream',
    'Cache-Control' => 'no-cache',
    'X-Quonfig-SDK-Version' => "ruby-#{Quonfig::VERSION}"
  }

  # Only advertise a resume point when one exists.
  resume_from = current_cursor
  return request_headers unless resume_from

  request_headers['Last-Event-Id'] = resume_from
  request_headers
end
40
138
 
41
- def start(&load_configs)
42
- if @prefab_options.sse_api_urls.empty?
43
- @logger.debug 'No SSE api_urls configured'
44
- return
139
+ # Compute a Last-Event-ID for the next request. Three sources, in
140
+ # priority order:
141
+ # 1. @last_event_id -- set by the most recent event we processed
142
+ # 2. config_loader.version -- string ETag from last HTTP fetch
143
+ # 3. config_loader.highwater_mark -- legacy numeric cursor
144
+ # Returns nil if no prior state exists.
145
def current_cursor
  # 1. The id of the last event this client processed wins.
  sticky = @last_event_id
  return sticky if sticky && !sticky.empty?

  # 2. Otherwise a non-empty String version from the config loader.
  if @config_loader.respond_to?(:version)
    etag = @config_loader.version
    return etag if etag.is_a?(String) && !etag.empty?
  end

  # 3. Otherwise the loader's highwater mark: a positive number (sent as a
  #    String) or a non-empty String. Anything else means "no cursor".
  return nil unless @config_loader.respond_to?(:highwater_mark)

  mark = @config_loader.highwater_mark
  case mark
  when Numeric then mark.positive? ? mark.to_s : nil
  when String then mark.empty? ? nil : mark
  end
end
50
161
 
51
- @retry_thread = Thread.new do
52
- loop do
53
- sleep @options.sleep_delay_for_new_connection_check
162
+ private
54
163
 
55
- next unless @client.closed?
164
+ # Long-lived reconnect loop. One iteration = one connect attempt. Bumps
165
+ # restart_total *before* every retry — so the counter answers "how many
166
+ # times have we reconnected after a drop" rather than "how many connect
167
+ # attempts have occurred." The first attempt is not a restart.
168
+ #
169
+ # qfg-tj18: the body is wrapped in
170
+ # +Thread.handle_interrupt(SSEReadDeadlineExceeded => :on_blocking)+ so a
171
+ # watchdog raise that's already been queued (the watchdog's mutex covers
172
+ # the *decision* to fire but cannot un-queue a delivered raise) lands
173
+ # only at a blocking-IO checkpoint. Inside stream_once we explicitly
174
+ # re-enable +:immediate+ around the +read_body+ block where we *do*
175
+ # want the raise to wake the read. A per-iteration paranoid rescue
176
+ # catches any late-landing raise that escapes the inner +rescue
177
+ # StandardError+ (e.g. lands inside +interruptible_sleep+ between
178
+ # iterations) so the worker thread never silently dies.
179
def run_loop(&on_envelope)
  # Defer SSEReadDeadlineExceeded to blocking points for the whole loop.
  Thread.handle_interrupt(SSEReadDeadlineExceeded => :on_blocking) do
    backoff = @options.sse_initial_reconnect_delay
    reconnecting = false

    until @stopped.value
      begin
        # Every attempt after the first counts as a restart and is preceded
        # by a jittered backoff sleep.
        if reconnecting
          increment_restart!
          interruptible_sleep(jittered(backoff))
          break if @stopped.value
        end
        reconnecting = true

        delivered = false
        begin
          stream_once do |event|
            delivered = true
            # Advance the resume cursor before the listener runs, so a
            # raising listener cannot cause the same event to be replayed.
            @last_event_id = event.id if event.id
            # Listener failures are contained and never look like
            # transport errors.
            invoke_on_envelope_safely(on_envelope, event)
            # A delivered event proves the connection is healthy: restart
            # the backoff from its initial value.
            backoff = @options.sse_initial_reconnect_delay
          end
        rescue StandardError => e
          # During shutdown, errors are expected and not reported.
          handle_error(e) unless @stopped.value
        end

        # Double the backoff (capped) unless this attempt delivered at
        # least one event. Note that an attempt which connected but ended
        # before any event arrived also doubles it.
        unless delivered
          backoff = [backoff * 2, @options.sse_max_reconnect_delay].min
        end
      rescue SSEReadDeadlineExceeded => e
        # Backstop: a deadline raise that surfaced outside the inner rescue
        # must not end the worker thread.
        @logger.error "SSE watchdog late-raise contained: #{e.inspect}; resuming loop"
      end
    end
  end
ensure
  register_active(nil)
end
67
235
 
68
- def connect(&load_configs)
69
- url = "#{source}/api/v2/sse/config"
236
+ # Opens one SSE request and yields each parsed event until the stream
237
+ # ends (clean FIN, error, or stop). Raises on transport errors so the
238
+ # caller can apply backoff. Clean FIN returns without raising.
239
+ #
240
+ # A watchdog thread interrupts this worker (Thread#raise) if no bytes arrive within
241
+ # +sse_read_timeout+. Net::HTTP#read_timeout is NOT reliable for the
242
+ # streaming +read_body do |chunk|+ form — the underlying BufferedIO
243
+ # reads bypass it in practice (a silent server stall blocks indefinitely
244
+ # against a configured deadline). sdk-go and sdk-node hit the same
245
+ # gotcha and solve it with the same shape: per-chunk deadline reset, asynchronous
246
+ # cancellation on expiry (chaos scenario 02 — sse_silent_stall).
247
def stream_once(&block)
  # Performs exactly one SSE request against the next URL in the rotation
  # and yields every parsed event to the block until the stream ends.
  #
  # Returns normally when the body ends without an error (server FIN, or
  # the read loop breaking after a stop request). Raises on transport
  # errors, on a read-deadline expiry (SSEReadDeadlineExceeded, raised into
  # this thread by the watchdog), SSEHTTPTerminalError for 401/403/404 and
  # SSEHTTPStatusError for any other non-200 status.
  url = "#{current_url}/api/v2/sse/config"
  cursor = current_cursor
  @logger.debug "SSE Streaming Connect to #{url} start_at #{cursor.inspect}"

  uri = URI(url)
  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = (uri.scheme == 'https')
  http.open_timeout = @options.sse_connect_timeout
  # Backstop for the header read; the watchdog covers the streaming body.
  http.read_timeout = @options.sse_read_timeout

  req = Net::HTTP::Get.new(uri.request_uri, headers)

  watchdog = nil
  http.start
  begin
    # Everything after a successful start sits inside the ensure so the
    # connection is released even if the watchdog cannot be created.
    register_active(http)

    watchdog = ReadDeadlineWatchdog.new(
      worker: Thread.current, deadline_s: @options.sse_read_timeout,
      stopped: @stopped, logger: @logger
    )
    watchdog.start

    http.request(req) do |resp|
      code = resp.code.to_i
      if TERMINAL_HTTP_CODES.include?(code)
        # Retrying cannot fix these. @stopped is set before on_error runs,
        # so the caller's `handle_error unless @stopped.value` guard does
        # not report the same error a second time.
        err = SSEHTTPTerminalError.new(code)
        @logger.error "SSE Streaming Terminal Error: HTTP #{code} for url #{url}; will not retry"
        @stopped.make_true
        invoke_on_error(err)
        raise err
      end
      if code != 200
        # Transient status: log the URL here and raise. on_error is NOT
        # invoked here because run_loop's rescue reports the raised error
        # through handle_error; invoking it in both places delivered two
        # on_error edges for a single failure.
        @logger.error "SSE Streaming Error: HTTP #{code} for url #{url}"
        raise SSEHTTPStatusError.new(code)
      end

      # Hand the parser this client's logger so malformed-payload messages
      # go to the same sink as every other message from this client.
      parser = EventParser.new(logger: @logger)
      # Let a watchdog raise land immediately while blocked on the body.
      Thread.handle_interrupt(SSEReadDeadlineExceeded => :immediate) do
        resp.read_body do |chunk|
          watchdog.reset!
          break if @stopped.value

          parser.feed(chunk, &block)
        end
      end
      # Reached only when the body read ended without raising. A deadline
      # expiry does not get here: it arrives as SSEReadDeadlineExceeded.
      @logger.debug "SSE stream ended for url #{url}"
    end
  ensure
    watchdog&.stop
    register_active(nil)
    begin
      http.finish if http.started?
    rescue StandardError
      # already closed
    end
  end
end
93
323
 
94
- envelope = Quonfig::ConfigEnvelope.new(
95
- configs: parsed['configs'] || [],
96
- meta: parsed['meta'] || {}
97
- )
98
- load_configs.call(envelope, event, :sse)
324
+ # Track the active connection so close() can interrupt a blocked
325
+ # read_body from another thread. Guarded by @conn_mutex.
326
def register_active(http)
  # Swap the tracked connection (or clear it with nil) under @conn_mutex,
  # the same lock #close takes before touching @active_http.
  @conn_mutex.synchronize do
    @active_http = http
  end
end
329
+
330
def increment_restart!
  # Bump the reconnect counter under its mutex; returns the new total.
  @restart_mutex.synchronize do
    @restart_total += 1
  end
end
333
+
334
def handle_error(error)
  # Log first, then notify the optional on_error callback.
  message = "SSE Streaming Error: #{error.inspect}"
  @logger.error message
  invoke_on_error(error)
end
338
+
339
+ # qfg-m3lk: rescue StandardError (NOT Exception) so SystemExit /
340
+ # Interrupt / SignalException still escape — Ctrl-C inside a customer
341
+ # callback must still kill the process. StandardError is the right
342
+ # boundary for "the caller's listener has a bug".
343
def invoke_on_envelope_safely(on_envelope, event)
  begin
    on_envelope.call(event.envelope, event, :sse)
  rescue StandardError => listener_error
    # Only StandardError is contained; SystemExit, Interrupt and other
    # non-StandardError exceptions still propagate.
    @on_envelope_error_mutex.synchronize { @on_envelope_error_total += 1 }
    # Keep the log line bounded: the first five backtrace frames only.
    frames = Array(listener_error.backtrace).first(5)
    bt = frames.join("\n ")
    @logger.error "SSE on_envelope callback raised: #{listener_error.class}: #{listener_error.message}\n #{bt}"
  end
end
350
+
351
def invoke_on_error(error)
  # The callback is optional, and a failure inside it is logged rather
  # than propagated to the caller.
  callback = @on_error
  return unless callback

  callback.call(error)
rescue StandardError => e
  @logger.error "SSE on_error callback raised: #{e.inspect}"
end
360
+
361
+ # +/-50% jitter — caps thundering-herd amplitude after a partition heal.
362
+ # Identical shape to ld-eventsource's Backoff#next_interval (and
363
+ # sdk-go's runLoop jitter) so we don't surprise operators familiar with
364
+ # those.
365
def jittered(delay)
  # Returns a Float drawn uniformly from [delay / 2, delay).
  #
  # Kernel#rand(max) truncates a Float max to an Integer: rand(0.5) behaves
  # like rand() and rand(1.0) is always 0. Scaling the unit-interval
  # rand() by half the delay avoids that, so every delay gets continuous
  # jitter and the result never exceeds the delay.
  half = delay / 2.0
  half + (rand * half)
end
368
+
369
+ # Sleep with interrupt: chunks the sleep so close() during a long
370
+ # backoff doesn't block shutdown for tens of seconds.
371
def interruptible_sleep(seconds)
  # Sleep in slices of at most 0.1s, re-checking @stopped between slices,
  # so a stop request ends the wait promptly. Returns nil.
  clock = -> { Process.clock_gettime(Process::CLOCK_MONOTONIC) }
  wake_at = clock.call + seconds

  loop do
    break if @stopped.value

    left = wake_at - clock.call
    break if left <= 0

    sleep(left < 0.1 ? left : 0.1)
  end
end
380
+
381
+ # Rotate through configured SSE URLs. The same rotation rule the
382
+ # previous implementation used, preserved so multi-region failover
383
+ # behavior is unchanged.
384
def current_url
  # Advance the rotation by one (wrapping) and return that URL. Each call
  # moves on, so consecutive connect attempts cycle through the list.
  endpoints = @prefab_options.sse_api_urls
  @source_index = @source_index.succ % endpoints.size
  endpoints[@source_index]
end
389
+
390
+ # Internal: HTTP-status sentinel error for non-200 SSE responses. Surfaces
391
+ # the status code through #message so parent on_error callbacks can log
392
+ # meaningfully without depending on ld-eventsource's error hierarchy.
393
class SSEHTTPStatusError < StandardError
  # The HTTP status code that triggered the error, as given to the
  # constructor.
  attr_reader :status_code

  # The message is always "HTTP <status_code>".
  def initialize(status_code)
    super("HTTP #{status_code}")
    @status_code = status_code
  end
end
401
+
402
+ # qfg-i5xv: terminal HTTP failures the SDK will not retry. 401 = bad key,
403
+ # 403 = revoked workspace permission, 404 = wrong endpoint / missing
404
+ # workspace. A subclass of SSEHTTPStatusError so existing on_error
405
+ # callbacks that only check `is_a?(SSEHTTPStatusError)` keep working,
406
+ # while customers that want to distinguish (alerting, OpenFeature
407
+ # provider error events) can dispatch on the subclass.
408
+ class SSEHTTPTerminalError < SSEHTTPStatusError; end
409
+
410
+ # Raised by the watchdog into the worker thread when the per-chunk
411
+ # read deadline elapses. Caught by run_loop's rescue, indistinguishable
412
+ # from any other transport error for backoff/restart purposes.
413
+ class SSEReadDeadlineExceeded < StandardError; end
414
+
415
+ # Background watchdog that interrupts the worker thread if no chunk
416
+ # arrives within +deadline_s+ seconds. Uses Thread#raise — the only
417
+ # reliable cross-platform way to unblock a Ruby thread blocked in
418
+ # +Net::HTTP+'s body-read on macOS. (Closing or shutting down the
419
+ # underlying socket from another thread does NOT wake the reader on
420
+ # macOS; the kernel discards future reads but the in-flight syscall
421
+ # stays blocked until something else trips. sdk-go and sdk-node solve
422
+ # the equivalent problem with context cancellation / AbortController,
423
+ # which Ruby lacks at the IO layer.) Thread#raise is essentially what
424
+ # +Timeout.timeout+ does internally; using it directly avoids
425
+ # Timeout.timeout's sketchy reputation around ensure blocks.
426
class ReadDeadlineWatchdog
  # Seconds between deadline checks.
  POLL_INTERVAL = 0.25

  # worker     - thread to interrupt when the deadline passes.
  # deadline_s - seconds without a #reset! before firing.
  # stopped    - object answering #value; true disarms the watchdog.
  # logger     - receives debug messages.
  def initialize(worker:, deadline_s:, stopped:, logger:)
    @worker = worker
    @deadline_s = deadline_s
    @stopped = stopped
    @logger = logger
    @active = true
    # One mutex guards both @active and the decision to raise, so a #stop
    # cannot slip in between "still active?" and the Thread#raise call.
    @mutex = Mutex.new
    @last_read_at = Concurrent::AtomicReference.new(monotonic_now)
  end

  # Begins polling on a background thread.
  def start
    @thread = Thread.new { watch }
  end

  # Records activity: restarts the idle clock.
  def reset!
    @last_read_at.set(monotonic_now)
  end

  # Disarms the watchdog and waits up to one second for its thread.
  def stop
    @mutex.synchronize { @active = false }
    @thread&.join(1)
    @thread = nil
  end

  private

  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end

  # Polls until disarmed, stopped, or fired once.
  def watch
    loop do
      sleep POLL_INTERVAL
      armed = @mutex.synchronize { @active }
      break if !armed || @stopped.value

      idle = monotonic_now - @last_read_at.value
      next if idle < @deadline_s

      break if fire_if_armed(idle)
    end
  rescue StandardError => e
    # The watchdog must never take the SDK down with it.
    @logger.debug "SSE watchdog error: #{e.inspect}"
  end

  # Re-checks the armed state under the mutex and, if still armed, raises
  # SSEReadDeadlineExceeded in the worker. Returns true when it fired.
  def fire_if_armed(idle)
    @mutex.synchronize do
      next false unless @active && !@stopped.value

      @logger.debug "SSE read deadline exceeded (#{idle.round(1)}s idle >= #{@deadline_s}s); interrupting worker"
      @worker.raise(SSEReadDeadlineExceeded.new("SSE read deadline #{@deadline_s}s exceeded"))
      true
    end
  end
end
100
484
 
101
- client.on_error do |error|
102
- # SSL "unexpected eof" is expected when SSE sessions timeout normally
103
- if error.is_a?(OpenSSL::SSL::SSLError) && error.message.include?('unexpected eof')
104
- @logger.debug "SSE Streaming: Connection closed (expected timeout) for url #{url}"
485
+ # Streaming SSE parser. Accepts byte chunks (any encoding), yields one
486
+ # Quonfig::StreamEvent per complete event. Tolerates:
487
+ # - chunks that split a UTF-8 multi-byte character (buffer in 8-bit,
488
+ # transcode whole lines)
489
+ # - chunks that split a line mid-way
490
+ # - any of CR / LF / CRLF as line terminators
491
+ # - +data:+, +data: + (optional space per SSE spec)
492
+ # - +:comment+ lines (keepalives — ignored)
493
+ # - multi-line +data:+ (concatenated with +\n+, per spec)
494
+ # Ignores +event:+ and +retry:+ — api-delivery does not emit them and the
495
+ # Quonfig wire contract does not honor reconnect-time directives.
496
+ # Malformed +data:+ JSON is logged and skipped; one bad event does not
497
+ # tear down the stream.
498
class EventParser
  # logger - optional object answering #error; LOG is used when nil.
  def initialize(logger: nil)
    @logger = logger
    @reader = LineReader.new
    @data = +''
    @have_data = false
    @id = nil
  end

  # Feeds one raw chunk and yields a StreamEvent for every event completed
  # by it. A blank line ends an event, lines starting with ":" are
  # comments, everything else is a field line.
  def feed(chunk)
    @reader.feed(chunk) do |line|
      if line.empty?
        event = flush
        yield event if event
      elsif line.start_with?(':')
        # comment / keepalive: ignore
      else
        process_field(line)
      end
    end
  end

  private

  # Applies one "name: value" line to the pending event. Only +data+ and
  # +id+ are honoured; lines without a colon and all other fields are
  # ignored.
  def process_field(line)
    idx = line.index(':')
    return unless idx

    name = line[0...idx]
    rest = line[(idx + 1)..]
    # A single space after the colon is part of the separator.
    rest = rest[1..] if rest.start_with?(' ')

    case name
    when 'data'
      if @have_data
        # Repeated data lines are joined with a newline.
        @data << "\n" << rest
      else
        @data = rest
        @have_data = true
      end
    when 'id'
      # Ids containing NUL are ignored.
      @id = rest unless rest.include?("\x00")
    end
  end

  # Turns the pending data into a StreamEvent, or returns nil when there is
  # no data or the payload is unusable. The pending data is cleared either
  # way; @id is kept so it carries over to later events.
  def flush
    return nil unless @have_data

    data = @data
    id = @id
    @data = +''
    @have_data = false

    parsed = parse_payload(data)
    return nil unless parsed

    envelope = Quonfig::ConfigEnvelope.new(
      configs: parsed['configs'] || [],
      meta: parsed['meta'] || {}
    )
    StreamEvent.new(envelope, id, data)
  end

  # Parses +data+ as JSON and returns the Hash, or logs and returns nil.
  # Valid JSON that is not an object (null, an array, a bare number or
  # string) is rejected here: indexing it with 'configs' would raise and
  # the exception would escape #feed, ending the stream over one bad event.
  def parse_payload(data)
    parsed = JSON.parse(data)
    return parsed if parsed.is_a?(Hash)

    (@logger || LOG).error "SSE Streaming Error: expected a JSON object, got #{parsed.class}"
    nil
  rescue JSON::ParserError => e
    (@logger || LOG).error "SSE Streaming Error: malformed JSON: #{e.message}"
    nil
  end
end
134
568
 
135
- # Compute a Last-Event-ID to resume the stream from. Three sources, in
136
- # priority order:
137
- # 1. config_loader.version -- string ETag from last HTTP fetch (new path)
138
- # 2. config_loader.highwater_mark -- legacy numeric cursor
139
- # 3. nil -- no prior state; stream from HEAD
140
- def current_cursor
141
- if @config_loader.respond_to?(:version)
142
- v = @config_loader.version
143
- return v if v.is_a?(String) && !v.empty?
569
+ # Byte-level line reader. Accepts arbitrary chunks, yields one UTF-8
570
+ # line per call to the block. Terminator-stripped (CR / LF / CRLF
571
+ # supported). Modeled on ld-eventsource's BufferedLineReader, with the same
572
+ # invariants: split bytes-not-chars while scanning, force-encode to
573
+ # UTF-8 only once a complete line is sliced out, so a multi-byte
574
+ # character spanning two chunks does not raise Encoding::CompatibilityError.
575
class LineReader
  def initialize
    # Binary buffer: scanning is done on bytes, never on characters.
    @buffer = String.new(encoding: Encoding::BINARY)
    # True when the previous feed ended on a bare CR, whose LF partner may
    # be the first byte of the next feed.
    @last_was_cr = false
  end

  # Appends +chunk+ and yields each complete line, without its terminator,
  # tagged as UTF-8. CR, LF and CRLF all end a line. Bytes after the last
  # terminator stay buffered until a later feed completes them.
  def feed(chunk)
    @buffer << chunk.b
    loop do
      pos = @buffer.index(/[\r\n]/)
      break unless pos

      byte = @buffer[pos]
      if pos.zero? && byte == "\n" && @last_was_cr
        # Second half of a CRLF that was split across two feeds: drop it.
        @last_was_cr = false
        @buffer.slice!(0, 1)
        next
      end

      line = @buffer[0, pos].force_encoding('UTF-8')
      take = pos + 1
      # A CR in the final position may still turn out to be a CRLF.
      @last_was_cr = (byte == "\r" && take == @buffer.bytesize)
      # A CR directly followed by LF is a single terminator.
      take += 1 if byte == "\r" && !@last_was_cr && @buffer[take] == "\n"

      @buffer.slice!(0, take)
      yield line
    end
  end
end
154
611
  end
155
612
  end