durable_streams 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,436 @@
1
# frozen_string_literal: true

require "json"

module DurableStreams
  # Producer for exactly-once writes with batching.
  # Uses producer_id, epoch, and sequence numbers to ensure exactly-once delivery.
  #
  # Messages passed to #append are buffered and flushed when the batch reaches
  # max_batch_bytes or after linger_ms milliseconds, whichever comes first.
  # #flush and #close force delivery of anything still pending. Errors raised
  # by background threads are recorded and re-raised from the next #flush.
  class Producer
    attr_reader :epoch, :seq

    # Open a producer with block form for automatic cleanup
    # @example
    #   Producer.open(url: "...", producer_id: "...") do |producer|
    #     producer << data
    #   end # auto flush/close
    # @yield [Producer] The producer instance
    # @return [Object] The block's return value (or the producer itself when no block is given)
    def self.open(**options, &block)
      producer = new(**options)
      return producer unless block_given?

      begin
        yield producer
      ensure
        # Capture the exception propagating out of the block (nil on a normal
        # exit) BEFORE entering the nested rescue below. Inside that rescue,
        # $! refers to close_error itself, so testing $! there can never tell
        # whether the block raised -- the previous `raise unless $!` check was
        # always false and silently swallowed every close error.
        block_error = $!
        # Preserve original exception if close also raises
        begin
          producer.close
        rescue StandardError => close_error
          # The block succeeded: surface the close failure to the caller.
          raise unless block_error

          # The block's own exception takes precedence; just log the close failure.
          DurableStreams.logger&.warn(
            "Error during producer close (original exception preserved): #{close_error.class}: #{close_error.message}"
          )
        end
      end
    end

    # @param url [String] Stream URL
    # @param producer_id [String] Stable identifier for this producer
    # @param epoch [Integer] Starting epoch (increment on restart)
    # @param auto_claim [Boolean] Auto-retry with epoch+1 on 403
    # @param max_batch_bytes [Integer] Max bytes before flush (default: 1MB)
    # @param linger_ms [Integer] Max wait before flush (default: 5ms)
    # @param max_in_flight [Integer] Max concurrent batches (default: 5)
    # @param content_type [String] Content type for the stream
    # @param headers [Hash] Additional headers
    # @param next_seq [Integer] Sequence number the next append should use
    # @raise [ArgumentError] if next_seq is negative
    def initialize(url:, producer_id:, epoch: 0, auto_claim: false,
                   max_batch_bytes: 1_048_576, linger_ms: 5, max_in_flight: 5,
                   content_type: nil, headers: {}, next_seq: 0)
      @url = url
      @producer_id = producer_id
      @epoch = epoch
      @auto_claim = auto_claim
      @max_batch_bytes = max_batch_bytes
      @linger_ms = linger_ms
      @max_in_flight = max_in_flight
      @content_type = content_type || "application/json"
      @headers = headers

      raise ArgumentError, "next_seq must be >= 0" if next_seq.negative?

      @seq = next_seq - 1 # Start at next_seq - 1 so next append uses next_seq
      @pending = []
      @mutex = Mutex.new
      @send_mutex = Mutex.new # Ensure batches are sent in order
      @in_flight = 0
      @in_flight_cv = ConditionVariable.new
      @transport = HTTP::Transport.new
      @closed = false
      @stream_closed = false
      @linger_timer = nil
      @linger_cancelled = false
      @batch_queue = Queue.new
      @sender_thread = nil
      # First error raised by a background thread; re-raised from #flush.
      @last_error = nil
    end

    # Append a message (fire-and-forget, batched)
    # For JSON streams, pass pre-serialized JSON strings.
    # @param data [String] Data to append (pre-serialized JSON for JSON streams)
    # @raise [ClosedError] if the producer is closed
    # @raise [ArgumentError] if data is not a String
    # @example
    #   producer.append(JSON.generate({ message: "hello" }))
    def append(data)
      raise ClosedError.new("Producer is closed", url: @url) if @closed
      unless data.is_a?(String)
        raise ArgumentError, "append() requires a String. For objects, use JSON.generate(). Got #{data.class}"
      end

      batch_to_send = nil
      @mutex.synchronize do
        @seq += 1
        @pending << { data: data, seq: @seq }

        # Start linger timer if this is first message in batch
        start_linger_timer if @pending.size == 1 && @linger_ms > 0

        # Flush if batch is full
        if batch_size_bytes >= @max_batch_bytes
          batch_to_send = @pending.dup
          @pending.clear
          cancel_linger_timer
        end
      end

      # Send outside the mutex to avoid blocking other appends
      queue_batch(batch_to_send) if batch_to_send
    end

    # Shovel operator for append (Ruby idiom)
    # @param data [String] Data to append (pre-serialized JSON for JSON streams)
    # @return [self] Returns self for chaining
    def <<(data)
      append(data)
      self
    end

    # Append and wait for acknowledgment (sync/blocking)
    # @param data [String] Data to append (pre-serialized JSON for JSON streams)
    # @return [ProducerResult]
    def append!(data)
      append(data)
      flush
      ProducerResult.new(
        next_offset: nil, # We don't track individual message offsets in batched mode
        duplicate: false,
        epoch: @epoch,
        seq: @seq
      )
    end

    # Flush all pending batches; blocks until every in-flight batch completes.
    # @raise [StandardError] any error previously recorded by a background thread
    def flush
      batch = nil
      @mutex.synchronize do
        cancel_linger_timer
        # Check for errors from background threads
        raise @last_error if @last_error

        return if @pending.empty?

        batch = @pending.dup
        @pending.clear
      end

      # Send synchronously for flush
      send_batch_sync(batch) if batch && !batch.empty?

      # Wait for all in-flight batches to complete
      wait_for_inflight

      # Check for errors that occurred during wait
      @mutex.synchronize { raise @last_error if @last_error }
    end

    # Close the producer, flushing pending data. Idempotent.
    def close
      return if @closed

      @closed = true
      cancel_linger_timer
      flush

      # Signal sender thread to stop and wait for it
      if @sender_thread&.alive?
        @batch_queue << :shutdown
        @sender_thread.join(5) # Wait up to 5 seconds
        @sender_thread.kill if @sender_thread.alive? # Force kill if stuck
      end
    end

    # Close the stream using producer headers (idempotent)
    # @param data [String, nil] Optional final data to append before closing
    # @raise [StaleEpochError] after 3 failed attempts, or immediately when auto_claim is off
    def close_stream(data: nil)
      return if @stream_closed

      flush

      attempts = 0
      close_seq = @seq + 1

      begin
        send_close_request(data, close_seq, @epoch)
        @seq = close_seq
        @stream_closed = true
        @closed = true
      rescue StaleEpochError => e
        attempts += 1
        raise if attempts > 3

        if @auto_claim
          # Claim a strictly newer epoch than both ours and the server's.
          server_epoch = e.current_epoch || @epoch
          @epoch = [server_epoch + 1, @epoch + 1].max
          close_seq = 0 # Sequence restarts for the new epoch
          retry
        else
          raise
        end
      end
    end

    # Check if the producer has been closed
    # @return [Boolean]
    def closed?
      @closed
    end

    private

    # Total payload size of the pending batch, in bytes.
    def batch_size_bytes
      # Data is now pre-serialized strings
      @pending.sum { |msg| msg[:data].bytesize }
    end

    # Arm a background timer that flushes the pending batch after linger_ms.
    def start_linger_timer
      return if @linger_ms <= 0

      @linger_cancelled = false
      @linger_timer = Thread.new do
        begin
          sleep(@linger_ms / 1000.0)
          flush unless @closed || @linger_cancelled
        rescue StandardError => e
          @mutex.synchronize do
            if @last_error
              DurableStreams.logger&.warn(
                "Additional error in linger timer (first error already recorded): #{e.class}: #{e.message}"
              )
            else
              @last_error = e
            end
          end
        end
      end
    end

    # Stop the linger timer if armed. Callers must not rely on the timer's
    # flush having completed -- the thread is killed outright.
    def cancel_linger_timer
      @linger_cancelled = true
      @linger_timer&.kill
      @linger_timer = nil
    end

    # Dispatch a full batch either synchronously (ordering-sensitive modes)
    # or via the background sender thread.
    def queue_batch(batch)
      return if batch.nil? || batch.empty?

      # If max_in_flight is 1 or linger_ms is 0, send synchronously for ordering
      if @max_in_flight <= 1 || @linger_ms == 0
        send_batch_sync(batch)
      else
        # For batched mode with concurrency, use the queue
        start_sender_thread
        @batch_queue << batch
      end
    end

    # Lazily start the background thread that drains @batch_queue.
    def start_sender_thread
      return if @sender_thread&.alive?

      @sender_thread = Thread.new do
        begin
          loop do
            batch = @batch_queue.pop
            break if batch == :shutdown

            send_batch_sync(batch)
          end
        rescue StandardError => e
          DurableStreams.logger&.error(
            "Sender thread died unexpectedly: #{e.class}: #{e.message}"
          )
          @mutex.synchronize do
            if @last_error
              DurableStreams.logger&.warn(
                "Sender thread error (first error already recorded): #{e.class}: #{e.message}"
              )
            else
              @last_error = e
            end
          end
        end
      end
    end

    # Block until every in-flight batch has been acknowledged or failed.
    def wait_for_inflight
      @mutex.synchronize do
        while @in_flight > 0
          @in_flight_cv.wait(@mutex, 0.1)
        end
      end
    end

    # Send one batch, honoring the in-flight limit and retrying once with a
    # bumped epoch on StaleEpochError when auto_claim is enabled.
    def send_batch_sync(batch, retry_count: 0)
      return if batch.empty?

      # Serialize batch sending to ensure sequence order
      @send_mutex.synchronize do
        # Wait for in-flight slot
        @mutex.synchronize do
          while @in_flight >= @max_in_flight
            @in_flight_cv.wait(@mutex, 0.1)
          end
          @in_flight += 1
        end

        begin
          send_batch_request(batch)
        rescue StaleEpochError => e
          if @auto_claim && retry_count < 3
            new_epoch = nil
            new_batch = nil
            @mutex.synchronize do
              # Use the server's current epoch + 1, or at minimum our epoch + 1
              server_epoch = e.current_epoch || @epoch
              new_epoch = [server_epoch + 1, @epoch + 1].max
              @epoch = new_epoch
              # Rebuild the batch with seq starting from 0 for the new epoch
              new_batch = batch.each_with_index.map do |msg, idx|
                { data: msg[:data], seq: idx }
              end
              # Update @seq to the last seq in the batch so subsequent appends continue correctly
              # Any pending messages will be re-sequenced on next flush
              @seq = new_batch.size - 1
              # Re-sequence any pending messages to continue after the batch
              @pending.each_with_index do |msg, idx|
                msg[:seq] = @seq + 1 + idx
              end
              @seq += @pending.size
            end
            send_batch_request_with_epoch(new_batch, new_epoch)
          else
            raise
          end
        ensure
          @mutex.synchronize do
            @in_flight -= 1
            @in_flight_cv.broadcast
          end
        end
      end
    end

    # Send a batch under the producer's current epoch.
    def send_batch_request(batch)
      send_batch_request_with_epoch(batch, @epoch)
    end

    # POST a batch with producer id/epoch/seq headers and map HTTP failures
    # to the library's error hierarchy.
    # @raise [StaleEpochError] on 403
    # @raise [StreamClosedError, SequenceGapError, SeqConflictError] on 409
    def send_batch_request_with_epoch(batch, epoch)
      headers = HTTP.resolve_headers(@headers)
      headers["content-type"] = @content_type
      headers[PRODUCER_ID_HEADER] = @producer_id
      headers[PRODUCER_EPOCH_HEADER] = epoch.to_s

      # Use the first message's seq as the starting seq
      first_seq = batch.first[:seq]
      headers[PRODUCER_SEQ_HEADER] = first_seq.to_s

      # Build body - data is pre-serialized strings
      body = if DurableStreams.json_content_type?(@content_type)
               # Wrap pre-serialized JSON strings in array
               "[#{batch.map { |m| m[:data] }.join(',')}]"
             else
               batch.map { |m| m[:data] }.join
             end

      response = @transport.request(:post, @url, headers: headers, body: body)

      case response.status
      when 200, 201, 204
        # Success
        nil
      when 403
        # Stale epoch
        current_epoch = response[PRODUCER_EPOCH_HEADER]&.to_i
        raise StaleEpochError.new(current_epoch: current_epoch, url: @url, headers: response.headers)
      when 409
        if response[STREAM_CLOSED_HEADER]&.downcase == "true"
          raise StreamClosedError.new(url: @url, headers: response.headers)
        end

        # Could be sequence gap or other conflict
        expected = response[PRODUCER_EXPECTED_SEQ_HEADER]&.to_i
        received = response[PRODUCER_RECEIVED_SEQ_HEADER]&.to_i
        if expected && received
          raise SequenceGapError.new(expected_seq: expected, received_seq: received,
                                     url: @url, headers: response.headers)
        else
          raise SeqConflictError.new(url: @url, headers: response.headers)
        end
      else
        raise DurableStreams.error_from_status(response.status, url: @url, body: response.body,
                                               headers: response.headers)
      end
    end

    # POST the stream-closed marker (optionally with a final payload) and map
    # HTTP failures to the library's error hierarchy, mirroring batch sends.
    def send_close_request(data, seq, epoch)
      headers = HTTP.resolve_headers(@headers)
      headers["content-type"] = @content_type
      headers[PRODUCER_ID_HEADER] = @producer_id
      headers[PRODUCER_EPOCH_HEADER] = epoch.to_s
      headers[PRODUCER_SEQ_HEADER] = seq.to_s
      headers[STREAM_CLOSED_HEADER] = "true"

      body = if data.nil?
               ""
             elsif DurableStreams.json_content_type?(@content_type)
               "[#{data}]"
             else
               data
             end

      response = @transport.request(:post, @url, headers: headers, body: body)

      case response.status
      when 200, 201, 204
        nil
      when 403
        current_epoch = response[PRODUCER_EPOCH_HEADER]&.to_i
        raise StaleEpochError.new(current_epoch: current_epoch, url: @url, headers: response.headers)
      when 409
        if response[STREAM_CLOSED_HEADER]&.downcase == "true"
          raise StreamClosedError.new(url: @url, headers: response.headers)
        end

        expected = response[PRODUCER_EXPECTED_SEQ_HEADER]&.to_i
        received = response[PRODUCER_RECEIVED_SEQ_HEADER]&.to_i
        if expected && received
          raise SequenceGapError.new(expected_seq: expected, received_seq: received,
                                     url: @url, headers: response.headers)
        else
          raise SeqConflictError.new(url: @url, headers: response.headers)
        end
      else
        raise DurableStreams.error_from_status(response.status, url: @url, body: response.body,
                                               headers: response.headers)
      end
    end
  end
end
@@ -0,0 +1,228 @@
1
# frozen_string_literal: true

require "json"
require "uri"
require "net/http"
require "base64"

module DurableStreams
  # SSE (Server-Sent Events) reader for live streaming
  #
  # Holds a long-lived HTTP connection with `Accept: text/event-stream`,
  # incrementally parses the byte stream into events, and tracks stream
  # position (next_offset/cursor/up_to_date) from server control events so
  # that reconnects resume from the right place.
  class SSEReader
    attr_reader :next_offset, :cursor, :up_to_date, :status

    # @param stream [Stream] Parent stream handle
    # @param offset [String] Starting offset
    # @param cursor [String, nil] Initial cursor
    # @param retry_policy [RetryPolicy, nil] Retry policy for reconnection
    def initialize(stream, offset: "-1", cursor: nil, retry_policy: nil)
      @stream = stream
      @offset = offset
      @next_offset = offset
      @cursor = cursor
      @retry_policy = retry_policy || RetryPolicy.default
      @up_to_date = false
      @closed = false
      @status = nil
      @buffer = +"" # Unparsed bytes carried between read_body chunks
      @http_response = nil
      @connection = nil
      @encoding = nil # "base64" when the server encodes binary payloads
    end

    # Iterate over SSE events
    # @yield [Hash] Event with :type, :data, :next_offset, :cursor, :up_to_date
    # @return [Enumerator] when no block is given
    def each_event(&block)
      return enum_for(:each_event) unless block_given?

      with_reconnection do
        open_sse_connection do |response|
          @http_response = response

          response.read_body do |chunk|
            break if @closed

            @buffer << chunk
            parse_events.each do |event|
              yield event
              break if @closed
            end
          end
        end
      end
    end

    # Close the SSE connection. Safe to call multiple times.
    def close
      @closed = true
      begin
        # Reach into Net::HTTP internals to unblock a read_body stuck waiting
        # for data; failures here are expected during teardown.
        @http_response&.instance_variable_get(:@socket)&.close
      rescue StandardError => e
        DurableStreams.logger&.warn("SSE socket close error (expected during cleanup): #{e.class}: #{e.message}")
      end
      begin
        @connection&.finish
      rescue StandardError => e
        DurableStreams.logger&.warn("SSE connection finish error (expected during cleanup): #{e.class}: #{e.message}")
      end
    end

    # @return [Boolean] whether #close has been called
    def closed?
      @closed
    end

    private

    # Run the connection loop, reconnecting with exponential backoff on
    # transient I/O failures, up to retry_policy.max_retries attempts.
    # @raise [ConnectionError] when the retry budget is exhausted
    def with_reconnection
      attempts = 0
      begin
        yield
      rescue IOError, Errno::ECONNRESET, Net::ReadTimeout, Errno::EPIPE => e
        return if @closed

        attempts += 1
        if attempts > @retry_policy.max_retries
          raise ConnectionError.new("SSE connection failed after #{attempts} retries: #{e.message}")
        end

        delay = [@retry_policy.initial_delay * (@retry_policy.multiplier**(attempts - 1)),
                 @retry_policy.max_delay].min
        sleep(delay)
        # Discard any partial event left from the broken connection;
        # @next_offset/@cursor let the server resume from the right spot.
        @buffer = +""
        retry
      end
    end

    # Open the streaming GET request and yield the Net::HTTP response.
    # @raise [StreamNotFoundError] on 404
    def open_sse_connection(&block)
      params = { offset: @next_offset, live: "sse" }
      params[:cursor] = @cursor if @cursor
      request_url = HTTP.build_url(@stream.url, params)
      uri = URI.parse(request_url)

      @connection = Net::HTTP.new(uri.host, uri.port)
      @connection.use_ssl = uri.scheme == "https"
      @connection.open_timeout = 10
      @connection.read_timeout = 300 # Long timeout for SSE
      @connection.start

      path = uri.path
      path = "/" if path.empty?
      path = "#{path}?#{uri.query}" if uri.query

      request = Net::HTTP::Get.new(path)
      # Apply user headers first, then force Accept header for SSE
      @stream.resolved_headers.each { |k, v| request[k] = v }
      request["Accept"] = "text/event-stream"

      @connection.request(request) do |response|
        @status = response.code.to_i
        if @status == 404
          raise StreamNotFoundError.new(url: @stream.url)
        end
        unless @status >= 200 && @status < 300
          raise DurableStreams.error_from_status(@status, url: @stream.url)
        end
        # Detect encoding from response header (server auto-detects binary content types)
        encoding_header = response["stream-sse-data-encoding"]
        @encoding = encoding_header if encoding_header && !encoding_header.empty?
        yield response
      end
    ensure
      begin
        @connection&.finish
      rescue StandardError => e
        DurableStreams.logger&.warn("SSE connection cleanup error: #{e.class}: #{e.message}")
      end
    end

    # Consume complete events from @buffer, leaving any trailing partial
    # event in place for the next chunk.
    # @return [Array<Hash>] parsed events, possibly empty
    def parse_events
      events = []
      # Handle both \n\n and \r\n\r\n delimiters
      while (match = @buffer.match(/\r?\n\r?\n/))
        idx = match.begin(0)
        raw = @buffer.slice!(0, idx + match[0].length)
        event = parse_sse_event(raw)
        events << event if event
      end
      events
    end

    # Parse one raw SSE event block into an event hash, updating position
    # state from control events.
    # @return [Hash, nil] nil for empty or unknown-typed events
    # @raise [ParseError] on malformed control events or invalid base64 data
    def parse_sse_event(raw)
      event_type = nil
      data_lines = []

      raw.each_line do |line|
        line = line.chomp
        next if line.start_with?(":") # Comment line
        next if line.empty?

        case line
        when /^event:\s*(.*)$/
          event_type = ::Regexp.last_match(1)
        when /^data:\s?(.*)$/
          data_lines << ::Regexp.last_match(1)
        when /^data$/
          data_lines << "" # Empty data line
        end
      end

      return nil if data_lines.empty? && event_type != "control"

      data = data_lines.join("\n")

      # Parse control events for metadata
      if event_type == "control"
        # Validate control event data
        if data.nil? || data.strip.empty?
          raise ParseError.new("Empty control event data")
        end

        begin
          control = JSON.parse(data)
          # Must be a JSON object
          unless control.is_a?(Hash)
            raise ParseError.new("Control event data is not a JSON object")
          end
          @next_offset = control["streamNextOffset"] if control["streamNextOffset"]
          @cursor = control["streamCursor"] if control["streamCursor"]
          @up_to_date = control["upToDate"] == true || control["streamUpToDate"] == true
          return {
            type: "control",
            data: nil, # No data payload for control events
            next_offset: @next_offset,
            cursor: @cursor,
            up_to_date: @up_to_date
          }
        rescue JSON::ParserError => e
          raise ParseError.new("Malformed control event JSON: #{e.message}")
        end
      end

      # Only process known event types: "data", "message", or nil (default)
      # Ignore unknown event types per SSE spec (forward compatibility)
      unless event_type.nil? || event_type == "data" || event_type == "message"
        return nil
      end

      # Decode base64 if encoding is set (Protocol Section 5.7)
      # Per protocol: remove \n and \r characters before base64 decoding
      if @encoding == "base64" && data && !data.empty?
        cleaned_data = data.gsub(/[\n\r]/, "")
        begin
          data = Base64.strict_decode64(cleaned_data)
        rescue ArgumentError => e
          raise ParseError.new("Invalid base64 data in SSE event: #{e.message}")
        end
      end

      {
        type: event_type,
        data: data,
        next_offset: @next_offset,
        cursor: @cursor,
        up_to_date: @up_to_date
      }
    end
  end
end