fiber_stream 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37):
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +25 -0
  3. data/LICENSE +19 -0
  4. data/README.md +361 -0
  5. data/examples/README.md +51 -0
  6. data/examples/async_http_requests.rb +132 -0
  7. data/examples/background_execution.rb +31 -0
  8. data/examples/backpressure_buffer.rb +66 -0
  9. data/examples/basic_pipeline.rb +28 -0
  10. data/examples/composable_pipeline.rb +43 -0
  11. data/examples/file_copy.rb +33 -0
  12. data/examples/line_processing.rb +20 -0
  13. data/examples/ractor_map_hashing.rb +43 -0
  14. data/examples/ractor_port_source.rb +45 -0
  15. data/lib/fiber_stream/errors.rb +44 -0
  16. data/lib/fiber_stream/flow.rb +190 -0
  17. data/lib/fiber_stream/pipeline.rb +49 -0
  18. data/lib/fiber_stream/pull/async_boundary.rb +85 -0
  19. data/lib/fiber_stream/pull/buffer_boundary.rb +123 -0
  20. data/lib/fiber_stream/pull/each.rb +31 -0
  21. data/lib/fiber_stream/pull/io_source.rb +89 -0
  22. data/lib/fiber_stream/pull/lines.rb +121 -0
  23. data/lib/fiber_stream/pull/map.rb +37 -0
  24. data/lib/fiber_stream/pull/parallel_map_boundary.rb +299 -0
  25. data/lib/fiber_stream/pull/ractor_map_boundary.rb +500 -0
  26. data/lib/fiber_stream/pull/ractor_port_source.rb +242 -0
  27. data/lib/fiber_stream/pull/select.rb +40 -0
  28. data/lib/fiber_stream/pull/take.rb +47 -0
  29. data/lib/fiber_stream/pull.rb +85 -0
  30. data/lib/fiber_stream/ractor_port.rb +17 -0
  31. data/lib/fiber_stream/running_pipeline.rb +156 -0
  32. data/lib/fiber_stream/sink.rb +176 -0
  33. data/lib/fiber_stream/source.rb +184 -0
  34. data/lib/fiber_stream/version.rb +5 -0
  35. data/lib/fiber_stream.rb +15 -0
  36. data/sig/fiber_stream.rbs +97 -0
  37. metadata +154 -0
@@ -0,0 +1,500 @@
1
+ # frozen_string_literal: true
2
+
3
module FiberStream
  module Pull
    # Ordered Ractor-backed worker boundary for `Flow.ractor_map`.
    #
    # Upstream is pulled by the downstream caller, while blocking waits for
    # Ractor worker messages are isolated in a coordinator thread. The boundary
    # admits work only when a worker is ready and the pulled-but-unemitted count
    # is below `workers`, preserving bounded backpressure and ordered output.
    #
    # Internal messages are arrays whose first element is a tag:
    #
    # * consumer side (`@pending` / `@results`):
    #   `[:value, sequence, value]`, `[:error, sequence, error]`,
    #   `[:done, sequence]`
    # * boundary -> worker: `[:job, sequence, value]`, `[:shutdown]`
    # * worker -> coordinator (through the result port):
    #   `[:ready, id]`, `[:value, id, sequence, value]`,
    #   `[:error, id, sequence, kind, cause_class_name, cause_message]`,
    #   `[:stopped, id]`
    class RactorMapBoundary
      # Extra capacity added to the results queue on top of one slot per
      # worker (presumably reserved for a terminal message -- confirm).
      TERMINAL_RESULT_CAPACITY = 1
      # Seconds slept between retries when a queue is full and while polling
      # the coordinator thread for completion.
      READY_WAIT_INTERVAL = 0.001

      # @param upstream [#next, #close] pull source the jobs are read from
      # @param workers [Integer] number of worker Ractors; also bounds the
      #   number of pulled-but-unemitted items
      # @param input_transfer [Symbol] `:move` sends jobs with `move: true`;
      #   any other value sends them without it
      # @param output_transfer [Symbol] same switch for results sent back by
      #   the workers
      # @param transform [#call] mapper invoked inside each worker Ractor
      def initialize(upstream, workers, input_transfer, output_transfer, transform)
        @upstream = upstream
        @workers_count = workers
        @input_transfer = input_transfer
        @output_transfer = output_transfer
        @transform = transform
        @result_port = nil
        @ready_workers = Thread::SizedQueue.new(workers)
        @results = Thread::SizedQueue.new(workers + TERMINAL_RESULT_CAPACITY)
        @workers = []
        @active_sequences = {}
        @worker_state_mutex = Mutex.new
        @coordinator = nil
        @pending = {}
        @next_sequence = 0
        @next_emit_sequence = 0
        @in_flight = 0
        @failure_sequence = nil
        @started = false
        @closed = false
        @done = false
        @admission_closed = false
        @worker_shutdown_sent = false
        @upstream_closed = false
        @upstream_close_error = nil
      end

      # Returns the next mapped value in upstream order, or `DONE` once the
      # boundary is closed or finished. Workers and the coordinator thread are
      # started lazily on the first call.
      #
      # @raise [StandardError] the ordered error recorded for the next sequence
      def next
        return DONE if @closed || @done

        start
        next_message
      end

      # Closes the boundary: closes upstream, stops admitting work, asks the
      # workers to shut down and waits for the coordinator thread to finish.
      # Subsequent calls are no-ops.
      #
      # @raise [StandardError] the error raised by `upstream.close`, if any
      def close
        return if @closed

        @closed = true
        @done = true
        close_error = close_upstream
        close_admission(close_upstream: false)
        request_worker_shutdown
        wait_for_workers
        close_error ||= @upstream_close_error
        raise close_error if close_error
      end

      private

      # Spawns the worker Ractors and the coordinator thread exactly once.
      def start
        return if @started

        @started = true
        @result_port = Ractor::Port.new
        @workers_count.times do |worker_id|
          @workers << self.class.__send__(
            :spawn_worker,
            worker_id,
            @result_port,
            @transform,
            @output_transfer
          )
        end
        @coordinator = Thread.new { run_coordinator }
      end

      # Consumer loop: admits new jobs, emits the message for the next
      # expected sequence when it is pending, and otherwise blocks on the
      # results queue. A `nil` popped from the results queue completes the
      # stream.
      def next_message
        loop do
          fill_capacity

          ready = @pending.delete(@next_emit_sequence)
          if ready
            drain_available_results
            return emit(ready)
          end

          message = @results.pop
          return complete if message.nil?

          record_result(message)
        end
      end

      # Pulls upstream items and hands them to ready workers while fewer than
      # `workers` items are in flight. Waits for a ready worker only when
      # nothing is in flight; otherwise takes one only if immediately
      # available. A non-job message from upstream (done/error) closes
      # admission and is recorded at its sequence.
      def fill_capacity
        return if @admission_closed

        while @in_flight < @workers_count
          worker = take_ready_worker(block: @in_flight.zero?)
          break unless worker

          message = pull_job_message
          if message.fetch(0) == :job
            @in_flight += 1
            break unless deliver_job(worker, message)
          else
            close_admission(close_upstream: false)
            record_result(message)
            break
          end
        end
      end

      # Pulls one item from upstream and wraps it as `[:job, sequence, value]`.
      # Upstream exhaustion yields the terminal message; an upstream error
      # closes upstream and yields `[:error, next_sequence, error]`.
      def pull_job_message
        value = @upstream.next
        return terminal_done_message if Pull.done?(value)

        sequence = @next_sequence
        @next_sequence += 1
        [:job, sequence, value]
      rescue StandardError => error
        close_upstream(record_error: false)
        [:error, @next_sequence, error]
      end

      # Closes upstream and returns `[:done, sequence]`, or an `:error`
      # message at the same sequence when closing upstream failed.
      def terminal_done_message
        close_error = close_upstream
        close_error ? [:error, @next_sequence, close_error] : [:done, @next_sequence]
      end

      # Sends a job message to `worker`, honouring the input transfer mode.
      #
      # @return [Boolean] true when sent; false when sending raised, in which
      #   case an `:input_transfer` error is recorded at the job's sequence
      def deliver_job(worker, message)
        # Capture the sequence before sending: after a failed `move: true`
        # send the message may no longer be safely readable.
        sequence = message.fetch(1)

        begin
          track_worker_job(worker, sequence)

          if @input_transfer == :move
            worker.send(message, move: true)
          else
            worker.send(message)
          end
          true
        rescue StandardError => error
          clear_worker_job(worker)
          record_result([:error, sequence, build_ractor_map_error(sequence, :input_transfer, error)])
          false
        end
      end

      # Takes a worker from the ready queue.
      #
      # @param block [Boolean] when true, waits until a worker is available or
      #   the boundary, admission, or ready queue is closed; when false, uses
      #   a non-blocking pop
      # @return [Object, nil] the worker, or nil when none could be taken
      def take_ready_worker(block:)
        if block
          loop do
            worker = @ready_workers.pop
            return worker if worker || @closed || @admission_closed || @ready_workers.closed?
          end
        else
          @ready_workers.pop(true)
        end
      rescue ThreadError, ClosedQueueError
        nil
      end

      # Dispatches an in-order message: returns the value, completes the
      # stream, or raises the error.
      def emit(message)
        case message.fetch(0)
        when :value
          emit_value(message)
        when :done
          complete
        when :error
          fail_with_ordered_error(message)
        end
      end

      # Advances the emit cursor past `message`, releases one in-flight slot
      # and returns the mapped value.
      def emit_value(message)
        sequence = message.fetch(1)
        value = message.fetch(2)
        @next_emit_sequence = sequence + 1
        @in_flight -= 1 if @in_flight.positive?
        value
      end

      # Raises the error carried by `message`, unless an earlier failure
      # sequence is already recorded, in which case this later error is
      # skipped and emission continues.
      def fail_with_ordered_error(message)
        sequence = message.fetch(1)
        error = message.fetch(2)

        if @failure_sequence && sequence > @failure_sequence
          @next_emit_sequence = sequence + 1
          @in_flight -= 1 if @in_flight.positive?
          return next_message
        end

        @done = true
        close_admission
        request_worker_shutdown
        # Re-attach the original exception as `cause` when it is available.
        if error.is_a?(RactorMapError) && error.original_cause
          raise error, cause: error.original_cause
        end

        raise error
      end

      # Marks the stream finished, asks workers to shut down and returns DONE.
      def complete
        @done = true
        request_worker_shutdown
        DONE
      end

      # Stores `message` under its sequence for ordered emission. An error
      # additionally lowers `@failure_sequence` to the smallest failing
      # sequence seen, closes admission and requests worker shutdown.
      #
      # NOTE(review): workers report errors raised outside a job with
      # sequence -1; such a message is stored under a key the emit cursor
      # never reaches -- confirm this is intended.
      def record_result(message)
        if message.fetch(0) == :error
          sequence = message.fetch(1)
          @failure_sequence = sequence if @failure_sequence.nil? || sequence < @failure_sequence
          close_admission
          request_worker_shutdown
        end

        @pending[message.fetch(1)] = message
      end

      # Records every result that can be popped without blocking. Stops on an
      # empty queue (ThreadError) or on a `nil` message.
      def drain_available_results
        loop do
          message = @results.pop(true)
          break if message.nil?

          record_result(message)
        rescue ThreadError
          break
        end
      end

      # Body of the coordinator thread: selects on the result port and the
      # live workers until every worker is accounted for as stopped, then
      # closes the ready queue (and the results queue when the boundary has
      # been closed).
      def run_coordinator
        stopped = 0
        live_workers = @workers.dup

        until stopped == @workers_count
          selected, message = select_worker_message(live_workers)
          if selected == :worker_remote_error
            stopped += 1
          elsif selected == @result_port
            stopped += handle_worker_message(message, live_workers)
          else
            # A worker Ractor itself was selected: it terminated.
            live_workers.delete(selected)
            handle_worker_termination(selected)
            stopped += 1
          end
        end
      ensure
        close_ready_queue
        close_result_queue if @closed
      end

      # Waits for the next message from the result port or a live worker. A
      # `Ractor::RemoteError` is attributed to a worker, handled, and reported
      # as `[:worker_remote_error, nil]`.
      def select_worker_message(live_workers)
        Ractor.select(@result_port, *live_workers)
      rescue Ractor::RemoteError => error
        worker = remote_error_worker(error, live_workers) || failed_worker_for_remote_error(live_workers)
        live_workers.delete(worker) if worker
        handle_worker_remote_error(worker, error)
        [:worker_remote_error, nil]
      end

      # Returns the Ractor named by `error.ractor` when the error exposes it
      # and it is still live; nil otherwise.
      def remote_error_worker(error, live_workers)
        return unless error.respond_to?(:ractor)

        worker = error.ractor
        live_workers.include?(worker) ? worker : nil
      end

      # Fallback attribution for a remote error: the live worker holding the
      # lowest active sequence, or the first live worker when none has a job.
      def failed_worker_for_remote_error(live_workers)
        @worker_state_mutex.synchronize do
          live_workers
            .select { |worker| @active_sequences.key?(worker) }
            .min_by { |worker| @active_sequences.fetch(worker) }
        end || live_workers.first
      end

      # Clears the failed worker's job and, unless the boundary is closed or
      # shutdown was already requested, delivers a termination error at that
      # job's sequence (or at `@next_sequence` when it had none).
      def handle_worker_remote_error(worker, error)
        sequence = worker ? clear_worker_job(worker) : nil
        sequence ||= @next_sequence
        return if @closed || @worker_shutdown_sent

        deliver_worker_termination_error(worker, sequence, cause: error)
      end

      # Dispatches a message received on the result port.
      #
      # @return [Integer] number of workers that stopped as a result (0 or 1)
      def handle_worker_message(message, live_workers)
        case message.fetch(0)
        when :ready
          deliver_ready_worker(message.fetch(1))
          0
        when :value
          handle_worker_value_message(message)
          0
        when :error
          handle_worker_error_message(message)
          0
        when :stopped
          handle_worker_stopped_message(message, live_workers)
        end
      end

      # Forwards `[:value, id, sequence, value]` to the consumer as
      # `[:value, sequence, value]` and clears the worker's active job.
      def handle_worker_value_message(message)
        worker = worker_for_id(message.fetch(1))
        sequence = message.fetch(2)
        value = message.fetch(3)

        clear_worker_job(worker)
        deliver_result([:value, sequence, value])
      end

      # Forwards a worker `:error` message to the consumer as a
      # `RactorMapError` and clears the worker's active job.
      def handle_worker_error_message(message)
        worker = worker_for_id(message.fetch(1))

        clear_worker_job(worker)
        deliver_result(normalize_worker_error_message(message))
      end

      # Handles `[:stopped, id]`. If the worker still had an active job and no
      # shutdown is underway, a termination error is delivered for that job.
      #
      # @return [Integer] always 1 (one worker stopped)
      def handle_worker_stopped_message(message, live_workers)
        worker = worker_for_id(message.fetch(1))
        live_workers.delete(worker)
        sequence = clear_worker_job(worker)
        deliver_worker_termination_error(worker, sequence) if sequence && !@closed && !@worker_shutdown_sent
        1
      end

      # Handles a worker Ractor that terminated without a `:stopped` message.
      def handle_worker_termination(worker)
        sequence = clear_worker_job(worker) || @next_sequence
        return if @closed || @worker_shutdown_sent

        deliver_worker_termination_error(worker, sequence)
      end

      # Closes the ready queue and delivers a `:worker_termination`
      # `RactorMapError` for `sequence` to the consumer.
      def deliver_worker_termination_error(worker, sequence, cause: nil)
        close_ready_queue
        error =
          RactorMapError.new(
            sequence: sequence,
            kind: :worker_termination,
            cause_class_name: cause&.class&.name || worker.class.name,
            cause_message: cause&.message || "worker terminated without a lifecycle message",
            cause: cause
          )

        deliver_result([:error, sequence, error])
      end

      # Makes the worker with `worker_id` available to the consumer again.
      def deliver_ready_worker(worker_id)
        return if @closed

        push_until_delivered_or_closed(@ready_workers, worker_for_id(worker_id), suppress_data: false)
      end

      # Hands a result message to the consumer through the results queue.
      def deliver_result(message)
        return if @closed

        push_until_delivered_or_closed(@results, message, suppress_data: true)
      end

      # Pushes `message` onto `queue` without blocking, retrying after a short
      # sleep while the queue is full. Gives up once the boundary is closed or
      # the queue itself has been closed: a closed queue can never accept the
      # message, so retrying would spin the coordinator forever.
      #
      # @param suppress_data [Boolean] kept for interface compatibility; the
      #   message is dropped on close for either value
      def push_until_delivered_or_closed(queue, message, suppress_data:)
        loop do
          return if @closed

          queue.push(message, true)
          return
        rescue ClosedQueueError
          return
        rescue ThreadError
          # Queue is full: wait briefly and try again.
          sleep READY_WAIT_INTERVAL
        end
      end

      # Converts a worker `:error` message into `[:error, sequence, error]`
      # where `error` is a `RactorMapError` built from the reported details.
      def normalize_worker_error_message(message)
        sequence = message.fetch(2)
        kind = message.fetch(3)
        cause_class_name = message.fetch(4)
        cause_message = message.fetch(5)
        error =
          RactorMapError.new(
            sequence: sequence,
            kind: kind,
            cause_class_name: cause_class_name,
            cause_message: cause_message
          )

        [:error, sequence, error]
      end

      # Looks up a worker by its spawn index; raises IndexError when unknown.
      def worker_for_id(worker_id)
        @workers.fetch(worker_id)
      end

      # Remembers which sequence `worker` is processing (mutex-guarded).
      def track_worker_job(worker, sequence)
        @worker_state_mutex.synchronize do
          @active_sequences[worker] = sequence
        end
      end

      # Forgets the worker's active job and returns its sequence, or nil when
      # it had none (mutex-guarded).
      def clear_worker_job(worker)
        @worker_state_mutex.synchronize do
          @active_sequences.delete(worker)
        end
      end

      # Stops admitting new jobs; optionally closes upstream as well, ignoring
      # any error raised while closing it.
      def close_admission(close_upstream: true)
        return if @admission_closed

        @admission_closed = true
        close_upstream(record_error: false) if close_upstream
      end

      # Sends `[:shutdown]` to every worker once. Send failures are ignored
      # (best effort).
      def request_worker_shutdown
        return unless @started
        return if @worker_shutdown_sent

        @worker_shutdown_sent = true
        @workers.each do |worker|
          worker.send([:shutdown])
        rescue StandardError
          nil
        end
      end

      # Polls until the coordinator thread has finished, then joins it.
      def wait_for_workers
        return unless @coordinator

        sleep READY_WAIT_INTERVAL while @coordinator.alive?
        @coordinator.join
      end

      # Closes upstream at most once.
      #
      # @param record_error [Boolean] when true, a close failure is also kept
      #   in `@upstream_close_error` so `#close` can raise it later
      # @return [StandardError, nil] the error raised by `upstream.close`, or
      #   nil on success or when upstream was already closed
      def close_upstream(record_error: true)
        return nil if @upstream_closed

        @upstream_closed = true
        @upstream.close
        nil
      rescue StandardError => error
        @upstream_close_error ||= error if record_error
        error
      end

      # Closes the queue of ready workers.
      def close_ready_queue
        @ready_workers.close
      end

      # Closes the results queue.
      def close_result_queue
        @results.close
      end

      # Wraps `error` in a `RactorMapError` for the given sequence and kind.
      def build_ractor_map_error(sequence, kind, error)
        RactorMapError.new(
          sequence: sequence,
          kind: kind,
          cause_class_name: error.class.name,
          cause_message: error.message,
          cause: error
        )
      end

      # Starts one worker Ractor. The worker announces `[:ready, id]`, then
      # loops: receive a message, stop on `:shutdown`, otherwise run the
      # mapper on the job value and report `:value` or `:error`, then announce
      # `:ready` again. `[:stopped, id]` is always sent on exit.
      def self.spawn_worker(worker_id, result_port, transform, output_transfer)
        Ractor.new(worker_id, result_port, transform, output_transfer) do |id, port, mapper, transfer|
          current_sequence = nil

          begin
            port.send([:ready, id])

            loop do
              message = Ractor.receive
              break if message.fetch(0) == :shutdown

              current_sequence = message.fetch(1)
              value = message.fetch(2)
              begin
                mapped_value = mapper.call(value)
              rescue Exception => error # rubocop:disable Lint/RescueException
                port.send([:error, id, current_sequence, :worker, error.class.name, error.message])
              else
                begin
                  if transfer == :move
                    port.send([:value, id, current_sequence, mapped_value], move: true)
                  else
                    port.send([:value, id, current_sequence, mapped_value])
                  end
                rescue Exception => error # rubocop:disable Lint/RescueException
                  port.send([:error, id, current_sequence, :output_transfer, error.class.name, error.message])
                end
              end

              current_sequence = nil
              port.send([:ready, id])
            end
          rescue Exception => error # rubocop:disable Lint/RescueException
            # -1 marks a failure that happened outside any job.
            sequence = current_sequence || -1
            port.send([:error, id, sequence, :worker_termination, error.class.name, error.message])
          ensure
            port.send([:stopped, id])
          end
        end
      end

      private_class_method :spawn_worker
    end
  end
end