fiber_stream 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,230 @@
1
+ # frozen_string_literal: true
2
+
3
module FiberStream
  module Pull
    # Scheduler-backed ready-order source merge.
    #
    # Each input source is materialized by a scheduled producer fiber on first
    # downstream demand. Producers publish values, completion, and failures into
    # a bounded mailbox; downstream emits values in mailbox arrival order while
    # preserving each input's own order.
    #
    # Once the merge is terminal (closed, completed, or failed) producers stop
    # pulling, and a stream that finishes materializing late is closed right
    # away instead of being left open.
    class Merge
      SIDE_ORDER = %i[left right].freeze
      CancellationError = Class.new(StandardError)
      ValueMessage = Data.define(:side, :value)
      DoneMessage = Data.define(:side)
      ErrorMessage = Data.define(:side, :error)
      private_constant :ValueMessage, :DoneMessage, :ErrorMessage

      # @param left_materializer [#call] invoked with no arguments by the left
      #   producer; its result is used as a stream (`next` and `close` are
      #   called on it)
      # @param right_materializer [#call] same contract, for the right side
      def initialize(left_materializer, right_materializer)
        @materializers = { left: left_materializer, right: right_materializer }
        @streams = { left: nil, right: nil }
        @stream_closed = { left: false, right: false }
        @side_done = { left: false, right: false }
        @producers = {}
        @mailbox = nil
        @started = false
        @closed = false
        @done = false
      end

      # Returns the next merged value, starting the producers on first call.
      #
      # @return [Object] the next value, or DONE once the merge is closed,
      #   completed, or has failed
      # @raise [SchedulerRequiredError] when no fiber scheduler is installed or
      #   the current fiber is blocking
      # @raise [StandardError] the error reported by a producer
      def next
        return DONE if @closed || @done

        start
        next_message
      end

      # Closes the merge: closes every materialized stream and the mailbox,
      # then asks the scheduler to interrupt producers that are still alive.
      # Repeated calls are no-ops.
      #
      # @raise [StandardError] the first error raised while closing a stream
      def close
        return if @closed

        @closed = true
        @done = true
        close_error = close_materialized_streams
        close_mailbox
        raise close_error if close_error
      ensure
        cancel_producers
      end

      private

      # True once the merge can no longer emit values. `close` sets both
      # flags, while `complete` and `fail_with` set only `@done`, so producers
      # must look at both.
      def terminated?
        @closed || @done
      end

      # Creates the mailbox and schedules one producer fiber per side.
      def start
        return if @started

        validate_scheduler!

        # Capacity 1: a producer blocks in `push` until downstream pops.
        @mailbox = MergeMailbox.new(1)
        @started = true
        SIDE_ORDER.each do |side|
          @producers[side] = Fiber.schedule { run_producer(side) }
        end
      end

      # Pops mailbox messages until one yields a value, the merge completes,
      # or a producer error has to be raised.
      def next_message
        loop do
          message = @mailbox.pop
          # A closed, drained mailbox hands back nil.
          return complete if message.nil?

          case message
          in ValueMessage[value:]
            return value
          in DoneMessage[side:]
            mark_side_done(side)
            return complete if all_done?
          in ErrorMessage[error:]
            return fail_with(error)
          end
        end
      rescue MergeMailbox::Closed
        complete
      end

      # Producer fiber body: materializes one side and forwards its messages
      # until the side terminates, the mailbox closes, or the merge becomes
      # terminal.
      def run_producer(side)
        stream = materialize_side(side)

        loop do
          # Also stop after `fail_with`/`complete`, which leave @closed false.
          break if terminated?

          message = pull_message(side, stream)
          break unless deliver(message)
          break unless message.is_a?(ValueMessage)
        end
      rescue MergeMailbox::Closed, CancellationError
        nil
      rescue StandardError => error
        close_side(side, record_error: false)
        deliver(ErrorMessage.new(side:, error:)) unless terminated?
      end

      # Calls the side's materializer and records the resulting stream. When
      # the merge became terminal while the materializer was running, the
      # fresh stream is closed immediately so it does not leak.
      def materialize_side(side)
        stream = @materializers.fetch(side).call
        @streams[side] = stream
        close_side(side) if terminated?
        stream
      end

      # Pulls one element from +stream+ and wraps the outcome in a message.
      # A failing pull closes the side and is reported as an ErrorMessage.
      def pull_message(side, stream)
        value = stream.next
        return terminal_done_message(side) if Pull.done?(value)

        ValueMessage.new(side:, value:)
      rescue CancellationError
        # Cancellation is not a stream failure; let run_producer end the fiber.
        raise
      rescue StandardError => error
        close_side(side, record_error: false)
        ErrorMessage.new(side:, error:)
      end

      # Closes an exhausted side; a close failure is reported as an error
      # message instead of a done message.
      def terminal_done_message(side)
        close_error = close_side(side)
        close_error ? ErrorMessage.new(side:, error: close_error) : DoneMessage.new(side:)
      end

      # @return [Boolean] false when the mailbox was already closed
      def deliver(message)
        @mailbox.push(message)
        true
      rescue MergeMailbox::Closed
        false
      end

      def mark_side_done(side)
        @side_done[side] = true
      end

      def all_done?
        SIDE_ORDER.all? { |side| @side_done.fetch(side) }
      end

      # Marks the merge finished and closes the mailbox.
      #
      # @return [Object] DONE
      def complete
        @done = true
        close_mailbox
        DONE
      end

      # Tears the merge down and raises +error+. Errors raised while closing
      # the streams are dropped so the original failure is the one reported.
      def fail_with(error)
        @done = true
        close_mailbox
        close_materialized_streams
        cancel_producers
        raise error
      end

      # Closes both sides in SIDE_ORDER.
      #
      # @return [StandardError, nil] the first close error, if any
      def close_materialized_streams
        first_error = nil

        SIDE_ORDER.each do |side|
          close_error = close_side(side)
          first_error ||= close_error
        end

        first_error
      end

      # Closes one side's stream at most once.
      #
      # @param record_error [Boolean] when false, a close failure is swallowed
      # @return [StandardError, nil] the close error when +record_error+ is
      #   true and closing raised; nil otherwise
      def close_side(side, record_error: true)
        return nil if @stream_closed.fetch(side)

        stream = @streams[side]
        return nil unless stream

        # Flip the bookkeeping first so a failing close is never retried.
        @stream_closed[side] = true
        @streams[side] = nil
        stream.close
        nil
      rescue StandardError => error
        error if record_error
      end

      def close_mailbox
        @mailbox&.close
      end

      # Best-effort cancellation: does nothing unless the current scheduler
      # responds to `fiber_interrupt`, and ignores failures per fiber.
      def cancel_producers
        scheduler = Fiber.scheduler
        return unless scheduler.respond_to?(:fiber_interrupt)

        @producers.each_value do |fiber|
          next unless fiber&.alive?

          scheduler.fiber_interrupt(fiber, CancellationError.new)
        rescue StandardError
          nil
        end
      end

      # @raise [SchedulerRequiredError] unless a scheduler is installed and
      #   the current fiber is non-blocking
      def validate_scheduler!
        return if Fiber.scheduler && !Fiber.current.blocking?

        message =
          if Fiber.scheduler
            "Source.merge requires a non-blocking fiber"
          else
            "Source.merge requires Fiber.scheduler"
          end
        raise SchedulerRequiredError, message
      end

      # Bounded queue shared by the producers and the downstream consumer.
      class MergeMailbox
        Closed = Class.new(StandardError)

        # @param capacity [Integer] maximum number of queued messages
        def initialize(capacity)
          @queue = Thread::SizedQueue.new(capacity)
        end

        # @raise [Closed] when the mailbox is closed, including while the
        #   caller is blocked waiting for free capacity
        def push(message)
          @queue << message
        rescue ClosedQueueError
          raise Closed
        end

        # @return [Object, nil] the next message, or nil once the mailbox is
        #   closed and drained (a blocking pop on a closed queue returns nil
        #   rather than raising; the rescue is kept as a safeguard)
        def pop
          @queue.pop
        rescue ClosedQueueError
          raise Closed
        end

        def close
          @queue.close
        end
      end
    end
  end
end
@@ -11,6 +11,11 @@ module FiberStream
11
11
  class ParallelMapBoundary
12
12
  TERMINAL_RESULT_CAPACITY = 1
13
13
  CancellationError = Class.new(StandardError)
14
+ JobMessage = Data.define(:sequence, :value)
15
+ ValueMessage = Data.define(:sequence, :value)
16
+ DoneMessage = Data.define(:sequence)
17
+ ErrorMessage = Data.define(:sequence, :error)
18
+ private_constant :JobMessage, :ValueMessage, :DoneMessage, :ErrorMessage
14
19
 
15
20
  def initialize(upstream, concurrency, transform)
16
21
  @upstream = upstream
@@ -85,28 +90,23 @@ module FiberStream
85
90
  end
86
91
 
87
92
  def emit(message)
88
- case message.fetch(0)
89
- when :value
90
- emit_value(message)
91
- when :done
93
+ case message
94
+ in ValueMessage[sequence:, value:]
95
+ emit_value(sequence, value)
96
+ in DoneMessage
92
97
  complete
93
- when :error
94
- fail_with_ordered_error(message)
98
+ in ErrorMessage[sequence:, error:]
99
+ fail_with_ordered_error(sequence, error)
95
100
  end
96
101
  end
97
102
 
98
- def emit_value(message)
99
- sequence = message.fetch(1)
100
- value = message.fetch(2)
103
+ def emit_value(sequence, value)
101
104
  @next_emit_sequence = sequence + 1
102
105
  return_permit unless @admission_closed
103
106
  value
104
107
  end
105
108
 
106
- def fail_with_ordered_error(message)
107
- sequence = message.fetch(1)
108
- error = message.fetch(2)
109
-
109
+ def fail_with_ordered_error(sequence, error)
110
110
  if @failure_sequence && sequence > @failure_sequence
111
111
  @next_emit_sequence = sequence + 1
112
112
  return next_message
@@ -130,7 +130,7 @@ module FiberStream
130
130
  break unless take_permit
131
131
 
132
132
  message = pull_job_message
133
- if message.fetch(0) == :job
133
+ if message.is_a?(JobMessage)
134
134
  break unless deliver_job(message)
135
135
  else
136
136
  close_admission(close_upstream: false)
@@ -151,15 +151,19 @@ module FiberStream
151
151
 
152
152
  sequence = @next_sequence
153
153
  @next_sequence += 1
154
- [:job, sequence, value]
154
+ JobMessage.new(sequence:, value:)
155
155
  rescue StandardError => error
156
156
  close_upstream(record_error: false)
157
- [:error, @next_sequence, error]
157
+ ErrorMessage.new(sequence: @next_sequence, error:)
158
158
  end
159
159
 
160
160
  def terminal_done_message
161
161
  close_error = close_upstream
162
- close_error ? [:error, @next_sequence, close_error] : [:done, @next_sequence]
162
+ if close_error
163
+ ErrorMessage.new(sequence: @next_sequence, error: close_error)
164
+ else
165
+ DoneMessage.new(sequence: @next_sequence)
166
+ end
163
167
  end
164
168
 
165
169
  def run_worker
@@ -176,23 +180,23 @@ module FiberStream
176
180
  end
177
181
 
178
182
  def map_job(message)
179
- sequence = message.fetch(1)
180
- value = message.fetch(2)
181
- [:value, sequence, @transform.call(value)]
183
+ sequence = message.sequence
184
+ value = message.value
185
+ ValueMessage.new(sequence:, value: @transform.call(value))
182
186
  rescue CancellationError
183
187
  raise
184
188
  rescue StandardError => error
185
- [:error, sequence, error]
189
+ ErrorMessage.new(sequence:, error:)
186
190
  end
187
191
 
188
192
  def record_result(message)
189
- if message.fetch(0) == :error
190
- sequence = message.fetch(1)
193
+ if message.is_a?(ErrorMessage)
194
+ sequence = message.sequence
191
195
  @failure_sequence = sequence if @failure_sequence.nil? || sequence < @failure_sequence
192
196
  close_admission
193
197
  end
194
198
 
195
- @pending[message.fetch(1)] = message
199
+ @pending[message.sequence] = message
196
200
  end
197
201
 
198
202
  def drain_available_results
@@ -10,7 +10,18 @@ module FiberStream
10
10
  # is below `workers`, preserving bounded backpressure and ordered output.
11
11
  class RactorMapBoundary
12
12
  TERMINAL_RESULT_CAPACITY = 1
13
- READY_WAIT_INTERVAL = 0.001
13
+ Job = ::Data.define(:sequence, :value)
14
+ Shutdown = ::Data.define
15
+ Ready = ::Data.define(:worker_id)
16
+ WorkerValue = ::Data.define(:worker_id, :sequence, :value)
17
+ WorkerFailure = ::Data.define(:worker_id, :sequence, :kind, :cause_class_name, :cause_message)
18
+ Stopped = ::Data.define(:worker_id)
19
+ ResultValue = ::Data.define(:sequence, :value)
20
+ ResultDone = ::Data.define(:sequence)
21
+ ResultError = ::Data.define(:sequence, :error)
22
+
23
+ private_constant :Job, :Shutdown, :Ready, :WorkerValue, :WorkerFailure, :Stopped
24
+ private_constant :ResultValue, :ResultDone, :ResultError
14
25
 
15
26
  def initialize(upstream, workers, input_transfer, output_transfer, transform)
16
27
  @upstream = upstream
@@ -53,6 +64,8 @@ module FiberStream
53
64
  @done = true
54
65
  close_error = close_upstream
55
66
  close_admission(close_upstream: false)
67
+ close_ready_queue
68
+ close_result_queue
56
69
  request_worker_shutdown
57
70
  wait_for_workers
58
71
  close_error ||= @upstream_close_error
@@ -103,7 +116,7 @@ module FiberStream
103
116
  break unless worker
104
117
 
105
118
  message = pull_job_message
106
- if message.fetch(0) == :job
119
+ if message.is_a?(Job)
107
120
  @in_flight += 1
108
121
  break unless deliver_job(worker, message)
109
122
  else
@@ -120,19 +133,23 @@ module FiberStream
120
133
 
121
134
  sequence = @next_sequence
122
135
  @next_sequence += 1
123
- [:job, sequence, value]
136
+ Job.new(sequence, value)
124
137
  rescue StandardError => error
125
138
  close_upstream(record_error: false)
126
- [:error, @next_sequence, error]
139
+ ResultError.new(sequence: @next_sequence, error:)
127
140
  end
128
141
 
129
142
  def terminal_done_message
130
143
  close_error = close_upstream
131
- close_error ? [:error, @next_sequence, close_error] : [:done, @next_sequence]
144
+ if close_error
145
+ ResultError.new(sequence: @next_sequence, error: close_error)
146
+ else
147
+ ResultDone.new(sequence: @next_sequence)
148
+ end
132
149
  end
133
150
 
134
151
  def deliver_job(worker, message)
135
- sequence = message.fetch(1)
152
+ sequence = message.sequence
136
153
  track_worker_job(worker, sequence)
137
154
 
138
155
  if @input_transfer == :move
@@ -143,8 +160,7 @@ module FiberStream
143
160
  true
144
161
  rescue StandardError => error
145
162
  clear_worker_job(worker)
146
- sequence = message.fetch(1)
147
- record_result([:error, sequence, build_ractor_map_error(sequence, :input_transfer, error)])
163
+ record_result(ResultError.new(sequence:, error: build_ractor_map_error(sequence, :input_transfer, error)))
148
164
  false
149
165
  end
150
166
 
@@ -162,28 +178,23 @@ module FiberStream
162
178
  end
163
179
 
164
180
  def emit(message)
165
- case message.fetch(0)
166
- when :value
167
- emit_value(message)
168
- when :done
181
+ case message
182
+ in ResultValue[sequence:, value:]
183
+ emit_value(sequence, value)
184
+ in ResultDone
169
185
  complete
170
- when :error
171
- fail_with_ordered_error(message)
186
+ in ResultError[sequence:, error:]
187
+ fail_with_ordered_error(sequence, error)
172
188
  end
173
189
  end
174
190
 
175
- def emit_value(message)
176
- sequence = message.fetch(1)
177
- value = message.fetch(2)
191
+ def emit_value(sequence, value)
178
192
  @next_emit_sequence = sequence + 1
179
193
  @in_flight -= 1 if @in_flight.positive?
180
194
  value
181
195
  end
182
196
 
183
- def fail_with_ordered_error(message)
184
- sequence = message.fetch(1)
185
- error = message.fetch(2)
186
-
197
+ def fail_with_ordered_error(sequence, error)
187
198
  if @failure_sequence && sequence > @failure_sequence
188
199
  @next_emit_sequence = sequence + 1
189
200
  @in_flight -= 1 if @in_flight.positive?
@@ -207,14 +218,14 @@ module FiberStream
207
218
  end
208
219
 
209
220
  def record_result(message)
210
- if message.fetch(0) == :error
211
- sequence = message.fetch(1)
221
+ if message.is_a?(ResultError)
222
+ sequence = message.sequence
212
223
  @failure_sequence = sequence if @failure_sequence.nil? || sequence < @failure_sequence
213
224
  close_admission
214
225
  request_worker_shutdown
215
226
  end
216
227
 
217
- @pending[message.fetch(1)] = message
228
+ @pending[message.sequence] = message
218
229
  end
219
230
 
220
231
  def drain_available_results
@@ -282,39 +293,39 @@ module FiberStream
282
293
  end
283
294
 
284
295
  def handle_worker_message(message, live_workers)
285
- case message.fetch(0)
286
- when :ready
287
- deliver_ready_worker(message.fetch(1))
296
+ case message
297
+ in Ready[worker_id]
298
+ deliver_ready_worker(worker_id)
288
299
  0
289
- when :value
300
+ in WorkerValue
290
301
  handle_worker_value_message(message)
291
302
  0
292
- when :error
303
+ in WorkerFailure
293
304
  handle_worker_error_message(message)
294
305
  0
295
- when :stopped
306
+ in Stopped
296
307
  handle_worker_stopped_message(message, live_workers)
297
308
  end
298
309
  end
299
310
 
300
311
  def handle_worker_value_message(message)
301
- worker = worker_for_id(message.fetch(1))
302
- sequence = message.fetch(2)
303
- value = message.fetch(3)
312
+ worker = worker_for_id(message.worker_id)
313
+ sequence = message.sequence
314
+ value = message.value
304
315
 
305
316
  clear_worker_job(worker)
306
- deliver_result([:value, sequence, value])
317
+ deliver_result(ResultValue.new(sequence:, value:))
307
318
  end
308
319
 
309
320
  def handle_worker_error_message(message)
310
- worker = worker_for_id(message.fetch(1))
321
+ worker = worker_for_id(message.worker_id)
311
322
 
312
323
  clear_worker_job(worker)
313
324
  deliver_result(normalize_worker_error_message(message))
314
325
  end
315
326
 
316
327
  def handle_worker_stopped_message(message, live_workers)
317
- worker = worker_for_id(message.fetch(1))
328
+ worker = worker_for_id(message.worker_id)
318
329
  live_workers.delete(worker)
319
330
  sequence = clear_worker_job(worker)
320
331
  deliver_worker_termination_error(worker, sequence) if sequence && !@closed && !@worker_shutdown_sent
@@ -339,38 +350,34 @@ module FiberStream
339
350
  cause: cause
340
351
  )
341
352
 
342
- deliver_result([:error, sequence, error])
353
+ deliver_result(ResultError.new(sequence:, error:))
343
354
  end
344
355
 
345
356
  def deliver_ready_worker(worker_id)
346
357
  return if @closed
347
358
 
348
- push_until_delivered_or_closed(@ready_workers, worker_for_id(worker_id), suppress_data: false)
359
+ push_until_delivered_or_closed(@ready_workers, worker_for_id(worker_id))
349
360
  end
350
361
 
351
362
  def deliver_result(message)
352
363
  return if @closed
353
364
 
354
- push_until_delivered_or_closed(@results, message, suppress_data: true)
365
+ push_until_delivered_or_closed(@results, message)
355
366
  end
356
367
 
357
- def push_until_delivered_or_closed(queue, message, suppress_data:)
358
- loop do
359
- return if @closed && suppress_data
360
- return if @closed && !suppress_data
368
+ def push_until_delivered_or_closed(queue, message)
369
+ return if @closed
361
370
 
362
- queue.push(message, true)
363
- return
364
- rescue ThreadError, ClosedQueueError
365
- sleep READY_WAIT_INTERVAL
366
- end
371
+ queue.push(message)
372
+ rescue ThreadError, ClosedQueueError
373
+ nil
367
374
  end
368
375
 
369
376
  def normalize_worker_error_message(message)
370
- sequence = message.fetch(2)
371
- kind = message.fetch(3)
372
- cause_class_name = message.fetch(4)
373
- cause_message = message.fetch(5)
377
+ sequence = message.sequence
378
+ kind = message.kind
379
+ cause_class_name = message.cause_class_name
380
+ cause_message = message.cause_message
374
381
  error =
375
382
  RactorMapError.new(
376
383
  sequence: sequence,
@@ -379,7 +386,7 @@ module FiberStream
379
386
  cause_message: cause_message
380
387
  )
381
388
 
382
- [:error, sequence, error]
389
+ ResultError.new(sequence:, error:)
383
390
  end
384
391
 
385
392
  def worker_for_id(worker_id)
@@ -411,7 +418,7 @@ module FiberStream
411
418
 
412
419
  @worker_shutdown_sent = true
413
420
  @workers.each do |worker|
414
- worker.send([:shutdown])
421
+ worker.send(Shutdown.new)
415
422
  rescue StandardError
416
423
  nil
417
424
  end
@@ -420,7 +427,6 @@ module FiberStream
420
427
  def wait_for_workers
421
428
  return unless @coordinator
422
429
 
423
- sleep READY_WAIT_INTERVAL while @coordinator.alive?
424
430
  @coordinator.join
425
431
  end
426
432
 
@@ -456,40 +462,58 @@ module FiberStream
456
462
  def self.spawn_worker(worker_id, result_port, transform, output_transfer)
457
463
  Ractor.new(worker_id, result_port, transform, output_transfer) do |id, port, mapper, transfer|
458
464
  current_sequence = nil
465
+ send_control =
466
+ lambda do |message|
467
+ port.send(message)
468
+ true
469
+ rescue Exception # rubocop:disable Lint/RescueException
470
+ false
471
+ end
472
+ send_failure =
473
+ lambda do |sequence, kind, error|
474
+ send_control.call(WorkerFailure.new(id, sequence, kind, error.class.name, error.message))
475
+ rescue Exception # rubocop:disable Lint/RescueException
476
+ false
477
+ end
459
478
 
460
479
  begin
461
- port.send([:ready, id])
462
-
463
- loop do
464
- message = Ractor.receive
465
- break if message.fetch(0) == :shutdown
466
-
467
- current_sequence = message.fetch(1)
468
- value = message.fetch(2)
469
- begin
470
- mapped_value = mapper.call(value)
471
- rescue Exception => error # rubocop:disable Lint/RescueException
472
- port.send([:error, id, current_sequence, :worker, error.class.name, error.message])
473
- else
480
+ if send_control.call(Ready.new(id))
481
+ loop do
482
+ message = Ractor.receive
483
+ case message
484
+ in Shutdown
485
+ break
486
+ in Job[sequence, value]
487
+ current_sequence = sequence
488
+ else
489
+ raise TypeError, "invalid ractor_map worker message: #{message.class}"
490
+ end
491
+
474
492
  begin
475
- if transfer == :move
476
- port.send([:value, id, current_sequence, mapped_value], move: true)
477
- else
478
- port.send([:value, id, current_sequence, mapped_value])
479
- end
493
+ mapped_value = mapper.call(value)
480
494
  rescue Exception => error # rubocop:disable Lint/RescueException
481
- port.send([:error, id, current_sequence, :output_transfer, error.class.name, error.message])
495
+ break unless send_failure.call(current_sequence, :worker, error)
496
+ else
497
+ begin
498
+ if transfer == :move
499
+ port.send(WorkerValue.new(id, current_sequence, mapped_value), move: true)
500
+ else
501
+ port.send(WorkerValue.new(id, current_sequence, mapped_value))
502
+ end
503
+ rescue Exception => error # rubocop:disable Lint/RescueException
504
+ break unless send_failure.call(current_sequence, :output_transfer, error)
505
+ end
482
506
  end
483
- end
484
507
 
485
- current_sequence = nil
486
- port.send([:ready, id])
508
+ current_sequence = nil
509
+ break unless send_control.call(Ready.new(id))
510
+ end
487
511
  end
488
512
  rescue Exception => error # rubocop:disable Lint/RescueException
489
513
  sequence = current_sequence || -1
490
- port.send([:error, id, sequence, :worker_termination, error.class.name, error.message])
514
+ send_failure.call(sequence, :worker_termination, error)
491
515
  ensure
492
- port.send([:stopped, id])
516
+ send_control.call(Stopped.new(id))
493
517
  end
494
518
  end
495
519
  end