fiber_stream 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,311 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FiberStream
4
+ module Pull
5
+ # Unordered scheduler-backed worker boundary for
6
+ # `Flow.parallel_unordered_map`.
7
+ #
8
+ # A single dispatcher pulls upstream and a bounded worker pool maps values.
9
+ # Downstream emits worker results in completion order. Admission is
10
+ # permit-based to keep queued, running, and completed pulled-but-unemitted
11
+ # work bounded by the configured concurrency.
12
+ class ParallelUnorderedMapBoundary
13
# Extra capacity added to the results queue on top of `concurrency`
# (see the @results sizing in initialize).
+ TERMINAL_RESULT_CAPACITY = 1
14
# Exception delivered to worker/dispatcher fibers by cancel_fibers.
+ CancellationError = Class.new(StandardError)
15
# A pulled upstream value queued for a worker.
+ JobMessage = Data.define(:sequence, :value)
16
# A worker's mapped result.
+ ValueMessage = Data.define(:sequence, :value)
17
# Upstream reported completion and closed without error.
+ DoneMessage = Data.define
18
# A failure from the transform or from pulling upstream.
+ ErrorMessage = Data.define(:sequence, :error)
19
# Upstream reported completion but closing it returned an error.
+ CloseErrorMessage = Data.define(:error)
20
+ private_constant :JobMessage, :ValueMessage, :DoneMessage, :ErrorMessage, :CloseErrorMessage
21
+
22
# Sets up queues and state flags; no fibers are scheduled here (see start).
#
# upstream    - source object; this class calls #next and #close on it.
# concurrency - number of permits and worker fibers; also sizes the queues.
# transform   - object invoked with #call for each pulled value.
+ def initialize(upstream, concurrency, transform)
23
+ @upstream = upstream
24
+ @concurrency = concurrency
25
+ @transform = transform
26
+ @permits = Thread::SizedQueue.new(concurrency)
27
+ @jobs = Thread::SizedQueue.new(concurrency)
28
+ @results = Thread::SizedQueue.new(concurrency + TERMINAL_RESULT_CAPACITY)
29
+ @workers = []
30
+ @dispatcher = nil
31
+ @next_sequence = 0
32
+ @outstanding_jobs = 0
33
+ @terminal_message = nil
34
+ @started = false
35
+ @closed = false
36
+ @done = false
37
+ @admission_closed = false
38
+ @upstream_closing = false
39
+ @upstream_closed = false
40
+ @upstream_close_error = nil
41
+ @upstream_close_done = Thread::SizedQueue.new(1)
42
+
43
# Pre-fill the permit queue with one token per unit of concurrency.
+ concurrency.times { @permits << true }
44
+ end
45
+
46
# Returns DONE once the boundary is closed or finished. Otherwise starts the
# fibers on first use (start is a no-op afterwards) and returns whatever
# next_message yields; next_message may raise.
+ def next
47
+ return DONE if @closed || @done
48
+
49
+ start
50
+ next_message
51
+ end
52
+
53
# Marks the boundary closed and done, closes upstream and all internal
# queues, then raises the upstream close error if there is one: either the
# error returned by close_upstream just now or one previously stored in
# @upstream_close_error.
#
# Returns early when already closed. The ensure clause runs cancel_fibers on
# every exit path, including that early return and the raise.
+ def close
54
+ return if @closed
55
+
56
+ @closed = true
57
+ @done = true
58
+ close_error = close_upstream
59
+ close_internal_queues
60
+ close_error ||= @upstream_close_error
61
+ raise close_error if close_error
62
+ ensure
63
+ cancel_fibers
64
+ end
65
+
66
+ private
67
+
68
# Schedules the worker and dispatcher fibers the first time it is called;
# later calls return immediately. validate_scheduler! runs before @started is
# set, so a failed validation leaves the boundary unstarted.
+ def start
69
+ return if @started
70
+
71
+ validate_scheduler!
72
+
73
+ @started = true
74
# One worker fiber per unit of concurrency, then a single dispatcher.
+ @concurrency.times do
75
+ @workers << Fiber.schedule { run_worker }
76
+ end
77
+ @dispatcher = Fiber.schedule { run_dispatcher }
78
+ end
79
+
80
# Pops from @results until something can be handed downstream.
#
# - A stored terminal message is emitted once terminal_ready? holds.
# - A nil pop finishes the stream via complete.
# - ValueMessage returns the mapped value via emit_value.
# - DoneMessage / CloseErrorMessage is only remembered in @terminal_message;
#   the loop keeps going so remaining results are emitted first.
# - ErrorMessage goes to fail_with_error, which raises.
+ def next_message
81
+ loop do
82
+ return emit_terminal(@terminal_message) if terminal_ready?
83
+
84
+ message = @results.pop
85
+ return complete if message.nil?
86
+
87
+ case message
88
+ in ValueMessage[sequence:, value:]
89
+ return emit_value(sequence, value)
90
+ in DoneMessage | CloseErrorMessage
91
+ @terminal_message = message
92
+ in ErrorMessage[sequence:, error:]
93
+ fail_with_error(sequence, error)
94
+ end
95
+ end
96
+ end
97
+
98
# Accounts for one emitted result: decrements @outstanding_jobs, hands a
# permit back unless admission is already closed, and returns the value.
# The sequence argument is unused.
+ def emit_value(_sequence, value)
99
+ @outstanding_jobs -= 1
100
+ return_permit unless @admission_closed
101
+ value
102
+ end
103
+
104
# Truthy when a terminal message has been stored and @outstanding_jobs is zero.
+ def terminal_ready?
105
+ @terminal_message && @outstanding_jobs.zero?
106
+ end
107
+
108
# Acts on a stored terminal message: DoneMessage completes the stream;
# CloseErrorMessage raises its error through fail_with_error without closing
# admission again (close_admission: false).
+ def emit_terminal(message)
109
+ case message
110
+ in DoneMessage
111
+ complete
112
+ in CloseErrorMessage[error:]
113
+ fail_with_error(@next_sequence, error, close_admission: false)
114
+ end
115
+ end
116
+
117
# Marks the stream done, optionally closes admission, closes the results
# queue, interrupts the fibers, and raises `error`. Never returns normally.
# The sequence argument is unused.
#
# close_admission - when true (default) the close_admission method is called.
#                   The keyword shadows the method name, hence the explicit
#                   parentheses on the call.
+ def fail_with_error(_sequence, error, close_admission: true)
118
+ @done = true
119
+ close_admission() if close_admission
120
+ close_result_queue
121
+ cancel_fibers
122
+ raise error
123
+ end
124
+
125
# Marks the stream done, closes the results queue, and returns DONE.
+ def complete
126
+ @done = true
127
+ close_result_queue
128
+ DONE
129
+ end
130
+
131
# Dispatcher fiber body. Each iteration takes a permit, pulls one message via
# pull_job_message, and either queues it for the workers (JobMessage) or
# closes admission and forwards the non-job message to @results before
# stopping.
#
# The loop stops when the boundary is closed, admission is closed, no permit
# could be taken, or the job queue rejected the job. CancellationError is
# swallowed. On every exit the ensure clause closes upstream (unless it is
# already closed or the boundary itself is closed) and closes the job queue.
+ def run_dispatcher
132
+ loop do
133
+ break if @closed || @admission_closed
134
+ break unless take_permit
135
+
136
+ message = pull_job_message
137
+ if message.is_a?(JobMessage)
138
+ break unless deliver_job(message)
139
+ else
140
# pull_job_message has already attempted the upstream close on these paths.
+ close_admission(close_upstream: false)
141
+ deliver_result(message)
142
+ break
143
+ end
144
+ end
145
+ rescue CancellationError
146
+ nil
147
+ ensure
148
+ close_upstream unless @upstream_closed || @closed
149
+ close_job_queue
150
+ end
151
+
152
# Pulls one value from upstream and wraps it as a message for the dispatcher.
#
# Returns:
# - the result of terminal_done_message when upstream reports completion
#   (Pull.done?);
# - a JobMessage carrying the next sequence number otherwise, after bumping
#   @next_sequence and @outstanding_jobs;
# - an ErrorMessage when pulling raised a StandardError; upstream is closed
#   first and any error from that close is discarded (record_error: false).
#
# Raises CancellationError unchanged. It is a StandardError subclass, so
# without the dedicated clause a fiber cancellation would be reported as an
# upstream failure; map_job applies the same passthrough and run_dispatcher
# rescues the cancellation itself.
def pull_job_message
  value = @upstream.next
  return terminal_done_message if Pull.done?(value)

  sequence = @next_sequence
  @next_sequence += 1
  @outstanding_jobs += 1
  JobMessage.new(sequence: sequence, value: value)
rescue CancellationError
  raise
rescue StandardError => error
  close_upstream(record_error: false)
  ErrorMessage.new(sequence: @next_sequence, error: error)
end
164
+
165
# Closes upstream and builds the terminal message: CloseErrorMessage when
# close_upstream returned an error, DoneMessage otherwise.
+ def terminal_done_message
166
+ close_error = close_upstream
167
+ if close_error
168
+ CloseErrorMessage.new(error: close_error)
169
+ else
170
+ DoneMessage.new
171
+ end
172
+ end
173
+
174
# Worker fiber body: pops jobs from @jobs, maps each with map_job, and pushes
# the outcome to @results. Stops when the boundary is closed or the pop
# yields nil. CancellationError is swallowed so cancellation ends the fiber
# quietly.
+ def run_worker
175
+ loop do
176
+ break if @closed
177
+
178
+ message = @jobs.pop
179
+ break if message.nil?
180
+
181
+ deliver_result(map_job(message))
182
+ end
183
+ rescue CancellationError
184
+ nil
185
+ end
186
+
187
# Applies @transform to a job's value.
#
# Returns a ValueMessage on success or an ErrorMessage (same sequence) when
# the transform raises a StandardError. CancellationError is re-raised
# first; it is a StandardError subclass and would otherwise be captured as an
# ordinary failure.
+ def map_job(message)
188
+ sequence = message.sequence
189
+ value = message.value
190
+ ValueMessage.new(sequence:, value: @transform.call(value))
191
+ rescue CancellationError
192
+ raise
193
+ rescue StandardError => error
194
+ ErrorMessage.new(sequence:, error:)
195
+ end
196
+
197
# Stops admitting new work, once: sets @admission_closed, optionally closes
# upstream (discarding any close error via record_error: false), then closes
# the permit and job queues. Later calls are no-ops.
#
# close_upstream - keyword that shadows the method of the same name; the call
#                  below passes arguments, so it resolves to the method.
+ def close_admission(close_upstream: true)
198
+ return if @admission_closed
199
+
200
+ @admission_closed = true
201
+ close_upstream(record_error: false) if close_upstream
202
+ close_permit_queue
203
+ close_job_queue
204
+ end
205
+
206
# Takes one permit from @permits.
#
# Returns the permit token, or nil once the permit queue has been closed and
# drained. Queue#pop signals a closed, empty queue by returning nil rather
# than raising ClosedQueueError (only pushes raise it), so no rescue is
# needed here.
def take_permit
  @permits.pop
end
211
+
212
# Pushes a permit token back onto @permits; a ClosedQueueError from a closed
# permit queue is swallowed and nil is returned.
+ def return_permit
213
+ @permits << true
214
+ rescue ClosedQueueError
215
+ nil
216
+ end
217
+
218
# Pushes a job onto @jobs. Returns true on success, false when the job queue
# has been closed (ClosedQueueError).
+ def deliver_job(message)
219
+ @jobs << message
220
+ true
221
+ rescue ClosedQueueError
222
+ false
223
+ end
224
+
225
# Pushes a message onto @results. Returns true on success, false when the
# results queue has been closed (ClosedQueueError).
+ def deliver_result(message)
226
+ @results << message
227
+ true
228
+ rescue ClosedQueueError
229
+ false
230
+ end
231
+
232
# Closes the permit, job, and result queues, in that order.
+ def close_internal_queues
233
+ close_permit_queue
234
+ close_job_queue
235
+ close_result_queue
236
+ end
237
+
238
# Closes the @permits queue.
+ def close_permit_queue
239
+ @permits.close
240
+ end
241
+
242
# Closes the @jobs queue.
+ def close_job_queue
243
+ @jobs.close
244
+ end
245
+
246
# Closes the @results queue.
+ def close_result_queue
247
+ @results.close
248
+ end
249
+
250
# Closes the upstream source at most once and reports the close error, if any.
#
# record_error - when true, a StandardError rescued here is also stored in
#                @upstream_close_error (the first stored error is kept).
#
# Returns nil when the close succeeded or upstream was already closed, and the
# rescued StandardError otherwise. While another caller is in the middle of
# closing, this defers to wait_for_upstream_close instead of closing again.
def close_upstream(record_error: true)
  return wait_for_upstream_close(record_error: record_error) if @upstream_closing
  return nil if @upstream_closed

  # Only the caller that actually starts the close may finish the bookkeeping
  # in the ensure clause. A waiter that is interrupted while the real close is
  # still running must not mark upstream closed or signal completion.
  owns_close = true
  @upstream_closing = true
  @upstream.close
  nil
rescue StandardError => error
  @upstream_close_error ||= error if record_error
  error
ensure
  if owns_close
    @upstream_closed = true
    @upstream_closing = false
    signal_upstream_close_done
  end
end
267
+
268
# Waits for the caller that is currently closing upstream to signal completion.
#
# record_error - when true, returns whatever is stored in
#                @upstream_close_error; otherwise returns nil.
#
# Queue#pop returns nil (it does not raise ClosedQueueError) once the
# completion queue is closed and empty, so a waiter arriving after the signal
# was consumed falls through to the same return value without a rescue.
def wait_for_upstream_close(record_error:)
  @upstream_close_done.pop
  record_error ? @upstream_close_error : nil
end
276
+
277
# Publishes the "upstream close finished" token and then closes the
# completion queue. A ClosedQueueError from an already-closed queue is
# swallowed; the ensure clause closes the queue on every path.
+ def signal_upstream_close_done
278
+ @upstream_close_done << true
279
+ rescue ClosedQueueError
280
+ nil
281
+ ensure
282
+ @upstream_close_done.close
283
+ end
284
+
285
# Interrupts every live worker fiber and the dispatcher with a
# CancellationError through the current scheduler's fiber_interrupt hook.
#
# Does nothing when Fiber.scheduler does not respond to fiber_interrupt
# (which includes there being no scheduler, since nil does not respond to it).
+ def cancel_fibers
286
+ scheduler = Fiber.scheduler
287
+ return unless scheduler.respond_to?(:fiber_interrupt)
288
+
289
# compact drops the dispatcher slot when it was never scheduled (nil).
+ (@workers + [@dispatcher]).compact.each do |fiber|
290
+ next unless fiber.alive?
291
+
292
+ scheduler.fiber_interrupt(fiber, CancellationError.new)
293
# A failed interrupt is ignored so the remaining fibers are still visited.
+ rescue StandardError
294
+ nil
295
+ end
296
+ end
297
+
298
# Ensures a fiber scheduler is set and the current fiber is non-blocking.
#
# Raises SchedulerRequiredError otherwise, with a message that tells the two
# cases apart (scheduler present but blocking fiber vs. no scheduler).
+ def validate_scheduler!
299
+ return if Fiber.scheduler && !Fiber.current.blocking?
300
+
301
+ message =
302
+ if Fiber.scheduler
303
+ "Flow.parallel_unordered_map requires a non-blocking fiber"
304
+ else
305
+ "Flow.parallel_unordered_map requires Fiber.scheduler"
306
+ end
307
+ raise SchedulerRequiredError, message
308
+ end
309
+ end
310
+ end
311
+ end
@@ -80,8 +80,7 @@ module FiberStream
80
80
  @started = true
81
81
  @result_port = Ractor::Port.new
82
82
  @workers_count.times do |worker_id|
83
- @workers << self.class.__send__(
84
- :spawn_worker,
83
+ @workers << self.class.spawn_worker(
85
84
  worker_id,
86
85
  @result_port,
87
86
  @transform,
@@ -459,66 +458,66 @@ module FiberStream
459
458
  )
460
459
  end
461
460
 
462
- def self.spawn_worker(worker_id, result_port, transform, output_transfer)
463
- Ractor.new(worker_id, result_port, transform, output_transfer) do |id, port, mapper, transfer|
464
- current_sequence = nil
465
- send_control =
466
- lambda do |message|
467
- port.send(message)
468
- true
469
- rescue Exception # rubocop:disable Lint/RescueException
470
- false
471
- end
472
- send_failure =
473
- lambda do |sequence, kind, error|
474
- send_control.call(WorkerFailure.new(id, sequence, kind, error.class.name, error.message))
475
- rescue Exception # rubocop:disable Lint/RescueException
476
- false
477
- end
461
+ class << self
462
+ def spawn_worker(worker_id, result_port, transform, output_transfer) # :nodoc:
463
+ Ractor.new(worker_id, result_port, transform, output_transfer) do |id, port, mapper, transfer|
464
+ current_sequence = nil
465
+ send_control =
466
+ lambda do |message|
467
+ port.send(message)
468
+ true
469
+ rescue Exception # rubocop:disable Lint/RescueException
470
+ false
471
+ end
472
+ send_failure =
473
+ lambda do |sequence, kind, error|
474
+ send_control.call(WorkerFailure.new(id, sequence, kind, error.class.name, error.message))
475
+ rescue Exception # rubocop:disable Lint/RescueException
476
+ false
477
+ end
478
478
 
479
- begin
480
- if send_control.call(Ready.new(id))
481
- loop do
482
- message = Ractor.receive
483
- case message
484
- in Shutdown
485
- break
486
- in Job[sequence, value]
487
- current_sequence = sequence
488
- else
489
- raise TypeError, "invalid ractor_map worker message: #{message.class}"
490
- end
479
+ begin
480
+ if send_control.call(Ready.new(id))
481
+ loop do
482
+ message = Ractor.receive
483
+ case message
484
+ in Shutdown
485
+ break
486
+ in Job[sequence, value]
487
+ current_sequence = sequence
488
+ else
489
+ raise TypeError, "invalid ractor_map worker message: #{message.class}"
490
+ end
491
491
 
492
- begin
493
- mapped_value = mapper.call(value)
494
- rescue Exception => error # rubocop:disable Lint/RescueException
495
- break unless send_failure.call(current_sequence, :worker, error)
496
- else
497
492
  begin
498
- if transfer == :move
499
- port.send(WorkerValue.new(id, current_sequence, mapped_value), move: true)
500
- else
501
- port.send(WorkerValue.new(id, current_sequence, mapped_value))
502
- end
493
+ mapped_value = mapper.call(value)
503
494
  rescue Exception => error # rubocop:disable Lint/RescueException
504
- break unless send_failure.call(current_sequence, :output_transfer, error)
495
+ break unless send_failure.call(current_sequence, :worker, error)
496
+ else
497
+ begin
498
+ if transfer == :move
499
+ port.send(WorkerValue.new(id, current_sequence, mapped_value), move: true)
500
+ else
501
+ port.send(WorkerValue.new(id, current_sequence, mapped_value))
502
+ end
503
+ rescue Exception => error # rubocop:disable Lint/RescueException
504
+ break unless send_failure.call(current_sequence, :output_transfer, error)
505
+ end
505
506
  end
506
- end
507
507
 
508
- current_sequence = nil
509
- break unless send_control.call(Ready.new(id))
508
+ current_sequence = nil
509
+ break unless send_control.call(Ready.new(id))
510
+ end
510
511
  end
512
+ rescue Exception => error # rubocop:disable Lint/RescueException
513
+ sequence = current_sequence || -1
514
+ send_failure.call(sequence, :worker_termination, error)
515
+ ensure
516
+ send_control.call(Stopped.new(id))
511
517
  end
512
- rescue Exception => error # rubocop:disable Lint/RescueException
513
- sequence = current_sequence || -1
514
- send_failure.call(sequence, :worker_termination, error)
515
- ensure
516
- send_control.call(Stopped.new(id))
517
518
  end
518
519
  end
519
520
  end
520
-
521
- private_class_method :spawn_worker
522
521
  end
523
522
  end
524
523
  end
@@ -9,7 +9,7 @@ module FiberStream
9
9
  # one outstanding ack, and downstream demand replenishes only the producer
10
10
  # that emitted the previous value.
11
11
  class RactorMergePortsSource
12
- PortPair = Data.define(:side, :port, :ack_port)
12
+ PortPair = Data.define(:side, :port, :ack_port, :producer_ractor)
13
13
  StartCommand = Data.define
14
14
  RequestAckCommand = Data.define(:side)
15
15
  ShutdownCommand = Data.define
@@ -21,7 +21,8 @@ module FiberStream
21
21
 
22
22
  def initialize(port_pairs, ack_transfer, cancel)
23
23
  @pairs = port_pairs.each_with_index.map do |pair, side|
24
- PortPair.new(side:, port: pair.port, ack_port: pair.ack_port)
24
+ producer_ractor = pair.respond_to?(:producer_ractor) ? pair.producer_ractor : nil
25
+ PortPair.new(side:, port: pair.port, ack_port: pair.ack_port, producer_ractor:)
25
26
  end.freeze
26
27
  @ack_transfer = ack_transfer
27
28
  @cancel_enabled = cancel
@@ -151,14 +152,28 @@ module FiberStream
151
152
  def run_coordinator
152
153
  outstanding_ack = @pairs.to_h { |pair| [pair.side, false] }
153
154
  active_ports = @pairs.map(&:port)
155
+ active_ractors = @pairs.filter_map(&:producer_ractor)
154
156
  pair_by_port = @pairs.to_h { |pair| [pair.port, pair] }
157
+ pair_by_ractor =
158
+ @pairs.each_with_object({}) do |pair, pairs|
159
+ pairs[pair.producer_ractor] = pair if pair.producer_ractor
160
+ end
155
161
 
156
162
  loop do
157
163
  break if active_ports.empty?
158
164
 
159
- selected, message = Ractor.select(@control_port, *active_ports)
165
+ selected, message = Ractor.select(@control_port, *active_ports, *active_ractors)
160
166
  if selected == @control_port
161
167
  break if handle_control_message(message, outstanding_ack)
168
+ elsif pair_by_ractor.key?(selected)
169
+ pair = pair_by_ractor.fetch(selected)
170
+ active_ractors.delete(selected)
171
+ case message
172
+ in ProducerTerminal | ProducerCancelled
173
+ next
174
+ else
175
+ deliver_result(ErrorResult.new(side: pair.side, error: Pull.ractor_producer_termination_error(message)))
176
+ end
162
177
  else
163
178
  pair = pair_by_port.fetch(selected)
164
179
  outstanding_ack[pair.side] = false
@@ -12,13 +12,18 @@ module FiberStream
12
12
  ProtocolMessage = Data.define(:message)
13
13
  ErrorMessage = Data.define(:error)
14
14
  ClosedMessage = Data.define
15
+ SelectedProtocol = Data.define(:message)
16
+ SelectedShutdown = Data.define
17
+ SelectedProducerTerminated = Data.define(:result)
15
18
  private_constant :ProtocolMessage, :ErrorMessage, :ClosedMessage
19
+ private_constant :SelectedProtocol, :SelectedShutdown, :SelectedProducerTerminated
16
20
 
17
- def initialize(port, ack_port, ack_transfer, cancel)
21
+ def initialize(port, ack_port, ack_transfer, cancel, producer_ractor = nil)
18
22
  @port = port
19
23
  @ack_port = ack_port
20
24
  @ack_transfer = ack_transfer
21
25
  @cancel_enabled = cancel
26
+ @producer_ractor = producer_ractor
22
27
  @demands = Thread::SizedQueue.new(1)
23
28
  @results = Thread::SizedQueue.new(1)
24
29
  @shutdown_port = nil
@@ -28,6 +33,7 @@ module FiberStream
28
33
  @closed = false
29
34
  @done = false
30
35
  @producer_terminal = false
36
+ @producer_ractor_terminal = false
31
37
  @cancel_sent = false
32
38
  end
33
39
 
@@ -156,10 +162,15 @@ module FiberStream
156
162
  break
157
163
  end
158
164
 
159
- selected, message = select_message
160
- break if selected == @shutdown_port || closed?
161
-
162
- deliver_result(ProtocolMessage.new(message:))
165
+ case select_message
166
+ in SelectedShutdown
167
+ break
168
+ in SelectedProducerTerminated[result:]
169
+ deliver_result(ErrorMessage.new(error: Pull.ractor_producer_termination_error(result)))
170
+ break
171
+ in SelectedProtocol[message:]
172
+ deliver_result(ProtocolMessage.new(message:))
173
+ end
163
174
  end
164
175
  rescue StandardError => error
165
176
  deliver_result(ErrorMessage.new(error: build_error(:receive, error)))
@@ -168,7 +179,29 @@ module FiberStream
168
179
  end
169
180
 
170
181
  def select_message
171
- Ractor.select(@port, @shutdown_port)
182
+ loop do
183
+ selected, message =
184
+ if @producer_ractor && !@producer_ractor_terminal
185
+ Ractor.select(@port, @shutdown_port, @producer_ractor)
186
+ else
187
+ Ractor.select(@port, @shutdown_port)
188
+ end
189
+
190
+ if selected == @producer_ractor
191
+ @producer_ractor_terminal = true
192
+ case message
193
+ in ProducerTerminal | ProducerCancelled
194
+ next
195
+ else
196
+ return SelectedProducerTerminated.new(result: message)
197
+ end
198
+
199
+ end
200
+
201
+ return SelectedShutdown.new if selected == @shutdown_port || closed?
202
+
203
+ return SelectedProtocol.new(message:)
204
+ end
172
205
  end
173
206
 
174
207
  def send_ack