fiber_stream 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,349 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FiberStream
4
+ module Pull
5
# Setup adapter for high-level owned Ractor producer sources.
#
# Producer ractors and ports are created on first demand. Once every
# producer has returned its producer-owned ack port, this adapter delegates
# demand to the existing low-level Ractor port pull sources.
#
# Setup runs on a background thread that reports its outcome (success,
# error, or closed) through a one-slot queue; #next blocks on that queue
# until an outcome is available or the queue is closed by #close.
class RactorProducerSource
  # A spawned producer that has not yet handed back its ack port.
  StartedProducer = Data.define(:side, :data_port, :setup_port, :ractor, :definition)
  # A producer whose ack port has been received and validated.
  ReadyProducer = Data.define(:side, :data_port, :ack_port, :ractor)
  # Per-producer record passed to Pull.ractor_merge_ports.
  PortPair = Data.define(:port, :ack_port, :producer_ractor)
  # Setup outcomes delivered through @setup_results.
  SetupSuccess = Data.define(:producers)
  SetupError = Data.define(:error)
  SetupClosed = Data.define
  private_constant :StartedProducer, :ReadyProducer, :PortPair, :SetupSuccess, :SetupError, :SetupClosed

  # definitions  - producer definitions; each must respond to #block,
  #                #transfer and #args (read by .spawn_producer).
  # ack_transfer - :move sends control messages with move: true; any other
  #                value sends them without the move option.
  # merge        - truthy builds a merged delegate over all producers,
  #                falsy uses only the first producer.
  def initialize(definitions, ack_transfer, merge)
    @definitions = definitions
    @ack_transfer = ack_transfer
    @merge = merge
    @setup_results = Thread::SizedQueue.new(1)
    @state_mutex = Mutex.new
    @started = false
    @closed = false
    @done = false
    @shutdown_port = nil
    @setup_thread = nil
    @delegate = nil
    @started_producers = []
    @ready_producers = []
  end

  # Returns the next value from the delegate, or DONE once the source is
  # closed or finished. The first call starts the producers and waits for
  # setup to finish. Raises the setup error when setup failed.
  def next
    return DONE if closed_or_done?

    start
    return DONE unless ensure_delegate

    value = @delegate.next
    mark_done if Pull.done?(value)
    value
  end

  # Closes the source once; later calls return immediately. Wakes and joins
  # the setup thread, closes the setup queue and the delegate, cancels every
  # ready producer and waits for their ractors. A StandardError raised by the
  # delegate's close is re-raised only after the remaining cleanup has run.
  def close
    already_closed = mark_closed
    return if already_closed

    wake_setup
    wait_for_setup
    close_setup_queue
    close_error = close_delegate
    cancel_ready_producers
    wait_for_ractors(@ready_producers.map(&:ractor))
    raise close_error if close_error
  end

  private

  def closed_or_done?
    @state_mutex.synchronize { @closed || @done }
  end

  def mark_done
    @state_mutex.synchronize { @done = true }
  end

  # Marks the source closed and done; returns whether it was already closed.
  def mark_closed
    @state_mutex.synchronize do
      already_closed = @closed
      @closed = true
      @done = true
      already_closed
    end
  end

  # Spawns the producers and the setup thread exactly once.
  #
  # Does nothing when the source has already been closed. The check happens
  # under the state mutex because #next tests closed_or_done? before calling
  # this method: without it, a close that completes in between would be
  # followed by producers and a setup thread that nothing cancels or joins.
  #
  # Any exception raised while starting is converted into a SetupError that a
  # cleanup thread delivers after tearing down the producers already spawned.
  def start
    @state_mutex.synchronize do
      return if @started || @closed

      @started = true
      @shutdown_port = Ractor::Port.new
      spawn_producers
      @setup_thread = Thread.new { run_setup }
    end
  rescue Exception => error # rubocop:disable Lint/RescueException
    setup_error = build_error(:producer_setup, error)
    @setup_thread = Thread.new { cleanup_after_start_failure(setup_error) }
  end

  # Tears down whatever producers were spawned before start failed, then
  # publishes the failure as the setup outcome.
  def cleanup_after_start_failure(setup_error)
    setup_ports = @started_producers.map(&:setup_port)
    ractors = @started_producers.map(&:ractor)
    producer_by_setup_port = @started_producers.to_h { |producer| [producer.setup_port, producer] }

    cleanup_remaining_setup(setup_ports, ractors, producer_by_setup_port)
    deliver_setup(SetupError.new(error: setup_error))
  end

  # Creates a data port, a setup port and a ractor for every definition; the
  # definition's index becomes the producer's side.
  def spawn_producers
    @started_producers = []

    @definitions.each_with_index do |definition, side|
      data_port = Ractor::Port.new
      setup_port = Ractor::Port.new
      ractor = self.class.spawn_producer(data_port, setup_port, definition)
      @started_producers << StartedProducer.new(side:, data_port:, setup_port:, ractor:, definition:)
    end
  end

  # Setup thread body: waits until every producer has sent its ack port, the
  # shutdown port fires, or a producer ractor is selected before setup is
  # complete, and delivers the matching outcome.
  def run_setup
    remaining_setup_ports = @started_producers.map(&:setup_port)
    remaining_ractors = @started_producers.map(&:ractor)
    producer_by_setup_port = @started_producers.to_h { |producer| [producer.setup_port, producer] }

    until remaining_setup_ports.empty?
      selected, message = Ractor.select(@shutdown_port, *remaining_setup_ports, *remaining_ractors)
      if selected == @shutdown_port
        # Publish the outcome first so a consumer blocked in #next is released
        # before the slower producer teardown runs.
        deliver_setup(SetupClosed.new)
        cleanup_remaining_setup(remaining_setup_ports, remaining_ractors, producer_by_setup_port)
        return
      elsif producer_by_setup_port.key?(selected)
        register_ready(producer_by_setup_port.fetch(selected), message)
        remaining_setup_ports.delete(selected)
      else
        # The selected object is one of the producer ractors.
        raise "producer exited before setup completed"
      end
    end

    deliver_setup(SetupSuccess.new(producers: @ready_producers.sort_by(&:side).freeze))
  rescue Exception => error # rubocop:disable Lint/RescueException
    setup_error = build_error(:producer_setup, error)
    cancel_ready_producers
    cleanup_remaining_setup(remaining_setup_ports, remaining_ractors, producer_by_setup_port)
    deliver_setup(SetupError.new(error: setup_error))
  end

  # Best-effort teardown of producers that have not finished setup: each ack
  # port that still arrives is recorded and immediately sent a cancel, and a
  # ractor that is selected instead is dropped from the wait set together with
  # its setup port. Finally waits for every started ractor. All exceptions
  # are swallowed.
  def cleanup_remaining_setup(remaining_setup_ports, remaining_ractors, producer_by_setup_port)
    producer_by_ractor = @started_producers.to_h { |producer| [producer.ractor, producer] }

    until remaining_setup_ports.empty?
      selected, message = Ractor.select(*remaining_setup_ports, *remaining_ractors)
      if producer_by_setup_port.key?(selected)
        ready = register_ready(producer_by_setup_port.fetch(selected), message)
        send_cancel(ready)
        remaining_setup_ports.delete(selected)
      else
        producer = producer_by_ractor.fetch(selected)
        remaining_setup_ports.delete(producer.setup_port)
        remaining_ractors.delete(selected)
      end
    end

    wait_for_ractors(@started_producers.map(&:ractor))
  rescue Exception # rubocop:disable Lint/RescueException
    nil
  end

  # Validates the ack port a producer sent back, records the producer as
  # ready and returns the new ReadyProducer.
  def register_ready(producer, ack_port)
    validate_ack_port!(ack_port)
    ready = ReadyProducer.new(
      side: producer.side,
      data_port: producer.data_port,
      ack_port: ack_port,
      ractor: producer.ractor
    )
    @ready_producers << ready
    ready
  end

  # Raises TypeError unless the object provides its own #send, i.e. one that
  # is not the generic Kernel#send dispatch every object inherits.
  def validate_ack_port!(ack_port)
    return if ack_port.respond_to?(:send) && ack_port.method(:send).owner != Kernel

    raise TypeError, "producer setup did not return a Ractor-style ack port"
  end

  # Returns true when a delegate is installed, waiting for the setup outcome
  # if necessary. Returns false when setup was closed, or when the source was
  # closed while the delegate was being built (the fresh delegate is then
  # closed again). Raises when setup failed.
  def ensure_delegate
    return true if delegate_installed?

    case setup_result
    in SetupSuccess[producers:]
      delegate = build_delegate(producers)
      should_close_delegate = false
      @state_mutex.synchronize do
        @delegate = delegate
        should_close_delegate = @closed
        @delegate = nil if should_close_delegate
      end
      if should_close_delegate
        close_delegate_suppressing(delegate)
        return false
      end

      true
    in SetupError[error:]
      mark_done
      raise_error(error)
    in SetupClosed
      mark_done
      false
    end
  end

  # Blocks for the setup outcome. A closed queue (pop returning nil or
  # raising ClosedQueueError) is reported as SetupClosed.
  def setup_result
    result = @setup_results.pop
    result || SetupClosed.new
  rescue ClosedQueueError
    SetupClosed.new
  end

  # Builds the low-level pull source: a merged source over all producers in
  # merge mode, otherwise a single-port source over the first producer.
  def build_delegate(producers)
    if @merge
      Pull.ractor_merge_ports(
        producers.map do |producer|
          PortPair.new(port: producer.data_port, ack_port: producer.ack_port, producer_ractor: producer.ractor)
        end,
        @ack_transfer,
        true
      )
    else
      producer = producers.fetch(0)
      Pull.ractor_port(producer.data_port, producer.ack_port, @ack_transfer, true, producer.ractor)
    end
  end

  # Sends a cancel to every ready producer, ignoring individual failures so
  # one broken port does not stop the remaining cancellations.
  def cancel_ready_producers
    @ready_producers.each do |producer|
      send_cancel(producer)
    rescue Exception # rubocop:disable Lint/RescueException
      nil
    end
  end

  def send_cancel(producer)
    send_control(producer.ack_port, RactorPort::Cancel.new(:closed))
  end

  # Sends a control message, moving it when ack_transfer is :move.
  def send_control(port, message)
    if @ack_transfer == :move
      port.send(message, move: true)
    else
      port.send(message)
    end
  end

  # Closes the installed delegate, if any. Returns the StandardError raised
  # by the close instead of raising it, or nil on success.
  def close_delegate
    delegate = @state_mutex.synchronize { @delegate }
    delegate&.close
    nil
  rescue StandardError => error
    error
  end

  def close_delegate_suppressing(delegate)
    delegate.close
  rescue StandardError
    nil
  end

  def delegate_installed?
    @state_mutex.synchronize { !@delegate.nil? }
  end

  # Signals the setup thread through the shutdown port; a no-op when setup
  # was never started. Failures are ignored.
  def wake_setup
    @shutdown_port&.send(:shutdown)
  rescue Exception # rubocop:disable Lint/RescueException
    nil
  end

  def wait_for_setup
    @setup_thread&.join
  end

  # Waits for every ractor to finish by reading its value, ignoring whatever
  # the read raises.
  # NOTE(review): the wait runs on a helper thread that is joined right away,
  # presumably to keep the blocking Ractor wait off the calling fiber — confirm.
  def wait_for_ractors(ractors)
    return if ractors.empty?

    Thread.new do
      ractors.each do |ractor|
        ractor.value
      rescue Exception # rubocop:disable Lint/RescueException
        nil
      end
    end.join
  end

  def close_setup_queue
    @setup_results.close
  end

  # Publishes a setup outcome; silently dropped when the queue is closed.
  def deliver_setup(result)
    @setup_results.push(result)
  rescue ClosedQueueError, ThreadError
    nil
  end

  def build_error(kind, error)
    RactorPortSourceError.new(
      kind: kind,
      cause_class_name: error.class.name,
      cause_message: error.message,
      cause: error
    )
  end

  # Raises the error, attaching its original cause when it is a
  # RactorPortSourceError that carries one.
  def raise_error(error)
    if error.is_a?(RactorPortSourceError) && error.original_cause
      raise error, cause: error.original_cause
    end

    raise error
  end

  class << self
    # Spawns one producer ractor. Inside the ractor a fresh ack port is
    # created and sent over the setup port, then the definition's block runs
    # with a RactorProducer and the definition's args. The ractor's value
    # reports how the producer ended: send failure, cancellation, or terminal.
    def spawn_producer(data_port, setup_port, definition) # :nodoc:
      Ractor.new(
        data_port,
        setup_port,
        definition.block,
        definition.transfer,
        definition.args
      ) do |outbox, setup, block, transfer, args|
        ack_port = Ractor::Port.new
        setup.send(ack_port)
        producer = RactorProducer.new(outbox, ack_port, transfer)

        begin
          block.call(producer, *args)
          # Complete automatically when the block returned without finishing.
          producer.complete unless producer.terminal? || producer.cancelled?
        rescue Exception => error # rubocop:disable Lint/RescueException
          producer.fail(error) unless producer.terminal? || producer.cancelled?
        end

        if producer.send_failed?
          ProducerSendFailed.new
        elsif producer.cancelled?
          ProducerCancelled.new
        else
          ProducerTerminal.new
        end
      end
    end
  end
end
348
+ end
349
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FiberStream
4
+ module Pull
5
# Running-accumulator stage.
#
# Every downstream demand pulls exactly one upstream value, folds it into
# the accumulator through the reducer, and emits the new accumulator.
class Scan
  def initialize(upstream, initial, reducer)
    @upstream = upstream
    @reducer = reducer
    @accumulator = initial
    @done = false
    @closed = false
  end

  # Returns the updated accumulator, or DONE once the stage is closed or the
  # upstream has finished.
  def next
    return DONE if finished?

    element = @upstream.next
    return finish if Pull.done?(element)

    @accumulator = @reducer.call(@accumulator, element)
  end

  # Closes the upstream the first time it is called; later calls do nothing.
  def close
    unless @closed
      @closed = true
      @upstream.close
    end
  end

  private

  def finished?
    @closed || @done
  end

  # Remembers that the upstream is exhausted and hands back the sentinel.
  def finish
    @done = true
    DONE
  end
end
37
+ end
38
+ end
@@ -11,11 +11,40 @@ module FiberStream
11
11
  # The sentinel must never escape through public APIs.
12
12
  module Pull
13
13
  DONE = Object.new.freeze
14
+ ProducerTerminal = Data.define
15
+ ProducerCancelled = Data.define
16
+ ProducerSendFailed = Data.define
17
+ private_constant :ProducerTerminal, :ProducerCancelled, :ProducerSendFailed
14
18
 
15
19
  def self.done?(value)
16
20
  value.equal?(DONE)
17
21
  end
18
22
 
23
# Yields every value pulled from +stream+ until it returns the DONE
# sentinel; the sentinel itself is never yielded. Returns nil.
#
# A plain `while` loop is used instead of Kernel#loop on purpose: Kernel#loop
# rescues StopIteration, which would make a StopIteration raised by
# +stream.next+ or by the caller's block look like normal completion.
def self.each_value(stream)
  while true # rubocop:disable Style/InfiniteLoop
    value = stream.next
    break if done?(value)

    yield value
  end
  nil
end
31
+
32
# Builds the :producer_failure error reported when a producer ractor ended
# without delivering the message its ack entitled it to send. The wording
# depends on whether the producer result is a ProducerSendFailed.
def self.ractor_producer_termination_error(result)
  detail =
    if ProducerSendFailed === result
      "producer exited after failing to send the ack-permitted message"
    else
      "producer exited before sending the ack-permitted message"
    end

  RactorPortSourceError.new(kind: :producer_failure, cause_class_name: "RuntimeError", cause_message: detail)
end
47
+
19
48
  def self.each(enumerable)
20
49
  Each.new(enumerable)
21
50
  end
@@ -24,14 +53,22 @@ module FiberStream
24
53
  IOSource.new(io, chunk_size, close_io)
25
54
  end
26
55
 
27
# Factory for RactorPortSource. The added (+) signature takes an optional
# trailing producer_ractor argument (default nil) and forwards it to
# RactorPortSource.new; four-argument calls keep working.
- def self.ractor_port(port, ack_port, ack_transfer, cancel)
28
- RactorPortSource.new(port, ack_port, ack_transfer, cancel)
56
+ def self.ractor_port(port, ack_port, ack_transfer, cancel, producer_ractor = nil)
57
+ RactorPortSource.new(port, ack_port, ack_transfer, cancel, producer_ractor)
29
58
  end
30
59
 
31
60
  def self.ractor_merge_ports(port_pairs, ack_transfer, cancel)
32
61
  RactorMergePortsSource.new(port_pairs, ack_transfer, cancel)
33
62
  end
34
63
 
64
# Factory: builds a RactorProducerSource over +definitions+ with the merge
# flag (third argument) set to false.
+ def self.ractor_producer(definitions, ack_transfer)
65
+ RactorProducerSource.new(definitions, ack_transfer, false)
66
+ end
67
+
68
# Factory: builds a RactorProducerSource over +definitions+ with the merge
# flag (third argument) set to true.
+ def self.ractor_merge_producers(definitions, ack_transfer)
69
+ RactorProducerSource.new(definitions, ack_transfer, true)
70
+ end
71
+
35
72
  def self.concat(left_materializer, right_materializer)
36
73
  Concat.new(left_materializer, right_materializer)
37
74
  end
@@ -52,6 +89,10 @@ module FiberStream
52
89
  ParallelMapBoundary.new(upstream, concurrency, transform)
53
90
  end
54
91
 
92
# Factory: wraps +upstream+ in a ParallelUnorderedMapBoundary, forwarding
# concurrency and transform unchanged.
+ def self.parallel_unordered_map(upstream, concurrency, transform)
93
+ ParallelUnorderedMapBoundary.new(upstream, concurrency, transform)
94
+ end
95
+
55
96
  def self.ractor_map(upstream, workers, input_transfer, output_transfer, transform)
56
97
  RactorMapBoundary.new(upstream, workers, input_transfer, output_transfer, transform)
57
98
  end
@@ -72,6 +113,10 @@ module FiberStream
72
113
  Grouped.new(upstream, count)
73
114
  end
74
115
 
116
# Factory: wraps +upstream+ in a Scan stage, passing the initial accumulator
# and the reducer through unchanged.
+ def self.scan(upstream, initial, reducer)
117
+ Scan.new(upstream, initial, reducer)
118
+ end
119
+
75
120
  def self.take_while(upstream, predicate)
76
121
  TakeWhile.new(upstream, predicate)
77
122
  end
@@ -104,6 +149,7 @@ require_relative "pull/each"
104
149
  require_relative "pull/io_source"
105
150
  require_relative "pull/ractor_port_source"
106
151
  require_relative "pull/ractor_merge_ports_source"
152
+ require_relative "pull/ractor_producer_source"
107
153
  require_relative "pull/concat"
108
154
  require_relative "pull/zip"
109
155
  require_relative "pull/merge"
@@ -112,6 +158,7 @@ require_relative "pull/select"
112
158
  require_relative "pull/take"
113
159
  require_relative "pull/drop"
114
160
  require_relative "pull/grouped"
161
+ require_relative "pull/scan"
115
162
  require_relative "pull/take_while"
116
163
  require_relative "pull/drop_while"
117
164
  require_relative "pull/lines"
@@ -119,12 +166,14 @@ require_relative "pull/split"
119
166
  require_relative "pull/async_boundary"
120
167
  require_relative "pull/buffer_boundary"
121
168
  require_relative "pull/parallel_map_boundary"
169
+ require_relative "pull/parallel_unordered_map_boundary"
122
170
  require_relative "pull/ractor_map_boundary"
123
171
 
124
172
  module FiberStream
125
173
  module Pull
126
- private_constant :Each, :IOSource, :RactorPortSource, :RactorMergePortsSource, :Concat, :Zip, :Merge, :Map,
127
- :Select, :Take, :Drop, :Grouped, :TakeWhile, :DropWhile, :Lines, :Split, :AsyncBoundary,
128
- :BufferBoundary, :ParallelMapBoundary, :RactorMapBoundary
174
+ private_constant :Each, :IOSource, :RactorPortSource, :RactorMergePortsSource, :RactorProducerSource, :Concat,
175
+ :Zip, :Merge, :Map, :Select, :Take, :Drop, :Grouped, :Scan, :TakeWhile, :DropWhile, :Lines, :Split,
176
+ :AsyncBoundary, :BufferBoundary, :ParallelMapBoundary, :ParallelUnorderedMapBoundary,
177
+ :RactorMapBoundary
129
178
  end
130
179
  end
@@ -0,0 +1,167 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FiberStream
4
# Producer-side context for `Source.ractor_producer`.
#
# A producer block calls `emit`, `complete`, or `fail`. Each call first waits
# for one control message on the ack port and then sends one protocol
# message on the data port. A `false` return means the producer was already
# finished, cancellation was observed, or the send failed.
class RactorProducer
  def initialize(data_port, ack_port, transfer)
    @data_port = data_port
    @ack_port = ack_port
    @transfer = transfer
    @terminal = @cancelled = @send_failed = false
  end

  # Sends +value+ as an element. +transfer+ overrides the producer's default
  # transfer policy for this message and must be nil, :copy or :move.
  def emit(value, transfer: nil)
    return false if finished?

    mode = validate_transfer_override!(transfer)
    wait_for_ack && send_emitted_message(RactorPort::Element.new(value), mode)
  end

  # Sends the completion message and marks the producer terminal.
  def complete
    return false if finished?

    wait_for_ack && send_terminal_message(RactorPort::Complete.new)
  end

  # Sends a failure message built either from +error+ or from the two String
  # keyword arguments. The message is built before waiting for the ack, so
  # invalid arguments raise ArgumentError without consuming one.
  def fail(error = nil, cause_class_name: nil, cause_message: nil)
    return false if finished?

    payload = failure_message(error, cause_class_name, cause_message)
    wait_for_ack && send_terminal_message(payload)
  end

  def cancelled?
    @cancelled
  end

  def terminal? # :nodoc:
    @terminal
  end

  def send_failed? # :nodoc:
    @send_failed
  end

  private

  # True once a terminal message was sent or cancellation was observed.
  def finished?
    terminal? || cancelled?
  end

  # Sends a non-terminal message; a failed send is reported as a failure
  # message instead and the call returns false.
  def send_emitted_message(message, transfer)
    send_data_message(message, transfer)
    true
  rescue Exception => problem # rubocop:disable Lint/RescueException
    report_same_ack_failure(problem)
    false
  end

  # Sends a terminal message with the producer's default transfer policy.
  def send_terminal_message(message)
    send_data_message(message, @transfer)
    @terminal = true
    true
  rescue Exception => problem # rubocop:disable Lint/RescueException
    report_same_ack_failure(problem)
    false
  end

  # Resolves the per-call transfer override, falling back to the default.
  def validate_transfer_override!(transfer)
    case transfer
    when nil then @transfer
    when :copy, :move then transfer
    else raise ArgumentError, "transfer must be :copy or :move"
    end
  end

  # Blocks for the next control message: true for an ack, false (and
  # cancelled) for a cancel, TypeError for anything else.
  def wait_for_ack
    control = @ack_port.receive
    case control
    when RactorPort::Ack
      true
    when RactorPort::Cancel
      @cancelled = true
      false
    else
      raise TypeError, "invalid ractor producer control message"
    end
  end

  def send_data_message(message, transfer)
    return @data_port.send(message, move: true) if transfer == :move

    @data_port.send(message)
  end

  def failure_message(error, cause_class_name, cause_message)
    return RactorPort::Failure.new(safe_class_name(error), safe_message(error)) if error

    if cause_class_name.is_a?(String) && cause_message.is_a?(String)
      return RactorPort::Failure.new(cause_class_name, cause_message)
    end

    raise ArgumentError, "fail requires an error or String failure metadata"
  end

  # Class name of the error, or "Exception" when it is missing, empty, not a
  # String, or cannot be read.
  def safe_class_name(error)
    label = error.class.name
    return "Exception" unless label.is_a?(String)

    label.empty? ? "Exception" : label
  rescue Exception # rubocop:disable Lint/RescueException
    "Exception"
  end

  # Message of the error, or "" when it is not a String or cannot be read.
  def safe_message(error)
    text = error.message
    text.is_a?(String) ? text : ""
  rescue Exception # rubocop:disable Lint/RescueException
    ""
  end

  # Uses the already-granted ack to report a failed send as a copied failure
  # message. The producer becomes terminal either way; if this send fails as
  # well, the send-failed flag is raised too.
  def report_same_ack_failure(error)
    replacement = RactorPort::Failure.new(safe_class_name(error), safe_message(error))
    send_data_message(replacement, :copy)
    @terminal = true
  rescue Exception # rubocop:disable Lint/RescueException
    @terminal = true
    @send_failed = true
  end
end
137
+
138
# Builder passed to `Source.ractor_merge_producers`.
#
# Every `producer` call records one producer definition. Registering checks
# that a block was given, that the transfer policy is valid and that the
# block is Ractor-shareable; it only stores the definition and runs nothing.
class RactorProducerGroup
  Definition = Data.define(:args, :transfer, :block)
  private_constant :Definition

  def initialize(default_transfer)
    @default_transfer = default_transfer
    @definitions = []
  end

  # Records a producer definition and returns self so calls can be chained.
  # +transfer+ falls back to the group's default when nil.
  #
  # Raises ArgumentError without a block or with an unknown transfer, and
  # TypeError when the block is not shareable.
  def producer(*args, transfer: nil, &block)
    raise ArgumentError, "missing block" if block.nil?

    transfer_ok = transfer.nil? || %i[copy move].include?(transfer)
    raise ArgumentError, "transfer must be :copy or :move" unless transfer_ok
    raise TypeError, "block must be shareable" unless Ractor.shareable?(block)

    effective_transfer = transfer || @default_transfer
    @definitions.push(Definition.new(args: args, transfer: effective_transfer, block: block))
    self
  end

  # Frozen snapshot of the definitions recorded so far.
  def definitions
    snapshot = @definitions.dup
    snapshot.freeze
  end
end
167
+ end
@@ -7,6 +7,10 @@ module FiberStream
7
7
  CancelledMessage = Data.define(:error)
8
8
  private_constant :ValueMessage, :ErrorMessage, :CancelledMessage
9
9
 
10
# Class-level entry point: forwards +scheduler+ and the +run+ block to .new
# and returns the new instance.
+ def self.start(scheduler, &run) # :nodoc:
11
+ new(scheduler, &run)
12
+ end
13
+
10
14
  def initialize(scheduler, &run)
11
15
  @scheduler = scheduler
12
16
  @completion = nil