fiber_stream 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,349 @@
1
+ # frozen_string_literal: true
2
+
3
module FiberStream
  module Pull
    # Setup adapter for high-level owned Ractor producer sources.
    #
    # Producer ractors and ports are created on first demand. Once every
    # producer has returned its producer-owned ack port, this adapter delegates
    # demand to the existing low-level Ractor port pull sources.
    #
    # Setup runs on a background thread which hands its outcome (success,
    # error, or closed) to the consuming side through a one-slot queue.
    class RactorProducerSource
      StartedProducer = Data.define(:side, :data_port, :setup_port, :ractor, :definition)
      ReadyProducer = Data.define(:side, :data_port, :ack_port, :ractor)
      PortPair = Data.define(:port, :ack_port, :producer_ractor)
      SetupSuccess = Data.define(:producers)
      SetupError = Data.define(:error)
      SetupClosed = Data.define
      private_constant :StartedProducer, :ReadyProducer, :PortPair, :SetupSuccess, :SetupError, :SetupClosed

      # definitions  - producer definitions; each must respond to `block`,
      #                `transfer`, and `args` (read when the ractor is spawned).
      # ack_transfer - `:move` sends control messages with `move: true`; any
      #                other value sends them without options.
      # merge        - truthy to merge every producer; otherwise only the
      #                first producer is consumed.
      def initialize(definitions, ack_transfer, merge)
        @definitions = definitions
        @ack_transfer = ack_transfer
        @merge = merge
        @setup_results = Thread::SizedQueue.new(1)
        @state_mutex = Mutex.new
        @started = false
        @closed = false
        @done = false
        @shutdown_port = nil
        @setup_thread = nil
        @delegate = nil
        @started_producers = []
        @ready_producers = []
      end

      # Returns the delegate's next value, or DONE once the source is closed
      # or finished. The first call starts the producers and waits for the
      # setup outcome; a setup failure is raised from here.
      def next
        return DONE if closed_or_done?

        start
        return DONE unless ensure_delegate

        value = @delegate.next
        mark_done if Pull.done?(value)
        value
      end

      # Closes the source once; later calls return immediately. Waits for the
      # setup thread, closes the delegate, cancels every ready producer, and
      # waits for their ractors. A StandardError raised while closing the
      # delegate is re-raised only after the rest of the cleanup has run.
      def close
        already_closed = mark_closed
        return if already_closed

        wake_setup
        wait_for_setup
        close_setup_queue
        close_error = close_delegate
        cancel_ready_producers
        wait_for_ractors(@ready_producers.map(&:ractor))
        raise close_error if close_error
      end

      private

      def closed_or_done?
        @state_mutex.synchronize { @closed || @done }
      end

      def mark_done
        @state_mutex.synchronize { @done = true }
      end

      # Marks the source closed and done; returns whether it was closed before.
      def mark_closed
        @state_mutex.synchronize do
          already_closed = @closed
          @closed = true
          @done = true
          already_closed
        end
      end

      # Spawns the producers and the setup thread on first use. If spawning
      # raises, a cleanup thread is started instead and it delivers the
      # failure as the setup outcome.
      def start
        @state_mutex.synchronize do
          return if @started

          @started = true
          @shutdown_port = Ractor::Port.new
          spawn_producers
          @setup_thread = Thread.new { run_setup }
        end
      rescue Exception => error # rubocop:disable Lint/RescueException
        setup_error = build_error(:producer_setup, error)
        @setup_thread = Thread.new { cleanup_after_start_failure(setup_error) }
      end

      def cleanup_after_start_failure(setup_error)
        setup_ports = @started_producers.map(&:setup_port)
        ractors = @started_producers.map(&:ractor)
        producer_by_setup_port = @started_producers.to_h { |producer| [producer.setup_port, producer] }

        cleanup_remaining_setup(setup_ports, ractors, producer_by_setup_port)
        deliver_setup(SetupError.new(error: setup_error))
      end

      def spawn_producers
        @started_producers = []

        @definitions.each_with_index do |definition, side|
          data_port = Ractor::Port.new
          setup_port = Ractor::Port.new
          ractor = self.class.spawn_producer(data_port, setup_port, definition)
          @started_producers << StartedProducer.new(side:, data_port:, setup_port:, ractor:, definition:)
        end
      end

      # Setup thread body: collects one ack port per producer, then delivers
      # SetupSuccess with the producers ordered by side. A shutdown message
      # delivers SetupClosed; any exception, including a producer ractor
      # being selected before setup finished, delivers SetupError.
      def run_setup
        remaining_setup_ports = @started_producers.map(&:setup_port)
        remaining_ractors = @started_producers.map(&:ractor)
        producer_by_setup_port = @started_producers.to_h { |producer| [producer.setup_port, producer] }

        until remaining_setup_ports.empty?
          selected, message = Ractor.select(@shutdown_port, *remaining_setup_ports, *remaining_ractors)
          if selected == @shutdown_port
            deliver_setup(SetupClosed.new)
            cleanup_remaining_setup(remaining_setup_ports, remaining_ractors, producer_by_setup_port)
            return
          elsif producer_by_setup_port.key?(selected)
            register_ready_producer(producer_by_setup_port.fetch(selected), message)
            remaining_setup_ports.delete(selected)
          else
            raise "producer exited before setup completed"
          end
        end

        deliver_setup(SetupSuccess.new(producers: @ready_producers.sort_by(&:side).freeze))
      rescue Exception => error # rubocop:disable Lint/RescueException
        setup_error = build_error(:producer_setup, error)
        cancel_ready_producers
        cleanup_remaining_setup(remaining_setup_ports, remaining_ractors, producer_by_setup_port)
        deliver_setup(SetupError.new(error: setup_error))
      end

      # Best-effort teardown of producers whose setup has not completed yet.
      # Each late ack port is cancelled as soon as it arrives; a selected
      # ractor is simply dropped from the wait set together with its port.
      #
      # A failure while cancelling one producer must not stop the teardown of
      # the others, so it is contained per producer. A producer that could
      # not be cancelled is left out of the final wait, because nothing
      # guarantees that its ractor will ever finish.
      def cleanup_remaining_setup(remaining_setup_ports, remaining_ractors, producer_by_setup_port)
        producer_by_ractor = @started_producers.to_h { |producer| [producer.ractor, producer] }
        uncancelled_ractors = []

        until remaining_setup_ports.empty?
          selected, message = Ractor.select(*remaining_setup_ports, *remaining_ractors)
          if producer_by_setup_port.key?(selected)
            producer = producer_by_setup_port.fetch(selected)
            # Drop the port first so a failed cancel cannot loop on it again.
            remaining_setup_ports.delete(selected)
            uncancelled_ractors << producer.ractor unless cancel_late_producer(producer, message)
          else
            producer = producer_by_ractor.fetch(selected)
            remaining_setup_ports.delete(producer.setup_port)
            remaining_ractors.delete(selected)
          end
        end

        wait_for_ractors(@started_producers.map(&:ractor) - uncancelled_ractors)
      rescue Exception # rubocop:disable Lint/RescueException
        nil
      end

      # Registers a producer that answered during teardown and cancels it.
      # Returns true when the cancel was sent and false when validation or
      # the send raised.
      def cancel_late_producer(producer, ack_port)
        send_cancel(register_ready_producer(producer, ack_port))
        true
      rescue Exception # rubocop:disable Lint/RescueException
        false
      end

      # Validates the ack port, records the producer as ready, and returns
      # the new ReadyProducer. Raises TypeError for an invalid ack port.
      def register_ready_producer(producer, ack_port)
        validate_ack_port!(ack_port)
        ready = ReadyProducer.new(
          side: producer.side,
          data_port: producer.data_port,
          ack_port: ack_port,
          ractor: producer.ractor
        )
        @ready_producers << ready
        ready
      end

      # Accepts only objects that define their own `send`; the generic
      # Kernel#send method dispatcher does not count as a port.
      def validate_ack_port!(ack_port)
        return if ack_port.respond_to?(:send) && ack_port.method(:send).owner != Kernel

        raise TypeError, "producer setup did not return a Ractor-style ack port"
      end

      # Installs the delegate from the setup outcome. Returns true when a
      # delegate is available and false when setup was closed; raises the
      # setup error otherwise. If the source was closed while the delegate
      # was being built, the new delegate is closed again and false is
      # returned.
      def ensure_delegate
        return true if delegate_installed?

        case setup_result
        in SetupSuccess[producers:]
          delegate = build_delegate(producers)
          should_close_delegate = false
          @state_mutex.synchronize do
            @delegate = delegate
            should_close_delegate = @closed
            @delegate = nil if should_close_delegate
          end
          if should_close_delegate
            close_delegate_suppressing(delegate)
            return false
          end

          true
        in SetupError[error:]
          mark_done
          raise_error(error)
        in SetupClosed
          mark_done
          false
        end
      end

      # Blocks until the setup thread delivers its outcome. A closed queue
      # (with or without a pending value) reads as SetupClosed.
      def setup_result
        result = @setup_results.pop
        result || SetupClosed.new
      rescue ClosedQueueError
        SetupClosed.new
      end

      def build_delegate(producers)
        if @merge
          Pull.ractor_merge_ports(
            producers.map do |producer|
              PortPair.new(port: producer.data_port, ack_port: producer.ack_port, producer_ractor: producer.ractor)
            end,
            @ack_transfer,
            true
          )
        else
          producer = producers.fetch(0)
          Pull.ractor_port(producer.data_port, producer.ack_port, @ack_transfer, true, producer.ractor)
        end
      end

      # Sends Cancel to every ready producer, ignoring individual failures.
      def cancel_ready_producers
        @ready_producers.each do |producer|
          send_cancel(producer)
        rescue Exception # rubocop:disable Lint/RescueException
          nil
        end
      end

      def send_cancel(producer)
        send_control(producer.ack_port, RactorPort::Cancel.new(:closed))
      end

      def send_control(port, message)
        if @ack_transfer == :move
          port.send(message, move: true)
        else
          port.send(message)
        end
      end

      # Closes the installed delegate, if any. Returns the StandardError it
      # raised (so the caller can finish cleanup first) or nil.
      def close_delegate
        delegate = @state_mutex.synchronize { @delegate }
        delegate&.close
        nil
      rescue StandardError => error
        error
      end

      def close_delegate_suppressing(delegate)
        delegate.close
      rescue StandardError
        nil
      end

      def delegate_installed?
        @state_mutex.synchronize { !@delegate.nil? }
      end

      # Asks the setup thread to stop; a no-op when setup never started.
      def wake_setup
        @shutdown_port&.send(:shutdown)
      rescue Exception # rubocop:disable Lint/RescueException
        nil
      end

      def wait_for_setup
        @setup_thread&.join
      end

      # Waits for every given ractor on a helper thread, ignoring the
      # ractors' own results and failures.
      def wait_for_ractors(ractors)
        return if ractors.empty?

        Thread.new do
          ractors.each do |ractor|
            ractor.value
          rescue Exception # rubocop:disable Lint/RescueException
            nil
          end
        end.join
      end

      def close_setup_queue
        @setup_results.close
      end

      # Publishes the setup outcome; silently dropped once the queue is closed.
      def deliver_setup(result)
        @setup_results.push(result)
      rescue ClosedQueueError, ThreadError
        nil
      end

      def build_error(kind, error)
        RactorPortSourceError.new(
          kind: kind,
          cause_class_name: error.class.name,
          cause_message: error.message,
          cause: error
        )
      end

      def raise_error(error)
        if error.is_a?(RactorPortSourceError) && error.original_cause
          raise error, cause: error.original_cause
        end

        raise error
      end

      class << self
        # Starts one producer ractor. The ractor creates its own ack port,
        # reports it on the setup port, runs the definition's block with a
        # RactorProducer, and finally returns a marker describing how the
        # producer ended.
        def spawn_producer(data_port, setup_port, definition) # :nodoc:
          Ractor.new(
            data_port,
            setup_port,
            definition.block,
            definition.transfer,
            definition.args
          ) do |outbox, setup, block, transfer, args|
            ack_port = Ractor::Port.new
            setup.send(ack_port)
            producer = RactorProducer.new(outbox, ack_port, transfer)

            begin
              block.call(producer, *args)
              producer.complete unless producer.terminal? || producer.cancelled?
            rescue Exception => error # rubocop:disable Lint/RescueException
              producer.fail(error) unless producer.terminal? || producer.cancelled?
            end

            if producer.send_failed?
              ProducerSendFailed.new
            elsif producer.cancelled?
              ProducerCancelled.new
            else
              ProducerTerminal.new
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
module FiberStream
  module Pull
    # Running-accumulator stage.
    #
    # Every downstream demand pulls exactly one upstream value, folds it into
    # the accumulator with the reducer, and emits the new accumulator. The
    # initial accumulator itself is never emitted.
    class Scan
      def initialize(upstream, initial, reducer)
        @upstream = upstream
        @reducer = reducer
        @accumulator = initial
        @done = false
        @closed = false
      end

      # Returns the updated accumulator, or DONE once the stage is closed or
      # upstream has finished.
      def next
        return DONE unless active?

        element = @upstream.next
        return finish if Pull.done?(element)

        @accumulator = @reducer.call(@accumulator, element)
      end

      # Closes upstream the first time it is called; later calls do nothing.
      def close
        unless @closed
          @closed = true
          @upstream.close
        end
      end

      private

      # True while the stage is neither closed nor exhausted.
      def active?
        !(@closed || @done)
      end

      # Remembers that upstream is exhausted and reports DONE.
      def finish
        @done = true
        DONE
      end
    end
  end
end
@@ -0,0 +1,134 @@
1
+ # frozen_string_literal: true
2
+
3
module FiberStream
  module Pull
    # Delimiter-framing stage for `Flow.split`.
    #
    # The stage keeps an internal byte buffer because frames and separators can
    # cross chunk boundaries. Length checks are per frame body, not against the
    # aggregate buffer, so already complete valid frames can be emitted before a
    # later over-limit frame fails.
    class Split
      # upstream       - pull source of String chunks.
      # separator      - frame delimiter; compared as bytes.
      # keep_separator - truthy to append the separator to delimited frames.
      # max_length     - maximum frame body size in bytes, or nil for no limit.
      def initialize(upstream, separator, keep_separator, max_length)
        @upstream = upstream
        @separator = separator.b.freeze
        @keep_separator = keep_separator
        @max_length = max_length
        @buffer = +"".b
        # Byte offset before which the buffer is known to hold no separator.
        @search_from = 0
        @closed = false
        @upstream_done = false
      end

      # Returns the next frame, or DONE when the stage is closed or upstream
      # is exhausted and the buffer is empty.
      #
      # Raises FrameTooLongError when a frame body exceeds max_length and
      # TypeError when upstream yields a non-String element.
      #
      # A plain `until` is used instead of `Kernel#loop`, because `loop`
      # rescues StopIteration and would turn an upstream failure of that
      # class into a bogus frame.
      def next
        return DONE if @closed

        until (frame = next_buffered_frame)
          validate_pending_frame_length!
          return complete_from_buffer if @upstream_done

          append_next_chunk
        end

        frame
      end

      # Drops buffered bytes and closes upstream; only the first call acts.
      def close
        return if @closed

        @closed = true
        discard_buffer
        @upstream.close
      end

      private

      # Cuts the first delimited frame off the buffer, or returns nil when
      # the buffer holds no complete separator yet.
      def next_buffered_frame
        separator_index = buffered_separator_index
        return nil unless separator_index

        frame = @buffer.slice!(0, separator_index)
        @buffer.slice!(0, @separator.bytesize)
        @search_from = 0
        validate_frame_length!(frame)
        format_frame(frame)
      end

      # Index of the first separator in the buffer, or nil.
      #
      # After an unsuccessful search only the tail that could still start a
      # separator is searched again, so growing one long frame chunk by chunk
      # does not rescan the whole buffer every time.
      def buffered_separator_index
        index = @buffer.index(@separator, @search_from)
        @search_from = [@buffer.bytesize - @separator.bytesize + 1, 0].max unless index
        index
      end

      # Emits whatever is left once upstream is done; the trailing frame has
      # no separator, so none is appended.
      def complete_from_buffer
        return DONE if @buffer.empty?

        frame = @buffer
        @buffer = +"".b
        @search_from = 0
        validate_frame_length!(frame)
        frame
      end

      def append_next_chunk
        chunk = @upstream.next
        if Pull.done?(chunk)
          @upstream_done = true
          return
        end

        unless chunk.is_a?(String)
          raise TypeError, "Flow.split elements must be String"
        end

        @buffer << chunk.b
        validate_pending_frame_length!
      end

      def validate_pending_frame_length!
        return unless @max_length
        return if pending_frame_body_bytesize <= @max_length

        fail_frame_too_long
      end

      def validate_frame_length!(frame)
        return unless @max_length
        return if frame.bytesize <= @max_length

        fail_frame_too_long
      end

      # Size of the frame body currently being assembled. Trailing bytes that
      # may turn out to be the start of a separator are not counted yet.
      def pending_frame_body_bytesize
        separator_index = buffered_separator_index
        return separator_index if separator_index

        @buffer.bytesize - partial_separator_suffix_bytesize
      end

      # Length of the longest buffer suffix that is a proper prefix of the
      # separator.
      def partial_separator_suffix_bytesize
        max_suffix_bytesize = [@separator.bytesize - 1, @buffer.bytesize].min
        return 0 if max_suffix_bytesize.zero?

        max_suffix_bytesize.downto(1) do |bytesize|
          suffix = @buffer.byteslice(@buffer.bytesize - bytesize, bytesize)
          return bytesize if @separator.start_with?(suffix)
        end

        0
      end

      # Terminates the stage. `close` is a no-op once @closed is set, so the
      # buffered bytes have to be released here.
      def fail_frame_too_long
        @closed = true
        discard_buffer
        close_upstream
        error = FrameTooLongError.new("frame exceeded max_length #{@max_length}")
        raise error
      end

      def discard_buffer
        @buffer.clear
        @search_from = 0
      end

      # Closes upstream; a StandardError raised by it is returned, not raised.
      def close_upstream
        @upstream.close
        nil
      rescue StandardError => error
        error
      end

      def format_frame(frame)
        return frame unless @keep_separator

        frame + @separator
      end
    end
  end
end
@@ -11,11 +11,40 @@ module FiberStream
11
11
  # The sentinel must never escape through public APIs.
12
12
  module Pull
13
13
  DONE = Object.new.freeze
14
+ ProducerTerminal = Data.define
15
+ ProducerCancelled = Data.define
16
+ ProducerSendFailed = Data.define
17
+ private_constant :ProducerTerminal, :ProducerCancelled, :ProducerSendFailed
14
18
 
15
19
  def self.done?(value)
16
20
  value.equal?(DONE)
17
21
  end
18
22
 
23
# Yields every value pulled from `stream` until it reports DONE, then
# returns nil.
#
# The DONE sentinel is the only end-of-stream signal. `Kernel#loop` is
# deliberately not used: it rescues StopIteration, so an error of that class
# raised by `stream.next` or by the block would silently end the iteration
# as if the stream had completed.
def self.each_value(stream)
  until done?((value = stream.next))
    yield value
  end

  nil
end
31
+
32
# Builds the `:producer_failure` error reported for a producer ractor that
# ended with `result`. Only a ProducerSendFailed result gets the "failing to
# send" wording; every other result is described as exiting before sending.
def self.ractor_producer_termination_error(result)
  send_failed = ProducerSendFailed === result # rubocop:disable Style/CaseEquality
  explanation =
    if send_failed
      "producer exited after failing to send the ack-permitted message"
    else
      "producer exited before sending the ack-permitted message"
    end

  RactorPortSourceError.new(kind: :producer_failure, cause_class_name: "RuntimeError", cause_message: explanation)
end
47
+
19
48
  def self.each(enumerable)
20
49
  Each.new(enumerable)
21
50
  end
@@ -24,8 +53,20 @@ module FiberStream
24
53
  IOSource.new(io, chunk_size, close_io)
25
54
  end
26
55
 
27
- def self.ractor_port(port, ack_port, ack_transfer, cancel)
28
- RactorPortSource.new(port, ack_port, ack_transfer, cancel)
56
# Builds a `RactorPortSource`; `producer_ractor` is optional and is passed
# through as nil when omitted.
def self.ractor_port(port, ack_port, ack_transfer, cancel, producer_ractor = nil)
  source_arguments = [port, ack_port, ack_transfer, cancel, producer_ractor]
  RactorPortSource.new(*source_arguments)
end
59
+
60
# Builds a `RactorMergePortsSource` over the given port pairs.
def self.ractor_merge_ports(port_pairs, ack_transfer, cancel)
  RactorMergePortsSource.new(
    port_pairs,
    ack_transfer,
    cancel
  )
end
63
+
64
# Builds a non-merging `RactorProducerSource` (merge flag is false).
def self.ractor_producer(definitions, ack_transfer)
  merge = false
  RactorProducerSource.new(definitions, ack_transfer, merge)
end
67
+
68
# Builds a merging `RactorProducerSource` (merge flag is true).
def self.ractor_merge_producers(definitions, ack_transfer)
  merge = true
  RactorProducerSource.new(definitions, ack_transfer, merge)
end
30
71
 
31
72
  def self.concat(left_materializer, right_materializer)
@@ -36,6 +77,10 @@ module FiberStream
36
77
  Zip.new(left_materializer, right_materializer)
37
78
  end
38
79
 
80
# Builds a `Merge` from the left and right materializers.
def self.merge(left_materializer, right_materializer) = Merge.new(left_materializer, right_materializer)
83
+
39
84
  def self.map(upstream, transform)
40
85
  Map.new(upstream, transform)
41
86
  end
@@ -44,6 +89,10 @@ module FiberStream
44
89
  ParallelMapBoundary.new(upstream, concurrency, transform)
45
90
  end
46
91
 
92
# Builds a `ParallelUnorderedMapBoundary` over `upstream`.
def self.parallel_unordered_map(upstream, concurrency, transform)
  ParallelUnorderedMapBoundary.new(
    upstream,
    concurrency,
    transform
  )
end
95
+
47
96
  def self.ractor_map(upstream, workers, input_transfer, output_transfer, transform)
48
97
  RactorMapBoundary.new(upstream, workers, input_transfer, output_transfer, transform)
49
98
  end
@@ -60,6 +109,14 @@ module FiberStream
60
109
  Drop.new(upstream, count)
61
110
  end
62
111
 
112
# Builds a `Grouped` stage over `upstream` with the given count.
def self.grouped(upstream, count) = Grouped.new(upstream, count)
115
+
116
# Builds a `Scan` stage over `upstream` with the initial accumulator and reducer.
def self.scan(upstream, initial, reducer) = Scan.new(upstream, initial, reducer)
119
+
63
120
  def self.take_while(upstream, predicate)
64
121
  TakeWhile.new(upstream, predicate)
65
122
  end
@@ -80,6 +137,10 @@ module FiberStream
80
137
  Lines.new(upstream, chomp, max_length)
81
138
  end
82
139
 
140
# Builds a `Split` stage over `upstream`.
def self.split(upstream, separator, keep_separator, max_length)
  Split.new(
    upstream,
    separator,
    keep_separator,
    max_length
  )
end
143
+
83
144
  private_constant :DONE
84
145
  end
85
146
  end
@@ -87,24 +148,32 @@ end
87
148
  require_relative "pull/each"
88
149
  require_relative "pull/io_source"
89
150
  require_relative "pull/ractor_port_source"
151
+ require_relative "pull/ractor_merge_ports_source"
152
+ require_relative "pull/ractor_producer_source"
90
153
  require_relative "pull/concat"
91
154
  require_relative "pull/zip"
155
+ require_relative "pull/merge"
92
156
  require_relative "pull/map"
93
157
  require_relative "pull/select"
94
158
  require_relative "pull/take"
95
159
  require_relative "pull/drop"
160
+ require_relative "pull/grouped"
161
+ require_relative "pull/scan"
96
162
  require_relative "pull/take_while"
97
163
  require_relative "pull/drop_while"
98
164
  require_relative "pull/lines"
165
+ require_relative "pull/split"
99
166
  require_relative "pull/async_boundary"
100
167
  require_relative "pull/buffer_boundary"
101
168
  require_relative "pull/parallel_map_boundary"
169
+ require_relative "pull/parallel_unordered_map_boundary"
102
170
  require_relative "pull/ractor_map_boundary"
103
171
 
104
172
  module FiberStream
105
173
  module Pull
106
- private_constant :Each, :IOSource, :RactorPortSource, :Concat, :Zip, :Map, :Select, :Take, :Drop,
107
- :TakeWhile, :DropWhile, :Lines, :AsyncBoundary, :BufferBoundary,
108
- :ParallelMapBoundary, :RactorMapBoundary
174
+ private_constant :Each, :IOSource, :RactorPortSource, :RactorMergePortsSource, :RactorProducerSource, :Concat,
175
+ :Zip, :Merge, :Map, :Select, :Take, :Drop, :Grouped, :Scan, :TakeWhile, :DropWhile, :Lines, :Split,
176
+ :AsyncBoundary, :BufferBoundary, :ParallelMapBoundary, :ParallelUnorderedMapBoundary,
177
+ :RactorMapBoundary
109
178
  end
110
179
  end
@@ -6,7 +6,9 @@ module FiberStream
6
6
  # Producers send `Element`, `Complete`, and `Failure` messages to the data
7
7
  # port. FiberStream sends `Ack` and `Cancel` messages to the producer-owned
8
8
  # acknowledgment port. The envelopes keep stream values distinct from control
9
- # messages and support Ruby pattern matching.
9
+ # messages and support Ruby pattern matching. `Failure` cause metadata is
10
+ # producer-provided and is surfaced through `RactorPortSourceError`; producers
11
+ # should sanitize it before crossing trust boundaries.
10
12
  module RactorPort
11
13
  Element = ::Data.define(:value)
12
14
  Complete = ::Data.define