fiber_stream 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -30,6 +30,23 @@ module FiberStream
30
30
  new { |upstream| Pull.parallel_map(upstream, concurrency, block) }
31
31
  end
32
32
 
33
+ # Creates an unordered scheduler-backed parallel mapping flow.
34
+ #
35
+ # The stage starts internal scheduled fibers on first downstream demand and
36
+ # requires an installed `Fiber.scheduler` in a non-blocking fiber at that
37
+ # point. At most `concurrency` mapping blocks run at the same time, and at
38
+ # most `concurrency` upstream elements are pulled but not yet emitted downstream.
39
+ # Results are emitted in completion order and input order is not preserved.
40
+ # Closing the boundary closes upstream and requests internal worker
41
+ # cancellation. FiberStream does not depend on Async at runtime.
42
+ def self.parallel_unordered_map(concurrency:, &block)
43
+ raise ArgumentError, "missing block" unless block
44
+ raise TypeError, "concurrency must be an Integer" unless concurrency.is_a?(Integer)
45
+ raise ArgumentError, "concurrency must be positive" unless concurrency.positive?
46
+
47
+ new { |upstream| Pull.parallel_unordered_map(upstream, concurrency, block) }
48
+ end
49
+
33
50
  # Creates an ordered Ractor-backed mapping flow.
34
51
  #
35
52
  # The mapper runs inside worker ractors and must be shareable, typically
@@ -42,8 +59,8 @@ module FiberStream
42
59
  raise TypeError, "workers must be an Integer" unless workers.is_a?(Integer)
43
60
  raise ArgumentError, "workers must be positive" unless workers.positive?
44
61
 
45
- validate_ractor_transfer_policy!(:input_transfer, input_transfer)
46
- validate_ractor_transfer_policy!(:output_transfer, output_transfer)
62
+ Internal::RactorTransferPolicy.validate!(:input_transfer, input_transfer)
63
+ Internal::RactorTransferPolicy.validate!(:output_transfer, output_transfer)
47
64
  raise TypeError, "block must be shareable" unless Ractor.shareable?(block)
48
65
 
49
66
  new { |upstream| Pull.ractor_map(upstream, workers, input_transfer, output_transfer, block) }
@@ -87,6 +104,31 @@ module FiberStream
87
104
  new { |upstream| Pull.drop(upstream, count) }
88
105
  end
89
106
 
107
+ # Creates a fixed-size grouping flow.
108
+ #
109
+ # The flow emits arrays containing up to `count` adjacent upstream elements.
110
+ # Full groups contain exactly `count` elements; normal upstream completion
111
+ # emits one final partial group when one exists. `count` must be a positive
112
+ # Integer.
113
+ def self.grouped(count)
114
+ raise TypeError, "count must be an Integer" unless count.is_a?(Integer)
115
+ raise ArgumentError, "count must be positive" unless count.positive?
116
+
117
+ new { |upstream| Pull.grouped(upstream, count) }
118
+ end
119
+
120
+ # Creates a running-accumulator flow.
121
+ #
122
+ # The block is called as `block.call(accumulator, element)` for each
123
+ # upstream element, matching `Sink.fold`. The block result becomes the new
124
+ # accumulator and is emitted downstream. The initial accumulator is not
125
+ # emitted before the first upstream element.
126
+ def self.scan(initial, &block)
127
+ raise ArgumentError, "missing block" unless block
128
+
129
+ new { |upstream| Pull.scan(upstream, initial, block) }
130
+ end
131
+
90
132
  # Creates a predicate-based limiting flow.
91
133
  #
92
134
  # The flow emits leading elements while the block result is truthy. The
@@ -142,7 +184,9 @@ module FiberStream
142
184
  #
143
185
  # The flow accepts String chunks and emits lines split on "\n". By default
144
186
  # it chomps the trailing newline and one preceding "\r". `max_length` is an
145
- # optional per-line bytesize limit.
187
+ # optional per-line bytesize limit. With `max_length: nil`, one
188
+ # unterminated line can buffer without bound. Set a positive `max_length`
189
+ # for untrusted, network-facing, or otherwise unbounded streams.
146
190
  def self.lines(chomp: true, max_length: nil)
147
191
  raise TypeError, "chomp must be true or false" unless [true, false].include?(chomp)
148
192
  unless max_length.nil? || max_length.is_a?(Integer)
@@ -153,13 +197,30 @@ module FiberStream
153
197
  new { |upstream| Pull.lines(upstream, chomp, max_length) }
154
198
  end
155
199
 
156
- def self.validate_ractor_transfer_policy!(name, value)
157
- return if [:copy, :move].include?(value)
200
+ # Creates a delimiter-splitting flow.
201
+ #
202
+ # The flow accepts String chunks and emits frames split on the non-empty
203
+ # String `separator`. Separator matching is byte-oriented. By default
204
+ # emitted frames exclude the separator; `keep_separator: true` preserves it
205
+ # on separator-terminated frames. `max_length` is an optional per-frame body
206
+ # bytesize limit. With `max_length: nil`, one unterminated frame can buffer
207
+ # without bound. Set a positive `max_length` for untrusted, network-facing,
208
+ # or otherwise unbounded streams.
209
+ def self.split(separator, keep_separator: false, max_length: nil)
210
+ raise TypeError, "separator must be String" unless separator.is_a?(String)
211
+ raise ArgumentError, "separator must not be empty" if separator.empty?
212
+ raise TypeError, "keep_separator must be true or false" unless [true, false].include?(keep_separator)
213
+ unless max_length.nil? || max_length.is_a?(Integer)
214
+ raise TypeError, "max_length must be nil or an Integer"
215
+ end
216
+ raise ArgumentError, "max_length must be positive" if max_length&.<= 0
158
217
 
159
- raise ArgumentError, "#{name} must be :copy or :move"
218
+ new { |upstream| Pull.split(upstream, separator, keep_separator, max_length) }
160
219
  end
161
220
 
162
- private_class_method :validate_ractor_transfer_policy!
221
+ def self.build(&attach) # :nodoc:
222
+ new(&attach)
223
+ end
163
224
 
164
225
  # Returns a reusable flow that applies this flow and then `flow`.
165
226
  #
@@ -168,11 +229,11 @@ module FiberStream
168
229
  def via(flow)
169
230
  raise TypeError, "expected FiberStream::Flow" unless flow.is_a?(Flow)
170
231
 
171
- self.class.__send__(:new) do |upstream|
172
- attached_stream = attach(upstream)
232
+ self.class.build do |upstream|
233
+ attached_stream = attach_to(upstream)
173
234
 
174
235
  begin
175
- flow.__send__(:attach, attached_stream)
236
+ flow.attach_to(attached_stream)
176
237
  rescue StandardError
177
238
  begin
178
239
  attached_stream.close
@@ -192,13 +253,13 @@ module FiberStream
192
253
  def to(sink)
193
254
  raise TypeError, "expected FiberStream::Sink" unless sink.is_a?(Sink)
194
255
 
195
- Sink.__send__(:new) do |stream|
256
+ Sink.build do |stream|
196
257
  attached_stream = nil
197
258
  primary_error = nil
198
259
 
199
260
  begin
200
- attached_stream = attach(stream)
201
- sink.__send__(:run, attached_stream)
261
+ attached_stream = attach_to(stream)
262
+ sink.run_stream(attached_stream)
202
263
  rescue StandardError => error
203
264
  primary_error = error
204
265
  raise
@@ -218,9 +279,7 @@ module FiberStream
218
279
 
219
280
  private_class_method :new
220
281
 
221
- private
222
-
223
- def attach(upstream)
282
+ def attach_to(upstream) # :nodoc:
224
283
  @attach.call(upstream)
225
284
  end
226
285
  end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FiberStream
4
+ module Internal # :nodoc:
5
+ module RactorTransferPolicy # :nodoc:
6
+ module_function
7
+
8
+ def validate!(name, value)
9
+ return if [:copy, :move].include?(value)
10
+
11
+ raise ArgumentError, "#{name} must be :copy or :move"
12
+ end
13
+ end
14
+ end
15
+
16
+ private_constant :Internal
17
+ end
@@ -2,6 +2,10 @@
2
2
 
3
3
  module FiberStream
4
4
  class Pipeline
5
+ def self.build(source, sink) # :nodoc:
6
+ new(source, sink)
7
+ end
8
+
5
9
  def initialize(source, sink)
6
10
  @source = source
7
11
  @sink = sink
@@ -27,7 +31,7 @@ module FiberStream
27
31
  def run_async
28
32
  validate_scheduler!
29
33
 
30
- RunningPipeline.__send__(:new, Fiber.scheduler) { run }
34
+ RunningPipeline.start(Fiber.scheduler) { run }
31
35
  end
32
36
 
33
37
  private_class_method :new
@@ -9,12 +9,18 @@ module FiberStream
9
9
  # time back to the downstream caller, so it adds an async boundary without
10
10
  # adding prefetch.
11
11
  class AsyncBoundary
12
+ ValueMessage = Data.define(:value)
13
+ DoneMessage = Data.define
14
+ ErrorMessage = Data.define(:error)
15
+ private_constant :ValueMessage, :DoneMessage, :ErrorMessage
16
+
12
17
  def initialize(upstream)
13
18
  @upstream = upstream
14
19
  @producer = nil
15
20
  @started = false
16
21
  @closed = false
17
22
  @done = false
23
+ @upstream_closed = false
18
24
  end
19
25
 
20
26
  def next
@@ -23,14 +29,14 @@ module FiberStream
23
29
  start
24
30
  message = @producer.resume
25
31
 
26
- case message.fetch(0)
27
- when :value
28
- message.fetch(1)
29
- when :done
32
+ case message
33
+ in ValueMessage[value:]
34
+ value
35
+ in DoneMessage
30
36
  complete
31
- when :error
37
+ in ErrorMessage[error:]
32
38
  @done = true
33
- raise message.fetch(1)
39
+ raise error
34
40
  end
35
41
  end
36
42
 
@@ -39,7 +45,7 @@ module FiberStream
39
45
 
40
46
  @closed = true
41
47
  @done = true
42
- @upstream.close
48
+ close_upstream
43
49
  ensure
44
50
  cancel_producer
45
51
  end
@@ -60,16 +66,16 @@ module FiberStream
60
66
 
61
67
  value = @upstream.next
62
68
  if Pull.done?(value)
63
- Fiber.yield([:done])
69
+ Fiber.yield(DoneMessage.new)
64
70
  break
65
71
  end
66
72
 
67
- Fiber.yield([:value, value])
73
+ Fiber.yield(ValueMessage.new(value:))
68
74
  end
69
75
  rescue StandardError => exception
70
- Fiber.yield([:error, exception]) unless @closed
76
+ Fiber.yield(ErrorMessage.new(error: exception)) unless @closed
71
77
  ensure
72
- @upstream.close
78
+ close_upstream
73
79
  end
74
80
 
75
81
  def complete
@@ -77,7 +83,18 @@ module FiberStream
77
83
  DONE
78
84
  end
79
85
 
86
+ def close_upstream
87
+ return if @upstream_closed
88
+
89
+ @upstream_closed = true
90
+ @upstream.close
91
+ end
92
+
80
93
  def cancel_producer
94
+ return unless @producer&.alive?
95
+
96
+ @producer.kill
97
+ rescue StandardError
81
98
  nil
82
99
  end
83
100
  end
@@ -9,10 +9,17 @@ module FiberStream
9
9
  # queue capacity plus in-flight producer/consumer work. Close is responsible
10
10
  # for closing upstream and waking any producer blocked on a full queue.
11
11
  class BufferBoundary
12
+ CancellationError = Class.new(StandardError)
13
+ ValueMessage = Data.define(:value)
14
+ DoneMessage = Data.define
15
+ ErrorMessage = Data.define(:error)
16
+ private_constant :CancellationError, :ValueMessage, :DoneMessage, :ErrorMessage
17
+
12
18
  def initialize(upstream, count)
13
19
  @upstream = upstream
14
20
  @queue = Thread::SizedQueue.new(count)
15
21
  @producer = nil
22
+ @scheduler = nil
16
23
  @started = false
17
24
  @closed = false
18
25
  @done = false
@@ -27,14 +34,14 @@ module FiberStream
27
34
  message = @queue.pop
28
35
  return complete if message.nil?
29
36
 
30
- case message.fetch(0)
31
- when :value
32
- message.fetch(1)
33
- when :done
37
+ case message
38
+ in ValueMessage[value:]
39
+ value
40
+ in DoneMessage
34
41
  complete
35
- when :error
42
+ in ErrorMessage[error:]
36
43
  @done = true
37
- raise message.fetch(1)
44
+ raise error
38
45
  end
39
46
  end
40
47
 
@@ -58,7 +65,9 @@ module FiberStream
58
65
  raise SchedulerRequiredError, "Flow.buffer requires Fiber.scheduler" unless Fiber.scheduler
59
66
 
60
67
  @started = true
68
+ @scheduler = Fiber.scheduler
61
69
  @producer = Fiber.schedule { run_producer }
70
+ cancel_producer if @closed
62
71
  end
63
72
 
64
73
  def run_producer
@@ -67,8 +76,10 @@ module FiberStream
67
76
 
68
77
  message = pull_message
69
78
  break unless deliver(message)
70
- break unless message.fetch(0) == :value
79
+ break unless message.is_a?(ValueMessage)
71
80
  end
81
+ rescue CancellationError
82
+ nil
72
83
  ensure
73
84
  @upstream_close_error ||= close_upstream unless @upstream_closed
74
85
  end
@@ -77,15 +88,17 @@ module FiberStream
77
88
  value = @upstream.next
78
89
  return terminal_done_message if Pull.done?(value)
79
90
 
80
- [:value, value]
91
+ ValueMessage.new(value:)
92
+ rescue CancellationError
93
+ raise
81
94
  rescue StandardError => error
82
95
  close_upstream(record_error: false)
83
- [:error, error]
96
+ ErrorMessage.new(error:)
84
97
  end
85
98
 
86
99
  def terminal_done_message
87
100
  close_error = close_upstream
88
- close_error ? [:error, close_error] : [:done]
101
+ close_error ? ErrorMessage.new(error: close_error) : DoneMessage.new
89
102
  end
90
103
 
91
104
  def deliver(message)
@@ -116,6 +129,11 @@ module FiberStream
116
129
  end
117
130
 
118
131
  def cancel_producer
132
+ return unless @producer&.alive?
133
+ return unless @scheduler.respond_to?(:fiber_interrupt)
134
+
135
+ @scheduler.fiber_interrupt(@producer, CancellationError.new)
136
+ rescue NotImplementedError, StandardError
119
137
  nil
120
138
  end
121
139
  end
@@ -8,7 +8,7 @@ module FiberStream
8
8
  def initialize(left_materializer, right_materializer)
9
9
  @left_materializer = left_materializer
10
10
  @right_materializer = right_materializer
11
- @left = @left_materializer.call
11
+ @left = nil
12
12
  @right = nil
13
13
  @phase = :left
14
14
  @closed = false
@@ -38,6 +38,7 @@ module FiberStream
38
38
  private
39
39
 
40
40
  def next_left
41
+ materialize_left
41
42
  value = @left.next
42
43
  return value unless Pull.done?(value)
43
44
 
@@ -56,6 +57,13 @@ module FiberStream
56
57
  DONE
57
58
  end
58
59
 
60
+ def materialize_left
61
+ return if @left
62
+
63
+ stream = @left_materializer.call
64
+ @left = stream
65
+ end
66
+
59
67
  def close_left
60
68
  stream = @left
61
69
  return unless stream
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FiberStream
4
+ module Pull
5
+ # Fixed-size grouping stage.
6
+ #
7
+ # It collects adjacent upstream elements into distinct arrays of up to
8
+ # `count` elements. A final partial group is emitted when upstream completes
9
+ # normally.
10
+ class Grouped
11
+ def initialize(upstream, count)
12
+ @upstream = upstream
13
+ @count = count
14
+ @closed = false
15
+ @done = false
16
+ end
17
+
18
+ def next
19
+ return DONE if @closed || @done
20
+
21
+ group = []
22
+
23
+ while group.length < @count
24
+ value = @upstream.next
25
+ if Pull.done?(value)
26
+ @done = true
27
+ return DONE if group.empty?
28
+
29
+ return group
30
+ end
31
+
32
+ group << value
33
+ end
34
+
35
+ group
36
+ end
37
+
38
+ def close
39
+ return if @closed
40
+
41
+ @closed = true
42
+ @upstream.close
43
+ end
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,230 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FiberStream
4
+ module Pull
5
+ # Scheduler-backed ready-order source merge.
6
+ #
7
+ # Each input source is materialized by a scheduled producer fiber on first
8
+ # downstream demand. Producers publish values, completion, and failures into
9
+ # a bounded mailbox; downstream emits values in mailbox arrival order while
10
+ # preserving each input's own order.
11
+ class Merge
12
+ SIDE_ORDER = [:left, :right].freeze
13
+ CancellationError = Class.new(StandardError)
14
+ ValueMessage = Data.define(:side, :value)
15
+ DoneMessage = Data.define(:side)
16
+ ErrorMessage = Data.define(:side, :error)
17
+ private_constant :ValueMessage, :DoneMessage, :ErrorMessage
18
+
19
+ def initialize(left_materializer, right_materializer)
20
+ @materializers = { left: left_materializer, right: right_materializer }
21
+ @streams = { left: nil, right: nil }
22
+ @stream_closed = { left: false, right: false }
23
+ @side_done = { left: false, right: false }
24
+ @producers = {}
25
+ @mailbox = nil
26
+ @started = false
27
+ @closed = false
28
+ @done = false
29
+ end
30
+
31
+ def next
32
+ return DONE if @closed || @done
33
+
34
+ start
35
+ next_message
36
+ end
37
+
38
+ def close
39
+ return if @closed
40
+
41
+ @closed = true
42
+ @done = true
43
+ close_error = close_materialized_streams
44
+ close_mailbox
45
+ raise close_error if close_error
46
+ ensure
47
+ cancel_producers
48
+ end
49
+
50
+ private
51
+
52
+ def start
53
+ return if @started
54
+
55
+ validate_scheduler!
56
+
57
+ @mailbox = MergeMailbox.new(1)
58
+ @started = true
59
+ SIDE_ORDER.each do |side|
60
+ @producers[side] = Fiber.schedule { run_producer(side) }
61
+ end
62
+ end
63
+
64
+ def next_message
65
+ loop do
66
+ message = @mailbox.pop
67
+ return complete if message.nil?
68
+
69
+ case message
70
+ in ValueMessage[value:]
71
+ return value
72
+ in DoneMessage[side:]
73
+ mark_side_done(side)
74
+ return complete if all_done?
75
+ in ErrorMessage[error:]
76
+ return fail_with(error)
77
+ end
78
+ end
79
+ rescue MergeMailbox::Closed
80
+ complete
81
+ end
82
+
83
+ def run_producer(side)
84
+ stream = materialize_side(side)
85
+
86
+ loop do
87
+ break if @closed
88
+
89
+ message = pull_message(side, stream)
90
+ break unless deliver(message)
91
+ break unless message.is_a?(ValueMessage)
92
+ end
93
+ rescue MergeMailbox::Closed, CancellationError
94
+ nil
95
+ rescue StandardError => error
96
+ close_side(side, record_error: false)
97
+ deliver(ErrorMessage.new(side:, error:)) unless @closed
98
+ end
99
+
100
+ def materialize_side(side)
101
+ stream = @materializers.fetch(side).call
102
+ @streams[side] = stream
103
+ close_side(side) if @closed
104
+ stream
105
+ end
106
+
107
+ def pull_message(side, stream)
108
+ value = stream.next
109
+ return terminal_done_message(side) if Pull.done?(value)
110
+
111
+ ValueMessage.new(side:, value:)
112
+ rescue StandardError => error
113
+ close_side(side, record_error: false)
114
+ ErrorMessage.new(side:, error:)
115
+ end
116
+
117
+ def terminal_done_message(side)
118
+ close_error = close_side(side)
119
+ close_error ? ErrorMessage.new(side:, error: close_error) : DoneMessage.new(side:)
120
+ end
121
+
122
+ def deliver(message)
123
+ @mailbox.push(message)
124
+ true
125
+ rescue MergeMailbox::Closed
126
+ false
127
+ end
128
+
129
+ def mark_side_done(side)
130
+ @side_done[side] = true
131
+ end
132
+
133
+ def all_done?
134
+ SIDE_ORDER.all? { |side| @side_done.fetch(side) }
135
+ end
136
+
137
+ def complete
138
+ @done = true
139
+ close_mailbox
140
+ DONE
141
+ end
142
+
143
+ def fail_with(error)
144
+ @done = true
145
+ close_mailbox
146
+ close_materialized_streams
147
+ cancel_producers
148
+ raise error
149
+ end
150
+
151
+ def close_materialized_streams
152
+ first_error = nil
153
+
154
+ SIDE_ORDER.each do |side|
155
+ close_error = close_side(side)
156
+ first_error ||= close_error
157
+ end
158
+
159
+ first_error
160
+ end
161
+
162
+ def close_side(side, record_error: true)
163
+ return nil if @stream_closed.fetch(side)
164
+
165
+ stream = @streams[side]
166
+ return nil unless stream
167
+
168
+ @stream_closed[side] = true
169
+ @streams[side] = nil
170
+ stream.close
171
+ nil
172
+ rescue StandardError => error
173
+ error if record_error
174
+ end
175
+
176
+ def close_mailbox
177
+ @mailbox&.close
178
+ end
179
+
180
+ def cancel_producers
181
+ scheduler = Fiber.scheduler
182
+ return unless scheduler.respond_to?(:fiber_interrupt)
183
+
184
+ @producers.each_value do |fiber|
185
+ next unless fiber&.alive?
186
+
187
+ scheduler.fiber_interrupt(fiber, CancellationError.new)
188
+ rescue StandardError
189
+ nil
190
+ end
191
+ end
192
+
193
+ def validate_scheduler!
194
+ return if Fiber.scheduler && !Fiber.current.blocking?
195
+
196
+ message =
197
+ if Fiber.scheduler
198
+ "Source.merge requires a non-blocking fiber"
199
+ else
200
+ "Source.merge requires Fiber.scheduler"
201
+ end
202
+ raise SchedulerRequiredError, message
203
+ end
204
+
205
+ class MergeMailbox
206
+ Closed = Class.new(StandardError)
207
+
208
+ def initialize(capacity)
209
+ @queue = Thread::SizedQueue.new(capacity)
210
+ end
211
+
212
+ def push(message)
213
+ @queue << message
214
+ rescue ClosedQueueError
215
+ raise Closed
216
+ end
217
+
218
+ def pop
219
+ @queue.pop
220
+ rescue ClosedQueueError
221
+ raise Closed
222
+ end
223
+
224
+ def close
225
+ @queue.close
226
+ end
227
+ end
228
+ end
229
+ end
230
+ end