fiber_stream 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,51 @@ module FiberStream
13
13
  new { |upstream| Pull.map(upstream, block) }
14
14
  end
15
15
 
16
+ # Creates a transform-and-filter flow.
17
+ #
18
+ # The block is called once for each upstream element observed by this
19
+ # stage. Truthy block results are emitted downstream as transformed values;
20
+ # false and nil results are dropped. Exceptions raised by the block fail the
21
+ # stream and are re-raised from `Source#run_with`.
22
def self.filter_map(&block)
  # `&block` is nil when the caller did not supply a block.
  raise ArgumentError, "missing block" if block.nil?

  # The stage is only constructed when the flow is attached to an upstream.
  new do |source|
    Pull.filter_map(source, block)
  end
end
27
+
28
+ # Creates a nil-dropping flow.
29
+ #
30
+ # The flow drops `nil` elements and passes every non-`nil` element through
31
+ # unchanged, including `false`.
32
def self.compact
  # Defers construction of the nil-dropping stage until an upstream is attached.
  new do |source|
    Pull.compact(source)
  end
end
35
+
36
+ # Creates a one-to-many mapping flow.
37
+ #
38
+ # The block is called once for each upstream element whose expansion is
39
+ # needed. It must return an object that responds to `#each`; yielded values
40
+ # are emitted in order before the next upstream element is pulled.
41
+ # Exceptions raised by the block or by the returned object's `#each` fail
42
+ # the stream and are re-raised from `Source#run_with`.
43
def self.map_concat(&block)
  # `&block` is nil when the caller did not supply a block.
  raise ArgumentError, "missing block" if block.nil?

  # The stage is only constructed when the flow is attached to an upstream.
  new do |source|
    Pull.map_concat(source, block)
  end
end
48
+
49
+ # Creates a pass-through observing flow.
50
+ #
51
+ # The block is called once for each element before that element is emitted
52
+ # downstream. The block return value is ignored and the original element is
53
+ # passed through unchanged. Exceptions raised by the block fail the stream
54
+ # and are re-raised from `Source#run_with`.
55
def self.tap(&block)
  # Unlike Kernel#tap, calling this without a block is an ArgumentError.
  raise ArgumentError, "missing block" if block.nil?

  # The stage is only constructed when the flow is attached to an upstream.
  new do |source|
    Pull.tap(source, block)
  end
end
60
+
16
61
  # Creates an ordered scheduler-backed parallel mapping flow.
17
62
  #
18
63
  # The stage starts internal scheduled fibers on first downstream demand and
@@ -30,6 +75,23 @@ module FiberStream
30
75
  new { |upstream| Pull.parallel_map(upstream, concurrency, block) }
31
76
  end
32
77
 
78
+ # Creates an unordered scheduler-backed parallel mapping flow.
79
+ #
80
+ # The stage starts internal scheduled fibers on first downstream demand and
81
+ # requires an installed `Fiber.scheduler` in a non-blocking fiber at that
82
+ # point. At most `concurrency` mapping blocks run at the same time, and at
83
+ # most `concurrency` upstream elements are pulled but not yet emitted downstream.
84
+ # Results are emitted in completion order and input order is not preserved.
85
+ # Closing the boundary closes upstream and requests internal worker
86
+ # cancellation. FiberStream does not depend on Async at runtime.
87
def self.parallel_unordered_map(concurrency:, &block)
  # Argument checks run eagerly, in this order: block, type, then range.
  raise ArgumentError, "missing block" if block.nil?
  raise TypeError, "concurrency must be an Integer" unless concurrency.is_a?(Integer)
  raise ArgumentError, "concurrency must be positive" unless concurrency > 0

  # The stage is only constructed when the flow is attached to an upstream.
  new do |source|
    Pull.parallel_unordered_map(source, concurrency, block)
  end
end
94
+
33
95
  # Creates an ordered Ractor-backed mapping flow.
34
96
  #
35
97
  # The mapper runs inside worker ractors and must be shareable, typically
@@ -42,8 +104,8 @@ module FiberStream
42
104
  raise TypeError, "workers must be an Integer" unless workers.is_a?(Integer)
43
105
  raise ArgumentError, "workers must be positive" unless workers.positive?
44
106
 
45
- validate_ractor_transfer_policy!(:input_transfer, input_transfer)
46
- validate_ractor_transfer_policy!(:output_transfer, output_transfer)
107
+ Internal::RactorTransferPolicy.validate!(:input_transfer, input_transfer)
108
+ Internal::RactorTransferPolicy.validate!(:output_transfer, output_transfer)
47
109
  raise TypeError, "block must be shareable" unless Ractor.shareable?(block)
48
110
 
49
111
  new { |upstream| Pull.ractor_map(upstream, workers, input_transfer, output_transfer, block) }
@@ -61,6 +123,19 @@ module FiberStream
61
123
  new { |upstream| Pull.select(upstream, block) }
62
124
  end
63
125
 
126
+ # Creates a complement filtering flow.
127
+ #
128
+ # On each downstream demand, the block is called for successive upstream
129
+ # elements until it returns `false` or `nil`, or upstream completes. Truthy
130
+ # results drop the original element; false and nil results pass it through unchanged.
131
+ # Exceptions raised by the block fail the stream and are re-raised from
132
+ # `Source#run_with`.
133
def self.reject(&block)
  # `&block` is nil when the caller did not supply a block.
  raise ArgumentError, "missing block" if block.nil?

  # The stage is only constructed when the flow is attached to an upstream.
  new do |source|
    Pull.reject(source, block)
  end
end
138
+
64
139
  # Creates a limiting flow.
65
140
  #
66
141
  # The flow emits at most `count` elements. `take(0)` completes without
@@ -100,6 +175,18 @@ module FiberStream
100
175
  new { |upstream| Pull.grouped(upstream, count) }
101
176
  end
102
177
 
178
+ # Creates a running-accumulator flow.
179
+ #
180
+ # The block is called as `block.call(accumulator, element)` for each
181
+ # upstream element, matching `Sink.fold`. The block result becomes the new
182
+ # accumulator and is emitted downstream. The initial accumulator is not
183
+ # emitted before the first upstream element.
184
def self.scan(initial, &block)
  # `&block` is nil when the caller did not supply a block.
  raise ArgumentError, "missing block" if block.nil?

  # The same `initial` object is handed to every stage built from this flow.
  new do |source|
    Pull.scan(source, initial, block)
  end
end
189
+
103
190
  # Creates a predicate-based limiting flow.
104
191
  #
105
192
  # The flow emits leading elements while the block result is truthy. The
@@ -151,6 +238,19 @@ module FiberStream
151
238
  new { |upstream| Pull.buffer(upstream, count) }
152
239
  end
153
240
 
241
+ # Creates a scheduler-aware throttling flow.
242
+ #
243
+ # The `rate:` form creates a fresh `RateLimiter` for each materialization.
244
+ # The `limiter:` form uses the supplied limiter object, which must respond
245
+ # to `acquire(permits:)` and return only after permits are acquired. When
246
+ # FiberStream-owned waiting is required, the current fiber must be
247
+ # non-blocking with an installed `Fiber.scheduler`.
248
def self.throttle(**options)
  # Options are validated once, here; the returned callable yields the limiter.
  limiter_factory = build_throttle_limiter(options)

  # The factory is invoked on every attachment, not when the flow is defined.
  new do |source|
    Pull.throttle(source, limiter_factory.call)
  end
end
253
+
154
254
  # Creates a line-splitting flow.
155
255
  #
156
256
  # The flow accepts String chunks and emits lines split on "\n". By default
@@ -189,13 +289,41 @@ module FiberStream
189
289
  new { |upstream| Pull.split(upstream, separator, keep_separator, max_length) }
190
290
  end
191
291
 
192
- def self.validate_ractor_transfer_policy!(name, value)
193
- return if [:copy, :move].include?(value)
292
# Internal constructor hook: forwards the attach block to the private `new`.
def self.build(&attach) = new(&attach) # :nodoc:
295
+
296
def self.build_throttle_limiter(options)
  # Reject anything outside the supported keyword set before reading values.
  extras = options.keys - %i[rate per burst limiter]
  raise ArgumentError, "unknown keywords: #{extras.join(", ")}" unless extras.empty?

  if options.key?(:limiter)
    # A caller-supplied limiter is mutually exclusive with every rate option.
    raise ArgumentError, "cannot pass rate and limiter together" if options.key?(:rate)
    raise ArgumentError, "cannot pass per with limiter" if options.key?(:per)
    raise ArgumentError, "cannot pass burst with limiter" if options.key?(:burst)

    shared_limiter = options.fetch(:limiter)
    raise TypeError, "limiter must respond to acquire" unless shared_limiter.respond_to?(:acquire)

    # Every call returns the same caller-owned limiter object.
    return lambda { shared_limiter }
  end

  raise ArgumentError, "missing rate or limiter" unless options.key?(:rate)

  settings = {
    rate: options.fetch(:rate),
    per: options.fetch(:per, 1),
    burst: options.fetch(:burst, nil)
  }
  # Validate eagerly so bad options fail when the flow is defined.
  RateLimiter.validate_options!(**settings)

  # Every call builds a new RateLimiter from the validated settings.
  lambda { RateLimiter.new(**settings) }
end
197
325
 
198
- private_class_method :validate_ractor_transfer_policy!
326
+ private_class_method :build_throttle_limiter
199
327
 
200
328
  # Returns a reusable flow that applies this flow and then `flow`.
201
329
  #
@@ -204,11 +332,11 @@ module FiberStream
204
332
  def via(flow)
205
333
  raise TypeError, "expected FiberStream::Flow" unless flow.is_a?(Flow)
206
334
 
207
- self.class.__send__(:new) do |upstream|
208
- attached_stream = attach(upstream)
335
+ self.class.build do |upstream|
336
+ attached_stream = attach_to(upstream)
209
337
 
210
338
  begin
211
- flow.__send__(:attach, attached_stream)
339
+ flow.attach_to(attached_stream)
212
340
  rescue StandardError
213
341
  begin
214
342
  attached_stream.close
@@ -228,13 +356,13 @@ module FiberStream
228
356
  def to(sink)
229
357
  raise TypeError, "expected FiberStream::Sink" unless sink.is_a?(Sink)
230
358
 
231
- Sink.__send__(:new) do |stream|
359
+ Sink.build do |stream|
232
360
  attached_stream = nil
233
361
  primary_error = nil
234
362
 
235
363
  begin
236
- attached_stream = attach(stream)
237
- sink.__send__(:run, attached_stream)
364
+ attached_stream = attach_to(stream)
365
+ sink.run_stream(attached_stream)
238
366
  rescue StandardError => error
239
367
  primary_error = error
240
368
  raise
@@ -254,9 +382,7 @@ module FiberStream
254
382
 
255
383
  private_class_method :new
256
384
 
257
- private
258
-
259
- def attach(upstream)
385
# Invokes the stored attach callable with `upstream` and returns its result.
def attach_to(upstream) = @attach.call(upstream) # :nodoc:
262
388
  end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
module FiberStream
  module Internal # :nodoc:
    # Validation helper for Ractor transfer-policy options.
    module RactorTransferPolicy # :nodoc:
      module_function

      # Returns nil when `value` is :copy or :move.
      # Raises ArgumentError naming the option `name` for any other value.
      def validate!(name, value)
        case value
        when :copy, :move
          nil
        else
          raise ArgumentError, "#{name} must be :copy or :move"
        end
      end
    end
  end

  private_constant :Internal
end
@@ -2,6 +2,10 @@
2
2
 
3
3
  module FiberStream
4
4
  class Pipeline
5
# Internal constructor hook: forwards both arguments to the private `new`.
def self.build(source, sink) = new(source, sink) # :nodoc:
8
+
5
9
  def initialize(source, sink)
6
10
  @source = source
7
11
  @sink = sink
@@ -27,7 +31,7 @@ module FiberStream
27
31
  def run_async
28
32
  validate_scheduler!
29
33
 
30
- RunningPipeline.__send__(:new, Fiber.scheduler) { run }
34
+ RunningPipeline.start(Fiber.scheduler) { run }
31
35
  end
32
36
 
33
37
  private_class_method :new
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
module FiberStream
  module Pull
    # Nil-dropping stage.
    #
    # A single downstream demand may pull several upstream elements until a
    # non-nil value is observed or upstream completes. Dropped nil values are
    # discarded immediately and are not buffered.
    class Compact
      # upstream - the stage to pull from; `next` and `close` are called on it.
      def initialize(upstream)
        @upstream = upstream
        @closed = false
        @done = false
      end

      # Returns the next non-nil upstream element (`false` included), or DONE
      # once `Pull.done?` reports upstream completion or this stage is closed.
      #
      # The pull loop is a plain `until` rather than `Kernel#loop`. `loop`
      # rescues StopIteration, so an upstream raising it would have made this
      # method return the exception's result (normally nil) as if it were an
      # element. Every exception raised by upstream now propagates.
      def next
        return DONE if @closed || @done

        until Pull.done?(value = @upstream.next)
          return value unless value.nil?
        end

        # Remember completion so upstream is not pulled again.
        @done = true
        DONE
      end

      # Closes upstream on the first call; later calls are no-ops.
      def close
        return if @closed

        @closed = true
        @upstream.close
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
module FiberStream
  module Pull
    # Transform-and-filter stage.
    #
    # A single downstream demand may pull several upstream elements until the
    # transform returns a truthy value or upstream completes. Falsey transform
    # results are discarded immediately and are not buffered.
    class FilterMap
      # upstream  - the stage to pull from; `next` and `close` are called on it.
      # transform - callable invoked with each upstream element.
      def initialize(upstream, transform)
        @upstream = upstream
        @transform = transform
        @closed = false
        @done = false
      end

      # Returns the next truthy transform result, or DONE once `Pull.done?`
      # reports upstream completion or this stage is closed.
      #
      # The pull loop is a plain `until` rather than `Kernel#loop`. `loop`
      # rescues StopIteration, so a transform (or upstream) raising it, for
      # example via `Enumerator#next` on an exhausted enumerator, would have
      # made this method return the exception's result (normally nil) as an
      # element instead of failing. Every exception now propagates.
      def next
        return DONE if @closed || @done

        until Pull.done?(value = @upstream.next)
          result = @transform.call(value)
          return result if result
        end

        # Remember completion so upstream is not pulled again.
        @done = true
        DONE
      end

      # Closes upstream on the first call; later calls are no-ops.
      def close
        return if @closed

        @closed = true
        @upstream.close
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
module FiberStream
  module Pull
    # One-to-many mapping stage.
    #
    # Each upstream element is expanded into the values yielded by the `#each`
    # of the object the transform returns. Only one expansion is active at a
    # time, and the next upstream element is not pulled until the active
    # expansion is exhausted.
    class MapConcat
      # upstream  - the stage to pull from; `next` and `close` are called on it.
      # transform - callable invoked with each upstream element; its result
      #             must respond to `each`.
      def initialize(upstream, transform)
        @upstream = upstream
        @transform = transform
        @current_enumerator = nil
        @closed = false
        @done = false
      end

      # Returns the next expanded value, or DONE once `Pull.done?` reports
      # upstream completion or this stage is closed.
      #
      # Raises TypeError when the transform result does not respond to `each`.
      #
      # The pull loop is a plain `until` rather than `Kernel#loop`. `loop`
      # rescues StopIteration, so a transform (or upstream) raising it would
      # have made this method return the exception's result (normally nil) as
      # an element instead of failing. Only the StopIteration raised while
      # advancing the active enumerator is rescued.
      def next
        return DONE if @closed || @done

        until @done
          if @current_enumerator
            begin
              return @current_enumerator.next
            rescue StopIteration
              # Active expansion is exhausted; move on to the next element.
              @current_enumerator = nil
            end
          end

          value = @upstream.next
          if Pull.done?(value)
            @done = true
          else
            result = @transform.call(value)
            unless result.respond_to?(:each)
              raise TypeError, "map_concat block result must respond to each"
            end

            @current_enumerator = result.to_enum(:each)
          end
        end

        DONE
      end

      # Drops the active expansion and closes upstream on the first call;
      # later calls are no-ops.
      def close
        return if @closed

        @closed = true
        @current_enumerator = nil
        @upstream.close
      end
    end
  end
end