fiber_stream 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +48 -0
- data/README.md +179 -61
- data/examples/README.md +6 -0
- data/examples/ractor_producer_sources.rb +43 -0
- data/lib/fiber_stream/flow.rb +141 -15
- data/lib/fiber_stream/internal/ractor_transfer_policy.rb +17 -0
- data/lib/fiber_stream/pipeline.rb +5 -1
- data/lib/fiber_stream/pull/compact.rb +39 -0
- data/lib/fiber_stream/pull/filter_map.rb +41 -0
- data/lib/fiber_stream/pull/map_concat.rb +56 -0
- data/lib/fiber_stream/pull/parallel_unordered_map_boundary.rb +311 -0
- data/lib/fiber_stream/pull/ractor_map_boundary.rb +50 -51
- data/lib/fiber_stream/pull/ractor_merge_ports_source.rb +18 -3
- data/lib/fiber_stream/pull/ractor_port_source.rb +39 -6
- data/lib/fiber_stream/pull/ractor_producer_source.rb +349 -0
- data/lib/fiber_stream/pull/reject.rb +40 -0
- data/lib/fiber_stream/pull/scan.rb +38 -0
- data/lib/fiber_stream/pull/tap.rb +38 -0
- data/lib/fiber_stream/pull/throttle.rb +43 -0
- data/lib/fiber_stream/pull.rb +84 -5
- data/lib/fiber_stream/ractor_producer.rb +167 -0
- data/lib/fiber_stream/rate_limiter.rb +163 -0
- data/lib/fiber_stream/running_pipeline.rb +4 -0
- data/lib/fiber_stream/sink.rb +25 -19
- data/lib/fiber_stream/source.rb +125 -22
- data/lib/fiber_stream/version.rb +1 -1
- data/lib/fiber_stream.rb +3 -0
- data/sig/fiber_stream.rbs +43 -1
- metadata +16 -3
data/lib/fiber_stream/flow.rb
CHANGED
|
@@ -13,6 +13,51 @@ module FiberStream
|
|
|
13
13
|
new { |upstream| Pull.map(upstream, block) }
|
|
14
14
|
end
|
|
15
15
|
|
|
16
|
+
# Creates a transform-and-filter flow.
|
|
17
|
+
#
|
|
18
|
+
# The block is called once for each upstream element observed by this
|
|
19
|
+
# stage. Truthy block results are emitted downstream as transformed values;
|
|
20
|
+
# false and nil results are dropped. Exceptions raised by the block fail the
|
|
21
|
+
# stream and are re-raised from `Source#run_with`.
|
|
22
|
+
def self.filter_map(&block)
|
|
23
|
+
raise ArgumentError, "missing block" unless block
|
|
24
|
+
|
|
25
|
+
new { |upstream| Pull.filter_map(upstream, block) }
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# Creates a nil-dropping flow.
|
|
29
|
+
#
|
|
30
|
+
# The flow drops `nil` elements and passes every non-`nil` element through
|
|
31
|
+
# unchanged, including `false`.
|
|
32
|
+
def self.compact
|
|
33
|
+
new { |upstream| Pull.compact(upstream) }
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# Creates a one-to-many mapping flow.
|
|
37
|
+
#
|
|
38
|
+
# The block is called once for each upstream element whose expansion is
|
|
39
|
+
# needed. It must return an object that responds to `#each`; yielded values
|
|
40
|
+
# are emitted in order before the next upstream element is pulled.
|
|
41
|
+
# Exceptions raised by the block or by the returned object's `#each` fail
|
|
42
|
+
# the stream and are re-raised from `Source#run_with`.
|
|
43
|
+
def self.map_concat(&block)
|
|
44
|
+
raise ArgumentError, "missing block" unless block
|
|
45
|
+
|
|
46
|
+
new { |upstream| Pull.map_concat(upstream, block) }
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
# Creates a pass-through observing flow.
|
|
50
|
+
#
|
|
51
|
+
# The block is called once for each element before that element is emitted
|
|
52
|
+
# downstream. The block return value is ignored and the original element is
|
|
53
|
+
# passed through unchanged. Exceptions raised by the block fail the stream
|
|
54
|
+
# and are re-raised from `Source#run_with`.
|
|
55
|
+
def self.tap(&block)
|
|
56
|
+
raise ArgumentError, "missing block" unless block
|
|
57
|
+
|
|
58
|
+
new { |upstream| Pull.tap(upstream, block) }
|
|
59
|
+
end
|
|
60
|
+
|
|
16
61
|
# Creates an ordered scheduler-backed parallel mapping flow.
|
|
17
62
|
#
|
|
18
63
|
# The stage starts internal scheduled fibers on first downstream demand and
|
|
@@ -30,6 +75,23 @@ module FiberStream
|
|
|
30
75
|
new { |upstream| Pull.parallel_map(upstream, concurrency, block) }
|
|
31
76
|
end
|
|
32
77
|
|
|
78
|
+
# Creates an unordered scheduler-backed parallel mapping flow.
|
|
79
|
+
#
|
|
80
|
+
# The stage starts internal scheduled fibers on first downstream demand and
|
|
81
|
+
# requires an installed `Fiber.scheduler` in a non-blocking fiber at that
|
|
82
|
+
# point. At most `concurrency` mapping blocks run at the same time, and at
|
|
83
|
+
# most `concurrency` upstream elements are pulled but not yet emitted downstream.
|
|
84
|
+
# Results are emitted in completion order and input order is not preserved.
|
|
85
|
+
# Closing the boundary closes upstream and requests internal worker
|
|
86
|
+
# cancellation. FiberStream does not depend on Async at runtime.
|
|
87
|
+
def self.parallel_unordered_map(concurrency:, &block)
|
|
88
|
+
raise ArgumentError, "missing block" unless block
|
|
89
|
+
raise TypeError, "concurrency must be an Integer" unless concurrency.is_a?(Integer)
|
|
90
|
+
raise ArgumentError, "concurrency must be positive" unless concurrency.positive?
|
|
91
|
+
|
|
92
|
+
new { |upstream| Pull.parallel_unordered_map(upstream, concurrency, block) }
|
|
93
|
+
end
|
|
94
|
+
|
|
33
95
|
# Creates an ordered Ractor-backed mapping flow.
|
|
34
96
|
#
|
|
35
97
|
# The mapper runs inside worker ractors and must be shareable, typically
|
|
@@ -42,8 +104,8 @@ module FiberStream
|
|
|
42
104
|
raise TypeError, "workers must be an Integer" unless workers.is_a?(Integer)
|
|
43
105
|
raise ArgumentError, "workers must be positive" unless workers.positive?
|
|
44
106
|
|
|
45
|
-
|
|
46
|
-
|
|
107
|
+
Internal::RactorTransferPolicy.validate!(:input_transfer, input_transfer)
|
|
108
|
+
Internal::RactorTransferPolicy.validate!(:output_transfer, output_transfer)
|
|
47
109
|
raise TypeError, "block must be shareable" unless Ractor.shareable?(block)
|
|
48
110
|
|
|
49
111
|
new { |upstream| Pull.ractor_map(upstream, workers, input_transfer, output_transfer, block) }
|
|
@@ -61,6 +123,19 @@ module FiberStream
|
|
|
61
123
|
new { |upstream| Pull.select(upstream, block) }
|
|
62
124
|
end
|
|
63
125
|
|
|
126
|
+
# Creates a complement filtering flow.
|
|
127
|
+
#
|
|
128
|
+
# On each downstream demand the block is called for successive upstream
|
|
129
|
+
# elements until it returns `false` or `nil`, or upstream completes. Truthy
|
|
130
|
+
# results drop the element; false and nil results pass it through unchanged.
|
|
131
|
+
# Exceptions raised by the block fail the stream and are re-raised from
|
|
132
|
+
# `Source#run_with`.
|
|
133
|
+
def self.reject(&block)
|
|
134
|
+
raise ArgumentError, "missing block" unless block
|
|
135
|
+
|
|
136
|
+
new { |upstream| Pull.reject(upstream, block) }
|
|
137
|
+
end
|
|
138
|
+
|
|
64
139
|
# Creates a limiting flow.
|
|
65
140
|
#
|
|
66
141
|
# The flow emits at most `count` elements. `take(0)` completes without
|
|
@@ -100,6 +175,18 @@ module FiberStream
|
|
|
100
175
|
new { |upstream| Pull.grouped(upstream, count) }
|
|
101
176
|
end
|
|
102
177
|
|
|
178
|
+
# Creates a running-accumulator flow.
|
|
179
|
+
#
|
|
180
|
+
# The block is called as `block.call(accumulator, element)` for each
|
|
181
|
+
# upstream element, matching `Sink.fold`. The block result becomes the new
|
|
182
|
+
# accumulator and is emitted downstream. The initial accumulator is not
|
|
183
|
+
# emitted before the first upstream element.
|
|
184
|
+
def self.scan(initial, &block)
|
|
185
|
+
raise ArgumentError, "missing block" unless block
|
|
186
|
+
|
|
187
|
+
new { |upstream| Pull.scan(upstream, initial, block) }
|
|
188
|
+
end
|
|
189
|
+
|
|
103
190
|
# Creates a predicate-based limiting flow.
|
|
104
191
|
#
|
|
105
192
|
# The flow emits leading elements while the block result is truthy. The
|
|
@@ -151,6 +238,19 @@ module FiberStream
|
|
|
151
238
|
new { |upstream| Pull.buffer(upstream, count) }
|
|
152
239
|
end
|
|
153
240
|
|
|
241
|
+
# Creates a scheduler-aware throttling flow.
|
|
242
|
+
#
|
|
243
|
+
# The `rate:` form creates a fresh `RateLimiter` for each materialization.
|
|
244
|
+
# The `limiter:` form shares the supplied limiter across materializations;
|
|
245
|
+
# it must respond to `acquire(permits:)`, which must return only after the
|
|
246
|
+
# permits are acquired. When FiberStream-owned waiting is required, the
|
|
247
|
+
# current fiber must be non-blocking with an installed `Fiber.scheduler`.
|
|
248
|
+
def self.throttle(**options)
|
|
249
|
+
limiter = build_throttle_limiter(options)
|
|
250
|
+
|
|
251
|
+
new { |upstream| Pull.throttle(upstream, limiter.call) }
|
|
252
|
+
end
|
|
253
|
+
|
|
154
254
|
# Creates a line-splitting flow.
|
|
155
255
|
#
|
|
156
256
|
# The flow accepts String chunks and emits lines split on "\n". By default
|
|
@@ -189,13 +289,41 @@ module FiberStream
|
|
|
189
289
|
new { |upstream| Pull.split(upstream, separator, keep_separator, max_length) }
|
|
190
290
|
end
|
|
191
291
|
|
|
192
|
-
def self.
|
|
193
|
-
|
|
292
|
+
def self.build(&attach) # :nodoc:
|
|
293
|
+
new(&attach)
|
|
294
|
+
end
|
|
295
|
+
|
|
296
|
+
def self.build_throttle_limiter(options)
|
|
297
|
+
unknown_keywords = options.keys - [:rate, :per, :burst, :limiter]
|
|
298
|
+
raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" unless unknown_keywords.empty?
|
|
299
|
+
|
|
300
|
+
rate_given = options.key?(:rate)
|
|
301
|
+
per_given = options.key?(:per)
|
|
302
|
+
burst_given = options.key?(:burst)
|
|
303
|
+
limiter_given = options.key?(:limiter)
|
|
304
|
+
|
|
305
|
+
if limiter_given
|
|
306
|
+
raise ArgumentError, "cannot pass rate and limiter together" if rate_given
|
|
307
|
+
raise ArgumentError, "cannot pass per with limiter" if per_given
|
|
308
|
+
raise ArgumentError, "cannot pass burst with limiter" if burst_given
|
|
194
309
|
|
|
195
|
-
|
|
310
|
+
limiter = options.fetch(:limiter)
|
|
311
|
+
raise TypeError, "limiter must respond to acquire" unless limiter.respond_to?(:acquire)
|
|
312
|
+
|
|
313
|
+
return -> { limiter }
|
|
314
|
+
end
|
|
315
|
+
|
|
316
|
+
raise ArgumentError, "missing rate or limiter" unless rate_given
|
|
317
|
+
|
|
318
|
+
rate = options.fetch(:rate)
|
|
319
|
+
per = options.fetch(:per, 1)
|
|
320
|
+
burst = options.fetch(:burst, nil)
|
|
321
|
+
RateLimiter.validate_options!(rate:, per:, burst:)
|
|
322
|
+
|
|
323
|
+
-> { RateLimiter.new(rate:, per:, burst:) }
|
|
196
324
|
end
|
|
197
325
|
|
|
198
|
-
private_class_method :
|
|
326
|
+
private_class_method :build_throttle_limiter
|
|
199
327
|
|
|
200
328
|
# Returns a reusable flow that applies this flow and then `flow`.
|
|
201
329
|
#
|
|
@@ -204,11 +332,11 @@ module FiberStream
|
|
|
204
332
|
def via(flow)
|
|
205
333
|
raise TypeError, "expected FiberStream::Flow" unless flow.is_a?(Flow)
|
|
206
334
|
|
|
207
|
-
self.class.
|
|
208
|
-
attached_stream =
|
|
335
|
+
self.class.build do |upstream|
|
|
336
|
+
attached_stream = attach_to(upstream)
|
|
209
337
|
|
|
210
338
|
begin
|
|
211
|
-
flow.
|
|
339
|
+
flow.attach_to(attached_stream)
|
|
212
340
|
rescue StandardError
|
|
213
341
|
begin
|
|
214
342
|
attached_stream.close
|
|
@@ -228,13 +356,13 @@ module FiberStream
|
|
|
228
356
|
def to(sink)
|
|
229
357
|
raise TypeError, "expected FiberStream::Sink" unless sink.is_a?(Sink)
|
|
230
358
|
|
|
231
|
-
Sink.
|
|
359
|
+
Sink.build do |stream|
|
|
232
360
|
attached_stream = nil
|
|
233
361
|
primary_error = nil
|
|
234
362
|
|
|
235
363
|
begin
|
|
236
|
-
attached_stream =
|
|
237
|
-
sink.
|
|
364
|
+
attached_stream = attach_to(stream)
|
|
365
|
+
sink.run_stream(attached_stream)
|
|
238
366
|
rescue StandardError => error
|
|
239
367
|
primary_error = error
|
|
240
368
|
raise
|
|
@@ -254,9 +382,7 @@ module FiberStream
|
|
|
254
382
|
|
|
255
383
|
private_class_method :new
|
|
256
384
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
def attach(upstream)
|
|
385
|
+
def attach_to(upstream) # :nodoc:
|
|
260
386
|
@attach.call(upstream)
|
|
261
387
|
end
|
|
262
388
|
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FiberStream
  module Internal # :nodoc:
    # Shared validation for Ractor transfer-mode options.
    module RactorTransferPolicy # :nodoc:
      module_function

      # Checks a transfer-mode option.
      #
      # name  - option name interpolated into the error message.
      # value - the value to check.
      #
      # Returns nil when `value` is `:copy` or `:move`.
      # Raises ArgumentError for any other value.
      def validate!(name, value)
        case value
        when :copy, :move
          nil
        else
          raise ArgumentError, "#{name} must be :copy or :move"
        end
      end
    end
  end

  private_constant :Internal
end
|
|
@@ -2,6 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
module FiberStream
|
|
4
4
|
class Pipeline
|
|
5
|
+
def self.build(source, sink) # :nodoc:
|
|
6
|
+
new(source, sink)
|
|
7
|
+
end
|
|
8
|
+
|
|
5
9
|
def initialize(source, sink)
|
|
6
10
|
@source = source
|
|
7
11
|
@sink = sink
|
|
@@ -27,7 +31,7 @@ module FiberStream
|
|
|
27
31
|
def run_async
|
|
28
32
|
validate_scheduler!
|
|
29
33
|
|
|
30
|
-
RunningPipeline.
|
|
34
|
+
RunningPipeline.start(Fiber.scheduler) { run }
|
|
31
35
|
end
|
|
32
36
|
|
|
33
37
|
private_class_method :new
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FiberStream
  module Pull
    # Nil-dropping stage.
    #
    # A single downstream demand may pull multiple upstream elements until a
    # non-nil value is observed or upstream completes. Dropped nil values are
    # discarded immediately and are not buffered.
    class Compact
      # upstream - the stage to pull from; this class calls `#next` and
      #            `#close` on it.
      def initialize(upstream)
        @upstream = upstream
        @closed = false
        @done = false
      end

      # Returns the next non-nil upstream element, or DONE once this stage has
      # been closed or upstream has completed.
      #
      # The pull loop deliberately avoids `Kernel#loop`: `loop` rescues
      # StopIteration and returns the exception's result, so a StopIteration
      # escaping from `@upstream.next` would be swallowed and handed
      # downstream as an element (usually nil). With `until`, any exception
      # raised by upstream propagates to the caller unchanged.
      def next
        return DONE if @closed || @done

        until Pull.done?(value = @upstream.next)
          return value unless value.nil?
        end

        @done = true
        DONE
      end

      # Closes upstream. Only the first call has any effect.
      def close
        return if @closed

        @closed = true
        @upstream.close
      end
    end
  end
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FiberStream
  module Pull
    # Transform-and-filter stage.
    #
    # A single downstream demand may pull multiple upstream elements until the
    # transform returns a truthy value or upstream completes. Falsey transform
    # results are discarded immediately and are not buffered.
    class FilterMap
      # upstream  - the stage to pull from; this class calls `#next` and
      #             `#close` on it.
      # transform - callable invoked with each upstream element.
      def initialize(upstream, transform)
        @upstream = upstream
        @transform = transform
        @closed = false
        @done = false
      end

      # Returns the next truthy transform result, or DONE once this stage has
      # been closed or upstream has completed.
      #
      # The pull loop deliberately avoids `Kernel#loop`: `loop` rescues
      # StopIteration and returns the exception's result, so a StopIteration
      # raised by the transform (for example from an exhausted
      # `Enumerator#next`) or by upstream would be swallowed and emitted as an
      # element. With `until`, such exceptions propagate to the caller.
      def next
        return DONE if @closed || @done

        until Pull.done?(value = @upstream.next)
          result = @transform.call(value)
          return result if result
        end

        @done = true
        DONE
      end

      # Closes upstream. Only the first call has any effect.
      def close
        return if @closed

        @closed = true
        @upstream.close
      end
    end
  end
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FiberStream
  module Pull
    # One-to-many mapping stage.
    #
    # Each upstream element is passed to the transform, and the values yielded
    # by the returned object's `#each` are emitted one at a time. Only one
    # expansion is active at a time, and the stage never pulls the next
    # upstream element until the active expansion is exhausted.
    class MapConcat
      # upstream  - the stage to pull from; this class calls `#next` and
      #             `#close` on it.
      # transform - callable invoked with each upstream element; its result
      #             must respond to `#each`.
      def initialize(upstream, transform)
        @upstream = upstream
        @transform = transform
        @current_enumerator = nil
        @closed = false
        @done = false
      end

      # Returns the next value of the active expansion, starting a new
      # expansion from upstream whenever the active one is exhausted. Returns
      # DONE once this stage has been closed or upstream has completed.
      #
      # Raises TypeError when the transform result does not respond to `each`.
      #
      # The pull loop deliberately avoids `Kernel#loop`: `loop` rescues
      # StopIteration and returns the exception's result, so a StopIteration
      # raised by `@upstream.next` or by the transform would be swallowed and
      # emitted as an element. Only the StopIteration coming out of the active
      # enumerator's `#next` is treated as "expansion exhausted".
      #
      # NOTE(review): a StopIteration raised inside the expansion's own `#each`
      # surfaces through `Enumerator#next` as well and so still looks like
      # exhaustion here; confirm whether that case needs to fail the stream.
      def next
        return DONE if @closed || @done

        until @done
          if @current_enumerator
            begin
              return @current_enumerator.next
            rescue StopIteration
              @current_enumerator = nil
            end
          end

          value = @upstream.next
          if Pull.done?(value)
            @done = true
          else
            @current_enumerator = expansion_for(value)
          end
        end

        DONE
      end

      # Drops the active expansion and closes upstream. Only the first call
      # has any effect.
      def close
        return if @closed

        @closed = true
        @current_enumerator = nil
        @upstream.close
      end

      private

      # Runs the transform for one upstream element and wraps the result in
      # an external enumerator over its `#each`.
      def expansion_for(value)
        result = @transform.call(value)
        unless result.respond_to?(:each)
          raise TypeError, "map_concat block result must respond to each"
        end

        result.to_enum(:each)
      end
    end
  end
end
|