fiber_stream 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +23 -0
- data/README.md +87 -56
- data/examples/README.md +6 -0
- data/examples/ractor_producer_sources.rb +43 -0
- data/lib/fiber_stream/flow.rb +40 -17
- data/lib/fiber_stream/internal/ractor_transfer_policy.rb +17 -0
- data/lib/fiber_stream/pipeline.rb +5 -1
- data/lib/fiber_stream/pull/parallel_unordered_map_boundary.rb +311 -0
- data/lib/fiber_stream/pull/ractor_map_boundary.rb +50 -51
- data/lib/fiber_stream/pull/ractor_merge_ports_source.rb +18 -3
- data/lib/fiber_stream/pull/ractor_port_source.rb +39 -6
- data/lib/fiber_stream/pull/ractor_producer_source.rb +349 -0
- data/lib/fiber_stream/pull/scan.rb +38 -0
- data/lib/fiber_stream/pull.rb +54 -5
- data/lib/fiber_stream/ractor_producer.rb +167 -0
- data/lib/fiber_stream/running_pipeline.rb +4 -0
- data/lib/fiber_stream/sink.rb +9 -19
- data/lib/fiber_stream/source.rb +78 -22
- data/lib/fiber_stream/version.rb +1 -1
- data/lib/fiber_stream.rb +2 -0
- data/sig/fiber_stream.rbs +18 -1
- metadata +9 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1cc93666d0610e659313a12dc756fca935579e62711972a9ea65d9ad818f6020
|
|
4
|
+
data.tar.gz: 97f315765ba573a5c047752fd083006db89404eff06e92bda5afbfa0f1933ed1
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5e635531f9e34510ef0eab76254c33e70f36124be779f48d2f9692c351a2f26ed36c7c14fa0d1d596c5645b511bb4fa1abe2bb773fc434c1f4da39a6e19dbefc
|
|
7
|
+
data.tar.gz: 67eb50ae0a1d727c65fd172be572f7bf08ba82b3ee0cf4d9094c7e9ad579aac327155cc772bcd4e94b778cef53f63decf64a08e12e7ea5ef6949a6d813336b04
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,28 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.4.0 - 2026-06-09
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
|
|
7
|
+
- `Flow.parallel_unordered_map(concurrency:)` and
|
|
8
|
+
`Source#parallel_unordered_map(concurrency:)` for scheduler-backed mapping
|
|
9
|
+
that emits results in completion order instead of preserving input order.
|
|
10
|
+
- `Source.ractor_producer` for FiberStream-owned single producer Ractors with
|
|
11
|
+
one-outstanding-ack backpressure and cooperative cleanup.
|
|
12
|
+
- `Source.ractor_merge_producers` for ready-order fan-in from multiple
|
|
13
|
+
FiberStream-owned producer Ractors without requiring a `Fiber.scheduler`.
|
|
14
|
+
- `Flow.scan(initial)` and `Source#scan(initial)` for lazy running
|
|
15
|
+
accumulators using `Sink.fold`-style reducer semantics.
|
|
16
|
+
|
|
17
|
+
### Changed
|
|
18
|
+
|
|
19
|
+
- Updated README and website reference coverage for owned Ractor producers,
|
|
20
|
+
unordered parallel mapping, and scan.
|
|
21
|
+
- Prefer high-level owned Ractor producer examples in user-facing
|
|
22
|
+
documentation while keeping low-level port APIs documented for externally
|
|
23
|
+
owned producers.
|
|
24
|
+
- Updated the project Ruby pin to 4.0.5.
|
|
25
|
+
|
|
3
26
|
## 0.3.0 - 2026-06-06
|
|
4
27
|
|
|
5
28
|
### Added
|
data/README.md
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
# FiberStream
|
|
2
|
+
FiberStream is a Ruby library for linear stream processing with pull-based backpressure.
|
|
2
3
|
|
|
3
|
-
|
|
4
|
-
processing with backpressure.
|
|
4
|
+
It builds lazy Source definitions, transforms values with Flow stages, and materializes results with Sink objects.
|
|
5
5
|
|
|
6
|
-
|
|
7
|
-
a `Sink`.
|
|
6
|
+
[![Gem Version](https://badge.fury.io/rb/fiber_stream.svg)](https://badge.fury.io/rb/fiber_stream)
|
|
8
7
|
|
|
9
8
|
## Quick Start
|
|
9
|
+
Please see the project [documentation](https://dakatsuka.github.io/fiber_stream/) for more details.
|
|
10
10
|
|
|
11
11
|
```ruby
|
|
12
12
|
require "fiber_stream"
|
|
@@ -27,11 +27,13 @@ FiberStream currently supports linear pipelines only.
|
|
|
27
27
|
|
|
28
28
|
Implemented capabilities:
|
|
29
29
|
|
|
30
|
-
- in-memory, IO,
|
|
30
|
+
- in-memory, IO, FiberStream-owned Ractor producer, backpressure-aware Ractor
|
|
31
|
+
port, and Ractor port merge sources
|
|
31
32
|
- lazy source concatenation, zipping, and scheduler-backed merging
|
|
32
33
|
- mapping, filtering, limiting, predicate-based limiting and dropping,
|
|
33
34
|
fixed-prefix dropping, fixed-size grouping, line splitting, buffering, async
|
|
34
|
-
boundaries, ordered parallel mapping, and ordered
|
|
35
|
+
boundaries, ordered and unordered parallel mapping, and ordered
|
|
36
|
+
Ractor-backed mapping
|
|
35
37
|
- array, first-element, fold, foreach, and IO sinks
|
|
36
38
|
- reusable flow composition and runnable pipelines
|
|
37
39
|
- foreground and scheduler-backed background pipeline execution
|
|
@@ -96,64 +98,51 @@ chunks =
|
|
|
96
98
|
end.wait
|
|
97
99
|
```
|
|
98
100
|
|
|
99
|
-
Ractor
|
|
100
|
-
The producer
|
|
101
|
-
|
|
101
|
+
Owned Ractor producer sources run producer blocks in FiberStream-managed
|
|
102
|
+
Ractors. The producer block receives a `RactorProducer` context and emits one
|
|
103
|
+
value per downstream demand:
|
|
102
104
|
|
|
103
105
|
```ruby
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
producer
|
|
108
|
-
Ractor.new(data_port, setup_port) do |outbox, setup|
|
|
109
|
-
ack_port = Ractor::Port.new
|
|
110
|
-
setup.send(ack_port)
|
|
111
|
-
|
|
112
|
-
values = [1, 2, 3].to_enum
|
|
113
|
-
|
|
114
|
-
loop do
|
|
115
|
-
case ack_port.receive
|
|
116
|
-
in FiberStream::RactorPort::Ack
|
|
117
|
-
begin
|
|
118
|
-
outbox.send(FiberStream::RactorPort::Element.new(values.next))
|
|
119
|
-
rescue StopIteration
|
|
120
|
-
outbox.send(FiberStream::RactorPort::Complete.new)
|
|
121
|
-
break
|
|
122
|
-
end
|
|
123
|
-
in FiberStream::RactorPort::Cancel
|
|
124
|
-
break
|
|
125
|
-
end
|
|
106
|
+
PRODUCE_VALUES =
|
|
107
|
+
Ractor.shareable_proc do |producer, values|
|
|
108
|
+
values.each do |value|
|
|
109
|
+
break unless producer.emit(value)
|
|
126
110
|
end
|
|
127
111
|
end
|
|
128
112
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
FiberStream::Source.ractor_port(data_port, ack_port: ack_port)
|
|
113
|
+
FiberStream::Source.ractor_producer([1, 2, 3], &PRODUCE_VALUES)
|
|
132
114
|
.run_with(FiberStream::Sink.to_a)
|
|
133
115
|
# => [1, 2, 3]
|
|
134
|
-
|
|
135
|
-
producer.value
|
|
136
116
|
```
|
|
137
117
|
|
|
138
|
-
|
|
139
|
-
`
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
Multiple producer Ractors can be merged directly without a scheduler-backed
|
|
143
|
-
`Source#merge`. Each producer still receives at most one outstanding ack:
|
|
118
|
+
Multiple owned producer Ractors can be merged directly without a
|
|
119
|
+
scheduler-backed `Source#merge`. Each producer still receives at most one
|
|
120
|
+
outstanding ack:
|
|
144
121
|
|
|
145
122
|
```ruby
|
|
123
|
+
PRODUCE_TAGGED_VALUES =
|
|
124
|
+
Ractor.shareable_proc do |producer, tag, values|
|
|
125
|
+
values.each do |value|
|
|
126
|
+
break unless producer.emit([tag, value])
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
|
|
146
130
|
source =
|
|
147
|
-
FiberStream::Source.
|
|
148
|
-
[
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
]
|
|
152
|
-
)
|
|
131
|
+
FiberStream::Source.ractor_merge_producers do |group|
|
|
132
|
+
group.producer(:a, [1, 2], &PRODUCE_TAGGED_VALUES)
|
|
133
|
+
group.producer(:b, [3, 4], &PRODUCE_TAGGED_VALUES)
|
|
134
|
+
end
|
|
153
135
|
|
|
154
|
-
|
|
136
|
+
source.run_with(FiberStream::Sink.to_a)
|
|
137
|
+
# Example result: [[:a, 1], [:b, 3], [:a, 2], [:b, 4]]
|
|
155
138
|
```
|
|
156
139
|
|
|
140
|
+
Use the lower-level `Source.ractor_port` and `Source.ractor_merge_ports` APIs
|
|
141
|
+
when producer Ractors are owned outside FiberStream or need custom lifecycle
|
|
142
|
+
handling. `RactorPort::Failure` cause metadata is producer-provided and is
|
|
143
|
+
surfaced on `RactorPortSourceError`; redact sensitive details before sending
|
|
144
|
+
failures across trust boundaries.
|
|
145
|
+
|
|
157
146
|
Streaming HTTP response bodies that implement `#each`, such as
|
|
158
147
|
`async-http` response bodies, can be used with `Source.each` without buffering
|
|
159
148
|
the full body first. Use the HTTP client's block form or an explicit `ensure`
|
|
@@ -240,6 +229,26 @@ profiles =
|
|
|
240
229
|
profiles.map { |profile| profile.fetch(:id) } # => [1, 2, 3, 4]
|
|
241
230
|
```
|
|
242
231
|
|
|
232
|
+
Use `parallel_unordered_map` when every result can be handled independently
|
|
233
|
+
and lower head-of-line blocking matters more than input order. It still limits
|
|
234
|
+
in-flight mapping work to `concurrency`, but emits values as mapping jobs
|
|
235
|
+
finish:
|
|
236
|
+
|
|
237
|
+
```ruby
|
|
238
|
+
require "async"
|
|
239
|
+
require "fiber_stream"
|
|
240
|
+
|
|
241
|
+
responses =
|
|
242
|
+
Sync do
|
|
243
|
+
FiberStream::Source.each(["/a", "/slow", "/b"])
|
|
244
|
+
.parallel_unordered_map(concurrency: 3) { |path| fetch_path(path) }
|
|
245
|
+
.run_with(FiberStream::Sink.to_a)
|
|
246
|
+
end
|
|
247
|
+
|
|
248
|
+
# Results are in completion order, not necessarily input order.
|
|
249
|
+
responses
|
|
250
|
+
```
|
|
251
|
+
|
|
243
252
|
Use `ractor_map` for ordered CPU-bound mapping in Ractor workers. The mapper
|
|
244
253
|
must be shareable, usually by creating it with `Ractor.shareable_proc`.
|
|
245
254
|
|
|
@@ -384,6 +393,17 @@ batches =
|
|
|
384
393
|
batches # => [[1, 2], [3, 4], [5]]
|
|
385
394
|
```
|
|
386
395
|
|
|
396
|
+
`Flow.scan` emits the updated accumulator for each upstream element:
|
|
397
|
+
|
|
398
|
+
```ruby
|
|
399
|
+
running_totals =
|
|
400
|
+
FiberStream::Source.each([1, 2, 3, 4])
|
|
401
|
+
.scan(0) { |sum, number| sum + number }
|
|
402
|
+
.run_with(FiberStream::Sink.to_a)
|
|
403
|
+
|
|
404
|
+
running_totals # => [1, 3, 6, 10]
|
|
405
|
+
```
|
|
406
|
+
|
|
387
407
|
`Flow.take_while` emits the leading prefix while a predicate is truthy, then
|
|
388
408
|
closes upstream at the first false or nil result:
|
|
389
409
|
|
|
@@ -450,14 +470,15 @@ merged =
|
|
|
450
470
|
|
|
451
471
|
`merge` does not make scheduler-unaware blocking source work non-blocking and
|
|
452
472
|
does not provide CPU parallelism. Use producer ractors with
|
|
453
|
-
`Source.
|
|
454
|
-
true isolation.
|
|
473
|
+
`Source.ractor_producer` or `Source.ractor_merge_producers` when producer work
|
|
474
|
+
needs true isolation.
|
|
455
475
|
|
|
456
476
|
`Flow.buffer(count)` allows bounded prefetch. `Flow.async`, `Flow.buffer`,
|
|
457
|
-
`Flow.parallel_map`, `
|
|
458
|
-
`Pipeline#run_async` require an installed
|
|
459
|
-
current fiber when demanded or started.
|
|
460
|
-
scheduler and does not depend on Async at
|
|
477
|
+
`Flow.parallel_map`, `Flow.parallel_unordered_map`, `Source.io`,
|
|
478
|
+
`Source#merge`, `Sink.io`, and `Pipeline#run_async` require an installed
|
|
479
|
+
`Fiber.scheduler` and a non-blocking current fiber when demanded or started.
|
|
480
|
+
FiberStream does not install a scheduler and does not depend on Async at
|
|
481
|
+
runtime.
|
|
461
482
|
|
|
462
483
|
## API Surface
|
|
463
484
|
|
|
@@ -465,6 +486,8 @@ Sources:
|
|
|
465
486
|
|
|
466
487
|
- `FiberStream::Source.each(enumerable)`
|
|
467
488
|
- `FiberStream::Source.io(io, chunk_size: 16 * 1024, close: false)`
|
|
489
|
+
- `FiberStream::Source.ractor_producer(*args, transfer: :copy, ack_transfer: :copy) { |producer, *args| ... }`
|
|
490
|
+
- `FiberStream::Source.ractor_merge_producers(transfer: :copy, ack_transfer: :copy) { |group| ... }`
|
|
468
491
|
- `FiberStream::Source.ractor_port(port, ack_port:, ack_transfer: :copy, cancel: true)`
|
|
469
492
|
- `FiberStream::Source.ractor_merge_ports(ports, ack_transfer: :copy, cancel: true)`
|
|
470
493
|
|
|
@@ -476,11 +499,13 @@ Source convenience methods:
|
|
|
476
499
|
- `Source#merge(source)`
|
|
477
500
|
- `Source#map { |element| ... }`
|
|
478
501
|
- `Source#parallel_map(concurrency:) { |element| ... }`
|
|
502
|
+
- `Source#parallel_unordered_map(concurrency:) { |element| ... }`
|
|
479
503
|
- `Source#ractor_map(workers:, input_transfer: :copy, output_transfer: :copy) { |element| ... }`
|
|
480
504
|
- `Source#select { |element| ... }`
|
|
481
505
|
- `Source#take(count)`
|
|
482
506
|
- `Source#drop(count)`
|
|
483
507
|
- `Source#grouped(count)`
|
|
508
|
+
- `Source#scan(initial) { |accumulator, element| ... }`
|
|
484
509
|
- `Source#take_while { |element| ... }`
|
|
485
510
|
- `Source#drop_while { |element| ... }`
|
|
486
511
|
- `Source#async`
|
|
@@ -494,11 +519,13 @@ Flows:
|
|
|
494
519
|
|
|
495
520
|
- `FiberStream::Flow.map { |element| ... }`
|
|
496
521
|
- `FiberStream::Flow.parallel_map(concurrency:) { |element| ... }`
|
|
522
|
+
- `FiberStream::Flow.parallel_unordered_map(concurrency:) { |element| ... }`
|
|
497
523
|
- `FiberStream::Flow.ractor_map(workers:, input_transfer: :copy, output_transfer: :copy) { |element| ... }`
|
|
498
524
|
- `FiberStream::Flow.select { |element| ... }`
|
|
499
525
|
- `FiberStream::Flow.take(count)`
|
|
500
526
|
- `FiberStream::Flow.drop(count)`
|
|
501
527
|
- `FiberStream::Flow.grouped(count)`
|
|
528
|
+
- `FiberStream::Flow.scan(initial) { |accumulator, element| ... }`
|
|
502
529
|
- `FiberStream::Flow.take_while { |element| ... }`
|
|
503
530
|
- `FiberStream::Flow.drop_while { |element| ... }`
|
|
504
531
|
- `FiberStream::Flow.async`
|
|
@@ -541,6 +568,7 @@ bundle exec ruby examples/file_copy.rb
|
|
|
541
568
|
bundle exec ruby examples/backpressure_buffer.rb
|
|
542
569
|
bundle exec ruby examples/background_execution.rb
|
|
543
570
|
bundle exec ruby examples/ractor_map_hashing.rb
|
|
571
|
+
bundle exec ruby examples/ractor_producer_sources.rb
|
|
544
572
|
bundle exec ruby examples/ractor_port_source.rb
|
|
545
573
|
bundle exec ruby examples/ractor_merge_ports_and_map.rb
|
|
546
574
|
bundle exec ruby examples/async_http_requests.rb
|
|
@@ -553,6 +581,9 @@ events so the difference between direct demand and bounded prefetch is visible.
|
|
|
553
581
|
`examples/ractor_map_hashing.rb` demonstrates ordered Ractor-backed hashing
|
|
554
582
|
with a shareable mapper proc and `input_transfer: :move`.
|
|
555
583
|
|
|
584
|
+
`examples/ractor_producer_sources.rb` demonstrates high-level owned producer
|
|
585
|
+
Ractors with `Source.ractor_producer` and `Source.ractor_merge_producers`.
|
|
586
|
+
|
|
556
587
|
`examples/ractor_port_source.rb` demonstrates a producer Ractor that waits for
|
|
557
588
|
`RactorPort::Ack` before sending each `RactorPort::Element`.
|
|
558
589
|
|
|
@@ -578,7 +609,7 @@ bundle exec ruby benchmarks/heavy_cpu_map.rb
|
|
|
578
609
|
|
|
579
610
|
## Development
|
|
580
611
|
|
|
581
|
-
This project targets Ruby 4.x. The repository currently pins Ruby 4.0.
|
|
612
|
+
This project targets Ruby 4.x. The repository currently pins Ruby 4.0.5 in
|
|
582
613
|
`mise.toml`.
|
|
583
614
|
|
|
584
615
|
Install dependencies:
|
data/examples/README.md
CHANGED
|
@@ -11,6 +11,7 @@ bundle exec ruby examples/backpressure_buffer.rb
|
|
|
11
11
|
bundle exec ruby examples/background_execution.rb
|
|
12
12
|
bundle exec ruby examples/ractor_map_hashing.rb
|
|
13
13
|
bundle exec ruby examples/ractor_port_source.rb
|
|
14
|
+
bundle exec ruby examples/ractor_producer_sources.rb
|
|
14
15
|
bundle exec ruby examples/ractor_merge_ports_and_map.rb
|
|
15
16
|
bundle exec ruby examples/async_http_requests.rb
|
|
16
17
|
bundle exec ruby examples/async_http_streaming_body.rb
|
|
@@ -48,6 +49,11 @@ pipeline runs.
|
|
|
48
49
|
`RactorPort::Ack`, and sends one typed `RactorPort::Element` per downstream
|
|
49
50
|
demand.
|
|
50
51
|
|
|
52
|
+
`ractor_producer_sources.rb` demonstrates the high-level owned-producer APIs:
|
|
53
|
+
`Source.ractor_producer` for one producer and `Source.ractor_merge_producers`
|
|
54
|
+
for ready-order fan-in from multiple producers. FiberStream creates the ports,
|
|
55
|
+
producer Ractors, and cooperative cleanup path.
|
|
56
|
+
|
|
51
57
|
`ractor_merge_ports_and_map.rb` runs CPU-bound work in multiple producer
|
|
52
58
|
Ractors, merges their port outputs with `Source.ractor_merge_ports`, then runs
|
|
53
59
|
another CPU-bound verification stage with `ractor_map`.
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
$LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
|
|
4
|
+
|
|
5
|
+
require "fiber_stream"
|
|
6
|
+
|
|
7
|
+
EMIT_NUMBERS =
|
|
8
|
+
Ractor.shareable_proc do |producer, range|
|
|
9
|
+
range.each do |number|
|
|
10
|
+
break unless producer.emit(number)
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
EMIT_TAGGED_NUMBERS =
|
|
15
|
+
Ractor.shareable_proc do |producer, tag, range|
|
|
16
|
+
range.each do |number|
|
|
17
|
+
break unless producer.emit([tag, number])
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
squares =
|
|
22
|
+
FiberStream::Source.ractor_producer(1..5, &EMIT_NUMBERS)
|
|
23
|
+
.map { |number| number * number }
|
|
24
|
+
.run_with(FiberStream::Sink.to_a)
|
|
25
|
+
|
|
26
|
+
puts "Squares from one FiberStream-owned producer Ractor:"
|
|
27
|
+
puts squares.join(", ")
|
|
28
|
+
|
|
29
|
+
merged =
|
|
30
|
+
FiberStream::Source.ractor_merge_producers do |group|
|
|
31
|
+
group.producer(:low, 1..3, &EMIT_TAGGED_NUMBERS)
|
|
32
|
+
group.producer(:high, 10..12, &EMIT_TAGGED_NUMBERS)
|
|
33
|
+
end.run_with(FiberStream::Sink.to_a)
|
|
34
|
+
|
|
35
|
+
puts
|
|
36
|
+
puts "Merged values from two owned producer Ractors:"
|
|
37
|
+
merged.each do |tag, number|
|
|
38
|
+
puts "- #{tag}: #{number}"
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
puts
|
|
42
|
+
puts "Producer blocks use RactorProducer#emit and stop when it returns false."
|
|
43
|
+
puts "FiberStream creates the data ports, ack ports, producer Ractors, and cleanup path."
|
data/lib/fiber_stream/flow.rb
CHANGED
|
@@ -30,6 +30,23 @@ module FiberStream
|
|
|
30
30
|
new { |upstream| Pull.parallel_map(upstream, concurrency, block) }
|
|
31
31
|
end
|
|
32
32
|
|
|
33
|
+
# Creates an unordered scheduler-backed parallel mapping flow.
|
|
34
|
+
#
|
|
35
|
+
# The stage starts internal scheduled fibers on first downstream demand and
|
|
36
|
+
# requires an installed `Fiber.scheduler` in a non-blocking fiber at that
|
|
37
|
+
# point. At most `concurrency` mapping blocks run at the same time, and at
|
|
38
|
+
# most `concurrency` upstream elements are pulled but not yet emitted downstream.
|
|
39
|
+
# Results are emitted in completion order and input order is not preserved.
|
|
40
|
+
# Closing the boundary closes upstream and requests internal worker
|
|
41
|
+
# cancellation. FiberStream does not depend on Async at runtime.
|
|
42
|
+
def self.parallel_unordered_map(concurrency:, &block)
|
|
43
|
+
raise ArgumentError, "missing block" unless block
|
|
44
|
+
raise TypeError, "concurrency must be an Integer" unless concurrency.is_a?(Integer)
|
|
45
|
+
raise ArgumentError, "concurrency must be positive" unless concurrency.positive?
|
|
46
|
+
|
|
47
|
+
new { |upstream| Pull.parallel_unordered_map(upstream, concurrency, block) }
|
|
48
|
+
end
|
|
49
|
+
|
|
33
50
|
# Creates an ordered Ractor-backed mapping flow.
|
|
34
51
|
#
|
|
35
52
|
# The mapper runs inside worker ractors and must be shareable, typically
|
|
@@ -42,8 +59,8 @@ module FiberStream
|
|
|
42
59
|
raise TypeError, "workers must be an Integer" unless workers.is_a?(Integer)
|
|
43
60
|
raise ArgumentError, "workers must be positive" unless workers.positive?
|
|
44
61
|
|
|
45
|
-
|
|
46
|
-
|
|
62
|
+
Internal::RactorTransferPolicy.validate!(:input_transfer, input_transfer)
|
|
63
|
+
Internal::RactorTransferPolicy.validate!(:output_transfer, output_transfer)
|
|
47
64
|
raise TypeError, "block must be shareable" unless Ractor.shareable?(block)
|
|
48
65
|
|
|
49
66
|
new { |upstream| Pull.ractor_map(upstream, workers, input_transfer, output_transfer, block) }
|
|
@@ -100,6 +117,18 @@ module FiberStream
|
|
|
100
117
|
new { |upstream| Pull.grouped(upstream, count) }
|
|
101
118
|
end
|
|
102
119
|
|
|
120
|
+
# Creates a running-accumulator flow.
|
|
121
|
+
#
|
|
122
|
+
# The block is called as `block.call(accumulator, element)` for each
|
|
123
|
+
# upstream element, matching `Sink.fold`. The block result becomes the new
|
|
124
|
+
# accumulator and is emitted downstream. The initial accumulator is not
|
|
125
|
+
# emitted before the first upstream element.
|
|
126
|
+
def self.scan(initial, &block)
|
|
127
|
+
raise ArgumentError, "missing block" unless block
|
|
128
|
+
|
|
129
|
+
new { |upstream| Pull.scan(upstream, initial, block) }
|
|
130
|
+
end
|
|
131
|
+
|
|
103
132
|
# Creates a predicate-based limiting flow.
|
|
104
133
|
#
|
|
105
134
|
# The flow emits leading elements while the block result is truthy. The
|
|
@@ -189,14 +218,10 @@ module FiberStream
|
|
|
189
218
|
new { |upstream| Pull.split(upstream, separator, keep_separator, max_length) }
|
|
190
219
|
end
|
|
191
220
|
|
|
192
|
-
def self.
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
raise ArgumentError, "#{name} must be :copy or :move"
|
|
221
|
+
def self.build(&attach) # :nodoc:
|
|
222
|
+
new(&attach)
|
|
196
223
|
end
|
|
197
224
|
|
|
198
|
-
private_class_method :validate_ractor_transfer_policy!
|
|
199
|
-
|
|
200
225
|
# Returns a reusable flow that applies this flow and then `flow`.
|
|
201
226
|
#
|
|
202
227
|
# Construction is lazy. No upstream stream is attached and no elements are
|
|
@@ -204,11 +229,11 @@ module FiberStream
|
|
|
204
229
|
def via(flow)
|
|
205
230
|
raise TypeError, "expected FiberStream::Flow" unless flow.is_a?(Flow)
|
|
206
231
|
|
|
207
|
-
self.class.
|
|
208
|
-
attached_stream =
|
|
232
|
+
self.class.build do |upstream|
|
|
233
|
+
attached_stream = attach_to(upstream)
|
|
209
234
|
|
|
210
235
|
begin
|
|
211
|
-
flow.
|
|
236
|
+
flow.attach_to(attached_stream)
|
|
212
237
|
rescue StandardError
|
|
213
238
|
begin
|
|
214
239
|
attached_stream.close
|
|
@@ -228,13 +253,13 @@ module FiberStream
|
|
|
228
253
|
def to(sink)
|
|
229
254
|
raise TypeError, "expected FiberStream::Sink" unless sink.is_a?(Sink)
|
|
230
255
|
|
|
231
|
-
Sink.
|
|
256
|
+
Sink.build do |stream|
|
|
232
257
|
attached_stream = nil
|
|
233
258
|
primary_error = nil
|
|
234
259
|
|
|
235
260
|
begin
|
|
236
|
-
attached_stream =
|
|
237
|
-
sink.
|
|
261
|
+
attached_stream = attach_to(stream)
|
|
262
|
+
sink.run_stream(attached_stream)
|
|
238
263
|
rescue StandardError => error
|
|
239
264
|
primary_error = error
|
|
240
265
|
raise
|
|
@@ -254,9 +279,7 @@ module FiberStream
|
|
|
254
279
|
|
|
255
280
|
private_class_method :new
|
|
256
281
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
def attach(upstream)
|
|
282
|
+
def attach_to(upstream) # :nodoc:
|
|
260
283
|
@attach.call(upstream)
|
|
261
284
|
end
|
|
262
285
|
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FiberStream
|
|
4
|
+
module Internal # :nodoc:
|
|
5
|
+
module RactorTransferPolicy # :nodoc:
|
|
6
|
+
module_function
|
|
7
|
+
|
|
8
|
+
def validate!(name, value)
|
|
9
|
+
return if [:copy, :move].include?(value)
|
|
10
|
+
|
|
11
|
+
raise ArgumentError, "#{name} must be :copy or :move"
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
private_constant :Internal
|
|
17
|
+
end
|
|
@@ -2,6 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
module FiberStream
|
|
4
4
|
class Pipeline
|
|
5
|
+
def self.build(source, sink) # :nodoc:
|
|
6
|
+
new(source, sink)
|
|
7
|
+
end
|
|
8
|
+
|
|
5
9
|
def initialize(source, sink)
|
|
6
10
|
@source = source
|
|
7
11
|
@sink = sink
|
|
@@ -27,7 +31,7 @@ module FiberStream
|
|
|
27
31
|
def run_async
|
|
28
32
|
validate_scheduler!
|
|
29
33
|
|
|
30
|
-
RunningPipeline.
|
|
34
|
+
RunningPipeline.start(Fiber.scheduler) { run }
|
|
31
35
|
end
|
|
32
36
|
|
|
33
37
|
private_class_method :new
|