fiber_stream 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +60 -0
- data/README.md +167 -43
- data/examples/README.md +11 -0
- data/examples/ractor_merge_ports_and_map.rb +116 -0
- data/examples/ractor_producer_sources.rb +43 -0
- data/lib/fiber_stream/errors.rb +4 -1
- data/lib/fiber_stream/flow.rb +75 -16
- data/lib/fiber_stream/internal/ractor_transfer_policy.rb +17 -0
- data/lib/fiber_stream/pipeline.rb +5 -1
- data/lib/fiber_stream/pull/async_boundary.rb +28 -11
- data/lib/fiber_stream/pull/buffer_boundary.rb +28 -10
- data/lib/fiber_stream/pull/concat.rb +9 -1
- data/lib/fiber_stream/pull/grouped.rb +46 -0
- data/lib/fiber_stream/pull/merge.rb +230 -0
- data/lib/fiber_stream/pull/parallel_map_boundary.rb +28 -24
- data/lib/fiber_stream/pull/parallel_unordered_map_boundary.rb +311 -0
- data/lib/fiber_stream/pull/ractor_map_boundary.rb +112 -89
- data/lib/fiber_stream/pull/ractor_merge_ports_source.rb +373 -0
- data/lib/fiber_stream/pull/ractor_port_source.rb +53 -20
- data/lib/fiber_stream/pull/ractor_producer_source.rb +349 -0
- data/lib/fiber_stream/pull/scan.rb +38 -0
- data/lib/fiber_stream/pull/split.rb +134 -0
- data/lib/fiber_stream/pull.rb +74 -5
- data/lib/fiber_stream/ractor_port.rb +3 -1
- data/lib/fiber_stream/ractor_producer.rb +167 -0
- data/lib/fiber_stream/running_pipeline.rb +22 -8
- data/lib/fiber_stream/sink.rb +9 -19
- data/lib/fiber_stream/source.rb +177 -19
- data/lib/fiber_stream/version.rb +1 -1
- data/lib/fiber_stream.rb +2 -0
- data/sig/fiber_stream.rbs +25 -1
- metadata +14 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1cc93666d0610e659313a12dc756fca935579e62711972a9ea65d9ad818f6020
|
|
4
|
+
data.tar.gz: 97f315765ba573a5c047752fd083006db89404eff06e92bda5afbfa0f1933ed1
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5e635531f9e34510ef0eab76254c33e70f36124be779f48d2f9692c351a2f26ed36c7c14fa0d1d596c5645b511bb4fa1abe2bb773fc434c1f4da39a6e19dbefc
|
|
7
|
+
data.tar.gz: 67eb50ae0a1d727c65fd172be572f7bf08ba82b3ee0cf4d9094c7e9ad579aac327155cc772bcd4e94b778cef53f63decf64a08e12e7ea5ef6949a6d813336b04
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,65 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.4.0 - 2026-06-09
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
|
|
7
|
+
- `Flow.parallel_unordered_map(concurrency:)` and
|
|
8
|
+
`Source#parallel_unordered_map(concurrency:)` for scheduler-backed mapping
|
|
9
|
+
that emits results in completion order instead of preserving input order.
|
|
10
|
+
- `Source.ractor_producer` for FiberStream-owned single producer Ractors with
|
|
11
|
+
one-outstanding-ack backpressure and cooperative cleanup.
|
|
12
|
+
- `Source.ractor_merge_producers` for ready-order fan-in from multiple
|
|
13
|
+
FiberStream-owned producer Ractors without requiring a `Fiber.scheduler`.
|
|
14
|
+
- `Flow.scan(initial)` and `Source#scan(initial)` for lazy running
|
|
15
|
+
accumulators using `Sink.fold`-style reducer semantics.
|
|
16
|
+
|
|
17
|
+
### Changed
|
|
18
|
+
|
|
19
|
+
- Updated README and website reference coverage for owned Ractor producers,
|
|
20
|
+
unordered parallel mapping, and scan.
|
|
21
|
+
- Prefer high-level owned Ractor producer examples in user-facing
|
|
22
|
+
documentation while keeping low-level port APIs documented for externally
|
|
23
|
+
owned producers.
|
|
24
|
+
- Updated the project Ruby pin to 4.0.5.
|
|
25
|
+
|
|
26
|
+
## 0.3.0 - 2026-06-06
|
|
27
|
+
|
|
28
|
+
### Added
|
|
29
|
+
|
|
30
|
+
- `Flow.grouped(count)` and `Source#grouped(count)` for fixed-size batches
|
|
31
|
+
with final partial-group emission.
|
|
32
|
+
- `Source#merge(source)` for scheduler-backed ready-order merging of two
|
|
33
|
+
sources while preserving each input source's own order.
|
|
34
|
+
- `Source.ractor_merge_ports(ports)` for backpressure-aware merging of
|
|
35
|
+
multiple producer Ractor ports without requiring a `Fiber.scheduler`.
|
|
36
|
+
- `Flow.split(separator)` and `Source#split(separator)` for delimiter-based
|
|
37
|
+
framing with optional separator retention and per-frame length limits.
|
|
38
|
+
- Benchmarks and examples for async IO fanout, stream lifecycle probes, and
|
|
39
|
+
Ractor port merge workflows.
|
|
40
|
+
|
|
41
|
+
### Changed
|
|
42
|
+
|
|
43
|
+
- Reworked flow operator tests into focused per-operator test files.
|
|
44
|
+
- Expanded README and repository documentation for source merging, Ractor port
|
|
45
|
+
merging, split framing, grouped batches, and runtime safety guidance.
|
|
46
|
+
- Clarified that `Flow.lines(max_length: nil)` and
|
|
47
|
+
`Flow.split(max_length: nil)` may buffer one unterminated frame without
|
|
48
|
+
bound, and documented explicit `max_length` usage for untrusted streams.
|
|
49
|
+
- Clarified `Source.io` `chunk_size` allocation behavior and Ractor failure
|
|
50
|
+
metadata exposure.
|
|
51
|
+
|
|
52
|
+
### Fixed
|
|
53
|
+
|
|
54
|
+
- Deferred `Source#concat` receiver materialization until downstream demand
|
|
55
|
+
reaches the concatenated source.
|
|
56
|
+
- Cancelled async and buffer producers when downstream closes early.
|
|
57
|
+
- Removed polling from Ractor map enqueue and cleanup paths.
|
|
58
|
+
- Re-raised background pipeline process-control exceptions instead of treating
|
|
59
|
+
them as ordinary stream failures.
|
|
60
|
+
- Hardened Ractor map worker teardown notifications so secondary send failures
|
|
61
|
+
do not cascade during shutdown.
|
|
62
|
+
|
|
3
63
|
## 0.2.0 - 2026-06-05
|
|
4
64
|
|
|
5
65
|
### Added
|
data/README.md
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
# FiberStream
|
|
2
|
+
FiberStream is a Ruby library for linear stream processing with pull-based backpressure.
|
|
2
3
|
|
|
3
|
-
|
|
4
|
-
processing with backpressure.
|
|
4
|
+
It builds lazy Source definitions, transforms values with Flow stages, and materializes results with Sink objects.
|
|
5
5
|
|
|
6
|
-
|
|
7
|
-
a `Sink`.
|
|
6
|
+
[![Gem Version](https://badge.fury.io/rb/fiber_stream.svg)](https://badge.fury.io/rb/fiber_stream)
|
|
8
7
|
|
|
9
8
|
## Quick Start
|
|
9
|
+
Please see the project [documentation](https://dakatsuka.github.io/fiber_stream/) for more details.
|
|
10
10
|
|
|
11
11
|
```ruby
|
|
12
12
|
require "fiber_stream"
|
|
@@ -27,11 +27,13 @@ FiberStream currently supports linear pipelines only.
|
|
|
27
27
|
|
|
28
28
|
Implemented capabilities:
|
|
29
29
|
|
|
30
|
-
- in-memory, IO,
|
|
31
|
-
|
|
30
|
+
- in-memory, IO, FiberStream-owned Ractor producer, backpressure-aware Ractor
|
|
31
|
+
port, and Ractor port merge sources
|
|
32
|
+
- lazy source concatenation, zipping, and scheduler-backed merging
|
|
32
33
|
- mapping, filtering, limiting, predicate-based limiting and dropping,
|
|
33
|
-
fixed-prefix dropping, line splitting, buffering, async
|
|
34
|
-
parallel mapping, and ordered
|
|
34
|
+
fixed-prefix dropping, fixed-size grouping, line splitting, buffering, async
|
|
35
|
+
boundaries, ordered and unordered parallel mapping, and ordered
|
|
36
|
+
Ractor-backed mapping
|
|
35
37
|
- array, first-element, fold, foreach, and IO sinks
|
|
36
38
|
- reusable flow composition and runnable pipelines
|
|
37
39
|
- foreground and scheduler-backed background pipeline execution
|
|
@@ -78,7 +80,10 @@ result # => [[1, "a"], [2, "b"]]
|
|
|
78
80
|
```
|
|
79
81
|
|
|
80
82
|
IO sources read chunks on demand and require a scheduler-backed non-blocking
|
|
81
|
-
fiber
|
|
83
|
+
fiber. The `chunk_size` option is the maximum byte count passed to
|
|
84
|
+
`readpartial` for one downstream pull; very large values may cause the IO
|
|
85
|
+
implementation to attempt large allocations, so choose a bounded value
|
|
86
|
+
appropriate for the workload:
|
|
82
87
|
|
|
83
88
|
```ruby
|
|
84
89
|
require "async"
|
|
@@ -93,45 +98,51 @@ chunks =
|
|
|
93
98
|
end.wait
|
|
94
99
|
```
|
|
95
100
|
|
|
96
|
-
Ractor
|
|
97
|
-
The producer
|
|
98
|
-
|
|
101
|
+
Owned Ractor producer sources run producer blocks in FiberStream-managed
|
|
102
|
+
Ractors. The producer block receives a `RactorProducer` context and emits one
|
|
103
|
+
value per downstream demand:
|
|
99
104
|
|
|
100
105
|
```ruby
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
producer
|
|
105
|
-
Ractor.new(data_port, setup_port) do |outbox, setup|
|
|
106
|
-
ack_port = Ractor::Port.new
|
|
107
|
-
setup.send(ack_port)
|
|
108
|
-
|
|
109
|
-
values = [1, 2, 3].to_enum
|
|
110
|
-
|
|
111
|
-
loop do
|
|
112
|
-
case ack_port.receive
|
|
113
|
-
in FiberStream::RactorPort::Ack
|
|
114
|
-
begin
|
|
115
|
-
outbox.send(FiberStream::RactorPort::Element.new(values.next))
|
|
116
|
-
rescue StopIteration
|
|
117
|
-
outbox.send(FiberStream::RactorPort::Complete.new)
|
|
118
|
-
break
|
|
119
|
-
end
|
|
120
|
-
in FiberStream::RactorPort::Cancel
|
|
121
|
-
break
|
|
122
|
-
end
|
|
106
|
+
PRODUCE_VALUES =
|
|
107
|
+
Ractor.shareable_proc do |producer, values|
|
|
108
|
+
values.each do |value|
|
|
109
|
+
break unless producer.emit(value)
|
|
123
110
|
end
|
|
124
111
|
end
|
|
125
112
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
FiberStream::Source.ractor_port(data_port, ack_port: ack_port)
|
|
113
|
+
FiberStream::Source.ractor_producer([1, 2, 3], &PRODUCE_VALUES)
|
|
129
114
|
.run_with(FiberStream::Sink.to_a)
|
|
130
115
|
# => [1, 2, 3]
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Multiple owned producer Ractors can be merged directly without a
|
|
119
|
+
scheduler-backed `Source#merge`. Each producer still receives at most one
|
|
120
|
+
outstanding ack:
|
|
121
|
+
|
|
122
|
+
```ruby
|
|
123
|
+
PRODUCE_TAGGED_VALUES =
|
|
124
|
+
Ractor.shareable_proc do |producer, tag, values|
|
|
125
|
+
values.each do |value|
|
|
126
|
+
break unless producer.emit([tag, value])
|
|
127
|
+
end
|
|
128
|
+
end
|
|
131
129
|
|
|
132
|
-
|
|
130
|
+
source =
|
|
131
|
+
FiberStream::Source.ractor_merge_producers do |group|
|
|
132
|
+
group.producer(:a, [1, 2], &PRODUCE_TAGGED_VALUES)
|
|
133
|
+
group.producer(:b, [3, 4], &PRODUCE_TAGGED_VALUES)
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
source.run_with(FiberStream::Sink.to_a)
|
|
137
|
+
# Example result: [[:a, 1], [:b, 3], [:a, 2], [:b, 4]]
|
|
133
138
|
```
|
|
134
139
|
|
|
140
|
+
Use the lower-level `Source.ractor_port` and `Source.ractor_merge_ports` APIs
|
|
141
|
+
when producer Ractors are owned outside FiberStream or need custom lifecycle
|
|
142
|
+
handling. `RactorPort::Failure` cause metadata is producer-provided and is
|
|
143
|
+
surfaced on `RactorPortSourceError`; redact sensitive details before sending
|
|
144
|
+
failures across trust boundaries.
|
|
145
|
+
|
|
135
146
|
Streaming HTTP response bodies that implement `#each`, such as
|
|
136
147
|
`async-http` response bodies, can be used with `Source.each` without buffering
|
|
137
148
|
the full body first. Use the HTTP client's block form or an explicit `ensure`
|
|
@@ -194,6 +205,50 @@ FiberStream::Source.each([" a ", "", " b "])
|
|
|
194
205
|
# => ["a", "b"]
|
|
195
206
|
```
|
|
196
207
|
|
|
208
|
+
Use `parallel_map` for ordered scheduler-backed mapping when each element
|
|
209
|
+
waits on non-blocking IO. It preserves input order while allowing up to
|
|
210
|
+
`concurrency` mapping operations to be in flight:
|
|
211
|
+
|
|
212
|
+
```ruby
|
|
213
|
+
require "async"
|
|
214
|
+
require "fiber_stream"
|
|
215
|
+
|
|
216
|
+
def fetch_profile(user_id)
|
|
217
|
+
# Example: perform scheduler-aware HTTP, database, or socket IO here.
|
|
218
|
+
sleep 0.05
|
|
219
|
+
{ id: user_id, name: "user-#{user_id}" }
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
profiles =
|
|
223
|
+
Sync do
|
|
224
|
+
FiberStream::Source.each([1, 2, 3, 4])
|
|
225
|
+
.parallel_map(concurrency: 4) { |user_id| fetch_profile(user_id) }
|
|
226
|
+
.run_with(FiberStream::Sink.to_a)
|
|
227
|
+
end
|
|
228
|
+
|
|
229
|
+
profiles.map { |profile| profile.fetch(:id) } # => [1, 2, 3, 4]
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
Use `parallel_unordered_map` when every result can be handled independently
|
|
233
|
+
and lower head-of-line blocking matters more than input order. It still limits
|
|
234
|
+
in-flight mapping work to `concurrency`, but emits values as mapping jobs
|
|
235
|
+
finish:
|
|
236
|
+
|
|
237
|
+
```ruby
|
|
238
|
+
require "async"
|
|
239
|
+
require "fiber_stream"
|
|
240
|
+
|
|
241
|
+
responses =
|
|
242
|
+
Sync do
|
|
243
|
+
FiberStream::Source.each(["/a", "/slow", "/b"])
|
|
244
|
+
.parallel_unordered_map(concurrency: 3) { |path| fetch_path(path) }
|
|
245
|
+
.run_with(FiberStream::Sink.to_a)
|
|
246
|
+
end
|
|
247
|
+
|
|
248
|
+
# Results are in completion order, not necessarily input order.
|
|
249
|
+
responses
|
|
250
|
+
```
|
|
251
|
+
|
|
197
252
|
Use `ractor_map` for ordered CPU-bound mapping in Ractor workers. The mapper
|
|
198
253
|
must be shareable, usually by creating it with `Ractor.shareable_proc`.
|
|
199
254
|
|
|
@@ -326,6 +381,29 @@ tail =
|
|
|
326
381
|
tail # => [3, 4]
|
|
327
382
|
```
|
|
328
383
|
|
|
384
|
+
`Flow.grouped` batches adjacent elements into arrays and emits the final
|
|
385
|
+
partial group:
|
|
386
|
+
|
|
387
|
+
```ruby
|
|
388
|
+
batches =
|
|
389
|
+
FiberStream::Source.each([1, 2, 3, 4, 5])
|
|
390
|
+
.grouped(2)
|
|
391
|
+
.run_with(FiberStream::Sink.to_a)
|
|
392
|
+
|
|
393
|
+
batches # => [[1, 2], [3, 4], [5]]
|
|
394
|
+
```
|
|
395
|
+
|
|
396
|
+
`Flow.scan` emits the updated accumulator for each upstream element:
|
|
397
|
+
|
|
398
|
+
```ruby
|
|
399
|
+
running_totals =
|
|
400
|
+
FiberStream::Source.each([1, 2, 3, 4])
|
|
401
|
+
.scan(0) { |sum, number| sum + number }
|
|
402
|
+
.run_with(FiberStream::Sink.to_a)
|
|
403
|
+
|
|
404
|
+
running_totals # => [1, 3, 6, 10]
|
|
405
|
+
```
|
|
406
|
+
|
|
329
407
|
`Flow.take_while` emits the leading prefix while a predicate is truthy, then
|
|
330
408
|
closes upstream at the first false or nil result:
|
|
331
409
|
|
|
@@ -376,11 +454,31 @@ first =
|
|
|
376
454
|
first # => [1, 2]
|
|
377
455
|
```
|
|
378
456
|
|
|
457
|
+
`Source#merge` emits values from either input source in scheduler-observed
|
|
458
|
+
ready order while preserving each input's own order:
|
|
459
|
+
|
|
460
|
+
```ruby
|
|
461
|
+
merged =
|
|
462
|
+
Sync do
|
|
463
|
+
FiberStream::Source.each([1, 2])
|
|
464
|
+
.merge(FiberStream::Source.each(["a", "b"]))
|
|
465
|
+
.run_with(FiberStream::Sink.to_a)
|
|
466
|
+
end
|
|
467
|
+
|
|
468
|
+
# Example result: [1, "a", 2, "b"]
|
|
469
|
+
```
|
|
470
|
+
|
|
471
|
+
`merge` does not make scheduler-unaware blocking source work non-blocking and
|
|
472
|
+
does not provide CPU parallelism. Use producer Ractors with
|
|
473
|
+
`Source.ractor_producer` or `Source.ractor_merge_producers` when producer work
|
|
474
|
+
needs true isolation.
|
|
475
|
+
|
|
379
476
|
`Flow.buffer(count)` allows bounded prefetch. `Flow.async`, `Flow.buffer`,
|
|
380
|
-
`Flow.parallel_map`, `
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
at
|
|
477
|
+
`Flow.parallel_map`, `Flow.parallel_unordered_map`, `Source.io`,
|
|
478
|
+
`Source#merge`, `Sink.io`, and `Pipeline#run_async` require an installed
|
|
479
|
+
`Fiber.scheduler` and a non-blocking current fiber when demanded or started.
|
|
480
|
+
FiberStream does not install a scheduler and does not depend on Async at
|
|
481
|
+
runtime.
|
|
384
482
|
|
|
385
483
|
## API Surface
|
|
386
484
|
|
|
@@ -388,24 +486,32 @@ Sources:
|
|
|
388
486
|
|
|
389
487
|
- `FiberStream::Source.each(enumerable)`
|
|
390
488
|
- `FiberStream::Source.io(io, chunk_size: 16 * 1024, close: false)`
|
|
489
|
+
- `FiberStream::Source.ractor_producer(*args, transfer: :copy, ack_transfer: :copy) { |producer, *args| ... }`
|
|
490
|
+
- `FiberStream::Source.ractor_merge_producers(transfer: :copy, ack_transfer: :copy) { |group| ... }`
|
|
391
491
|
- `FiberStream::Source.ractor_port(port, ack_port:, ack_transfer: :copy, cancel: true)`
|
|
492
|
+
- `FiberStream::Source.ractor_merge_ports(ports, ack_transfer: :copy, cancel: true)`
|
|
392
493
|
|
|
393
494
|
Source convenience methods:
|
|
394
495
|
|
|
395
496
|
- `Source#via(flow)`
|
|
396
497
|
- `Source#concat(source)`
|
|
397
498
|
- `Source#zip(source)`
|
|
499
|
+
- `Source#merge(source)`
|
|
398
500
|
- `Source#map { |element| ... }`
|
|
399
501
|
- `Source#parallel_map(concurrency:) { |element| ... }`
|
|
502
|
+
- `Source#parallel_unordered_map(concurrency:) { |element| ... }`
|
|
400
503
|
- `Source#ractor_map(workers:, input_transfer: :copy, output_transfer: :copy) { |element| ... }`
|
|
401
504
|
- `Source#select { |element| ... }`
|
|
402
505
|
- `Source#take(count)`
|
|
403
506
|
- `Source#drop(count)`
|
|
507
|
+
- `Source#grouped(count)`
|
|
508
|
+
- `Source#scan(initial) { |accumulator, element| ... }`
|
|
404
509
|
- `Source#take_while { |element| ... }`
|
|
405
510
|
- `Source#drop_while { |element| ... }`
|
|
406
511
|
- `Source#async`
|
|
407
512
|
- `Source#buffer(count)`
|
|
408
513
|
- `Source#lines(chomp: true, max_length: nil)`
|
|
514
|
+
- `Source#split(separator, keep_separator: false, max_length: nil)`
|
|
409
515
|
- `Source#to(sink)`
|
|
410
516
|
- `Source#run_with(sink)`
|
|
411
517
|
|
|
@@ -413,18 +519,26 @@ Flows:
|
|
|
413
519
|
|
|
414
520
|
- `FiberStream::Flow.map { |element| ... }`
|
|
415
521
|
- `FiberStream::Flow.parallel_map(concurrency:) { |element| ... }`
|
|
522
|
+
- `FiberStream::Flow.parallel_unordered_map(concurrency:) { |element| ... }`
|
|
416
523
|
- `FiberStream::Flow.ractor_map(workers:, input_transfer: :copy, output_transfer: :copy) { |element| ... }`
|
|
417
524
|
- `FiberStream::Flow.select { |element| ... }`
|
|
418
525
|
- `FiberStream::Flow.take(count)`
|
|
419
526
|
- `FiberStream::Flow.drop(count)`
|
|
527
|
+
- `FiberStream::Flow.grouped(count)`
|
|
528
|
+
- `FiberStream::Flow.scan(initial) { |accumulator, element| ... }`
|
|
420
529
|
- `FiberStream::Flow.take_while { |element| ... }`
|
|
421
530
|
- `FiberStream::Flow.drop_while { |element| ... }`
|
|
422
531
|
- `FiberStream::Flow.async`
|
|
423
532
|
- `FiberStream::Flow.buffer(count)`
|
|
424
533
|
- `FiberStream::Flow.lines(chomp: true, max_length: nil)`
|
|
534
|
+
- `FiberStream::Flow.split(separator, keep_separator: false, max_length: nil)`
|
|
425
535
|
- `Flow#via(flow)`
|
|
426
536
|
- `Flow#to(sink)`
|
|
427
537
|
|
|
538
|
+
`lines` and `split` default to `max_length: nil`, which allows one
|
|
539
|
+
unterminated line or frame to buffer without bound. Set a positive
|
|
540
|
+
`max_length` for untrusted, network-facing, or otherwise unbounded inputs.
|
|
541
|
+
|
|
428
542
|
Sinks:
|
|
429
543
|
|
|
430
544
|
- `FiberStream::Sink.to_a`
|
|
@@ -454,7 +568,9 @@ bundle exec ruby examples/file_copy.rb
|
|
|
454
568
|
bundle exec ruby examples/backpressure_buffer.rb
|
|
455
569
|
bundle exec ruby examples/background_execution.rb
|
|
456
570
|
bundle exec ruby examples/ractor_map_hashing.rb
|
|
571
|
+
bundle exec ruby examples/ractor_producer_sources.rb
|
|
457
572
|
bundle exec ruby examples/ractor_port_source.rb
|
|
573
|
+
bundle exec ruby examples/ractor_merge_ports_and_map.rb
|
|
458
574
|
bundle exec ruby examples/async_http_requests.rb
|
|
459
575
|
bundle exec ruby examples/async_http_streaming_body.rb
|
|
460
576
|
```
|
|
@@ -465,9 +581,16 @@ events so the difference between direct demand and bounded prefetch is visible.
|
|
|
465
581
|
`examples/ractor_map_hashing.rb` demonstrates ordered Ractor-backed hashing
|
|
466
582
|
with a shareable mapper proc and `input_transfer: :move`.
|
|
467
583
|
|
|
584
|
+
`examples/ractor_producer_sources.rb` demonstrates high-level owned producer
|
|
585
|
+
Ractors with `Source.ractor_producer` and `Source.ractor_merge_producers`.
|
|
586
|
+
|
|
468
587
|
`examples/ractor_port_source.rb` demonstrates a producer Ractor that waits for
|
|
469
588
|
`RactorPort::Ack` before sending each `RactorPort::Element`.
|
|
470
589
|
|
|
590
|
+
`examples/ractor_merge_ports_and_map.rb` demonstrates CPU-bound producer
|
|
591
|
+
Ractors merged with `Source.ractor_merge_ports`, followed by CPU-bound
|
|
592
|
+
verification in `ractor_map` workers.
|
|
593
|
+
|
|
471
594
|
`examples/async_http_requests.rb` starts a local HTTP server and shows
|
|
472
595
|
FiberStream overlapping independent HTTP request waits with `parallel_map`.
|
|
473
596
|
|
|
@@ -480,12 +603,13 @@ Benchmark scripts live under `benchmarks/`.
|
|
|
480
603
|
```sh
|
|
481
604
|
bundle exec ruby benchmarks/stream_transform.rb
|
|
482
605
|
bundle exec ruby benchmarks/latency_overlap.rb
|
|
606
|
+
bundle exec ruby benchmarks/async_io_fanout.rb
|
|
483
607
|
bundle exec ruby benchmarks/heavy_cpu_map.rb
|
|
484
608
|
```
|
|
485
609
|
|
|
486
610
|
## Development
|
|
487
611
|
|
|
488
|
-
This project targets Ruby 4.x. The repository currently pins Ruby 4.0.
|
|
612
|
+
This project targets Ruby 4.x. The repository currently pins Ruby 4.0.5 in
|
|
489
613
|
`mise.toml`.
|
|
490
614
|
|
|
491
615
|
Install dependencies:
|
data/examples/README.md
CHANGED
|
@@ -11,6 +11,8 @@ bundle exec ruby examples/backpressure_buffer.rb
|
|
|
11
11
|
bundle exec ruby examples/background_execution.rb
|
|
12
12
|
bundle exec ruby examples/ractor_map_hashing.rb
|
|
13
13
|
bundle exec ruby examples/ractor_port_source.rb
|
|
14
|
+
bundle exec ruby examples/ractor_producer_sources.rb
|
|
15
|
+
bundle exec ruby examples/ractor_merge_ports_and_map.rb
|
|
14
16
|
bundle exec ruby examples/async_http_requests.rb
|
|
15
17
|
bundle exec ruby examples/async_http_streaming_body.rb
|
|
16
18
|
```
|
|
@@ -47,6 +49,15 @@ pipeline runs.
|
|
|
47
49
|
`RactorPort::Ack`, and sends one typed `RactorPort::Element` per downstream
|
|
48
50
|
demand.
|
|
49
51
|
|
|
52
|
+
`ractor_producer_sources.rb` demonstrates the high-level owned-producer APIs:
|
|
53
|
+
`Source.ractor_producer` for one producer and `Source.ractor_merge_producers`
|
|
54
|
+
for ready-order fan-in from multiple producers. FiberStream creates the ports,
|
|
55
|
+
producer Ractors, and cooperative cleanup path.
|
|
56
|
+
|
|
57
|
+
`ractor_merge_ports_and_map.rb` runs CPU-bound work in multiple producer
|
|
58
|
+
Ractors, merges their port outputs with `Source.ractor_merge_ports`, then runs
|
|
59
|
+
another CPU-bound verification stage with `ractor_map`.
|
|
60
|
+
|
|
50
61
|
`async_http_requests.rb` starts a local HTTP server and compares serial
|
|
51
62
|
requests with FiberStream `parallel_map` requests. It keeps responses ordered
|
|
52
63
|
while overlapping independent network waits.
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
$LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
|
|
4
|
+
|
|
5
|
+
require "digest"
|
|
6
|
+
require "fiber_stream"
|
|
7
|
+
|
|
8
|
+
PRODUCER_JOBS = [
|
|
9
|
+
[
|
|
10
|
+
"producer-a",
|
|
11
|
+
[
|
|
12
|
+
{ name: "alpha.bin", payload: +"A" * 180_000, seed_rounds: 80, verify_rounds: 60 },
|
|
13
|
+
{ name: "bravo.bin", payload: +"B" * 140_000, seed_rounds: 70, verify_rounds: 55 }
|
|
14
|
+
]
|
|
15
|
+
],
|
|
16
|
+
[
|
|
17
|
+
"producer-b",
|
|
18
|
+
[
|
|
19
|
+
{ name: "charlie.bin", payload: +"C" * 220_000, seed_rounds: 85, verify_rounds: 65 },
|
|
20
|
+
{ name: "delta.bin", payload: +"D" * 120_000, seed_rounds: 75, verify_rounds: 50 }
|
|
21
|
+
]
|
|
22
|
+
]
|
|
23
|
+
].freeze
|
|
24
|
+
|
|
25
|
+
VERIFY_RECORD =
|
|
26
|
+
Ractor.shareable_proc do |record|
|
|
27
|
+
digest = record.fetch(:seed_sha256)
|
|
28
|
+
|
|
29
|
+
record.fetch(:verify_rounds).times do |index|
|
|
30
|
+
digest = Digest::SHA256.hexdigest("#{digest}:verify:#{index}")
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
record.merge(final_sha256: digest)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def spawn_digest_producer(data_port, producer_name, jobs)
|
|
37
|
+
setup_port = Ractor::Port.new
|
|
38
|
+
producer =
|
|
39
|
+
Ractor.new(data_port, setup_port, producer_name, jobs) do |outbox, setup, name, producer_jobs|
|
|
40
|
+
ack_port = Ractor::Port.new
|
|
41
|
+
setup.send(ack_port)
|
|
42
|
+
|
|
43
|
+
enumerator = producer_jobs.to_enum
|
|
44
|
+
sent = 0
|
|
45
|
+
|
|
46
|
+
loop do
|
|
47
|
+
case ack_port.receive
|
|
48
|
+
in FiberStream::RactorPort::Ack
|
|
49
|
+
begin
|
|
50
|
+
job = enumerator.next
|
|
51
|
+
digest = job.fetch(:payload)
|
|
52
|
+
|
|
53
|
+
job.fetch(:seed_rounds).times do |index|
|
|
54
|
+
digest = Digest::SHA256.hexdigest("#{digest}:#{name}:#{index}")
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
sent += 1
|
|
58
|
+
outbox.send(
|
|
59
|
+
FiberStream::RactorPort::Element.new(
|
|
60
|
+
{
|
|
61
|
+
producer: name,
|
|
62
|
+
name: job.fetch(:name),
|
|
63
|
+
bytes: job.fetch(:payload).bytesize,
|
|
64
|
+
seed_sha256: digest,
|
|
65
|
+
verify_rounds: job.fetch(:verify_rounds)
|
|
66
|
+
}
|
|
67
|
+
),
|
|
68
|
+
move: true
|
|
69
|
+
)
|
|
70
|
+
rescue StopIteration
|
|
71
|
+
outbox.send(FiberStream::RactorPort::Complete.new)
|
|
72
|
+
break [:completed, name, sent]
|
|
73
|
+
end
|
|
74
|
+
in FiberStream::RactorPort::Cancel[reason]
|
|
75
|
+
break [:cancelled, name, sent, reason]
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
[producer, setup_port.receive]
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
port_pairs = []
|
|
84
|
+
producers =
|
|
85
|
+
PRODUCER_JOBS.map do |producer_name, jobs|
|
|
86
|
+
data_port = Ractor::Port.new
|
|
87
|
+
producer, ack_port = spawn_digest_producer(data_port, producer_name, jobs)
|
|
88
|
+
port_pairs << { port: data_port, ack_port: ack_port }
|
|
89
|
+
producer
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
records =
|
|
93
|
+
FiberStream::Source.ractor_merge_ports(port_pairs)
|
|
94
|
+
.ractor_map(workers: 2, input_transfer: :move, output_transfer: :move, &VERIFY_RECORD)
|
|
95
|
+
.run_with(FiberStream::Sink.to_a)
|
|
96
|
+
|
|
97
|
+
puts "Merged producer Ractors, then verified in ractor_map workers"
|
|
98
|
+
records.each do |record|
|
|
99
|
+
puts format(
|
|
100
|
+
"- %-10<producer>s %-11<name>s %7<bytes>d bytes %<final_sha256>s",
|
|
101
|
+
producer: record.fetch(:producer),
|
|
102
|
+
name: record.fetch(:name),
|
|
103
|
+
bytes: record.fetch(:bytes),
|
|
104
|
+
final_sha256: record.fetch(:final_sha256)
|
|
105
|
+
)
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
puts
|
|
109
|
+
puts "Producer statuses:"
|
|
110
|
+
producers.each do |producer|
|
|
111
|
+
puts "- #{producer.value.inspect}"
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
puts
|
|
115
|
+
puts "Source.ractor_merge_ports emits producers in ready order."
|
|
116
|
+
puts "ractor_map preserves that merged input order while running verification in Ractor workers."
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
$LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
|
|
4
|
+
|
|
5
|
+
require "fiber_stream"
|
|
6
|
+
|
|
7
|
+
EMIT_NUMBERS =
|
|
8
|
+
Ractor.shareable_proc do |producer, range|
|
|
9
|
+
range.each do |number|
|
|
10
|
+
break unless producer.emit(number)
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
EMIT_TAGGED_NUMBERS =
|
|
15
|
+
Ractor.shareable_proc do |producer, tag, range|
|
|
16
|
+
range.each do |number|
|
|
17
|
+
break unless producer.emit([tag, number])
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
squares =
|
|
22
|
+
FiberStream::Source.ractor_producer(1..5, &EMIT_NUMBERS)
|
|
23
|
+
.map { |number| number * number }
|
|
24
|
+
.run_with(FiberStream::Sink.to_a)
|
|
25
|
+
|
|
26
|
+
puts "Squares from one FiberStream-owned producer Ractor:"
|
|
27
|
+
puts squares.join(", ")
|
|
28
|
+
|
|
29
|
+
merged =
|
|
30
|
+
FiberStream::Source.ractor_merge_producers do |group|
|
|
31
|
+
group.producer(:low, 1..3, &EMIT_TAGGED_NUMBERS)
|
|
32
|
+
group.producer(:high, 10..12, &EMIT_TAGGED_NUMBERS)
|
|
33
|
+
end.run_with(FiberStream::Sink.to_a)
|
|
34
|
+
|
|
35
|
+
puts
|
|
36
|
+
puts "Merged values from two owned producer Ractors:"
|
|
37
|
+
merged.each do |tag, number|
|
|
38
|
+
puts "- #{tag}: #{number}"
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
puts
|
|
42
|
+
puts "Producer blocks use RactorProducer#emit and stop when it returns false."
|
|
43
|
+
puts "FiberStream creates the data ports, ack ports, producer Ractors, and cleanup path."
|
data/lib/fiber_stream/errors.rb
CHANGED
|
@@ -9,7 +9,10 @@ module FiberStream
|
|
|
9
9
|
#
|
|
10
10
|
# Producer failures, invalid protocol messages, and source-side Ractor port
|
|
11
11
|
# failures use this stable error shape so callers do not need to depend on
|
|
12
|
-
# Ruby's Ractor transport exceptions.
|
|
12
|
+
# Ruby's Ractor transport exceptions. For producer failures,
|
|
13
|
+
# `cause_class_name` and `cause_message` come from the producer's
|
|
14
|
+
# `RactorPort::Failure` envelope and are included in this error's public
|
|
15
|
+
# message.
|
|
13
16
|
class RactorPortSourceError < RuntimeError
|
|
14
17
|
attr_reader :kind, :cause_class_name, :cause_message, :original_cause
|
|
15
18
|
|