fiber_stream 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +37 -0
- data/README.md +102 -9
- data/examples/README.md +5 -0
- data/examples/ractor_merge_ports_and_map.rb +116 -0
- data/lib/fiber_stream/errors.rb +4 -1
- data/lib/fiber_stream/flow.rb +37 -1
- data/lib/fiber_stream/pull/async_boundary.rb +28 -11
- data/lib/fiber_stream/pull/buffer_boundary.rb +28 -10
- data/lib/fiber_stream/pull/concat.rb +9 -1
- data/lib/fiber_stream/pull/grouped.rb +46 -0
- data/lib/fiber_stream/pull/merge.rb +230 -0
- data/lib/fiber_stream/pull/parallel_map_boundary.rb +28 -24
- data/lib/fiber_stream/pull/ractor_map_boundary.rb +103 -79
- data/lib/fiber_stream/pull/ractor_merge_ports_source.rb +358 -0
- data/lib/fiber_stream/pull/ractor_port_source.rb +14 -14
- data/lib/fiber_stream/pull/split.rb +134 -0
- data/lib/fiber_stream/pull.rb +23 -3
- data/lib/fiber_stream/ractor_port.rb +3 -1
- data/lib/fiber_stream/running_pipeline.rb +18 -8
- data/lib/fiber_stream/source.rb +105 -3
- data/lib/fiber_stream/version.rb +1 -1
- data/sig/fiber_stream.rbs +7 -0
- metadata +7 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2401496eff99cd4792deda8fa927688fc3ba0e97e8a35637db4395895ab04cd9
|
|
4
|
+
data.tar.gz: 9f9674e2bc7dc9ce49b899727a02b9158ce6c9629c744ec6d626b16923e14160
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e7237c7c15b66105b09cdccf1e52980f3c030c74acbd2dc7cc3b5cfb720a716836925b5801bdc59351131dcd7ecab72ebdedb6b50593f5258a1cff2d0ebb39a9
|
|
7
|
+
data.tar.gz: d9bc7c85992c344bef4a36b0f234840c154c06274c450ef0696246d3042a017f8764c22ab1a2df43e3059420c52af7827852e63b4a5e91d668f4c9bb6a191d57
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,42 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.3.0 - 2026-06-06
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
|
|
7
|
+
- `Flow.grouped(count)` and `Source#grouped(count)` for fixed-size batches
|
|
8
|
+
with final partial-group emission.
|
|
9
|
+
- `Source#merge(source)` for scheduler-backed ready-order merging of two
|
|
10
|
+
sources while preserving each input source's own order.
|
|
11
|
+
- `Source.ractor_merge_ports(ports)` for backpressure-aware merging of
|
|
12
|
+
multiple producer Ractor ports without requiring a `Fiber.scheduler`.
|
|
13
|
+
- `Flow.split(separator)` and `Source#split(separator)` for delimiter-based
|
|
14
|
+
framing with optional separator retention and per-frame length limits.
|
|
15
|
+
- Benchmarks and examples for async IO fanout, stream lifecycle probes, and
|
|
16
|
+
Ractor port merge workflows.
|
|
17
|
+
|
|
18
|
+
### Changed
|
|
19
|
+
|
|
20
|
+
- Reworked flow operator tests into focused per-operator test files.
|
|
21
|
+
- Expanded README and repository documentation for source merging, Ractor port
|
|
22
|
+
merging, split framing, grouped batches, and runtime safety guidance.
|
|
23
|
+
- Clarified that `Flow.lines(max_length: nil)` and
|
|
24
|
+
`Flow.split(max_length: nil)` may buffer one unterminated frame without
|
|
25
|
+
bound, and documented explicit `max_length` usage for untrusted streams.
|
|
26
|
+
- Clarified `Source.io` `chunk_size` allocation behavior and Ractor failure
|
|
27
|
+
metadata exposure.
|
|
28
|
+
|
|
29
|
+
### Fixed
|
|
30
|
+
|
|
31
|
+
- Deferred `Source#concat` receiver materialization until downstream demand
|
|
32
|
+
reaches the concatenated source.
|
|
33
|
+
- Cancelled async and buffer producers when downstream closes early.
|
|
34
|
+
- Removed polling from Ractor map enqueue and cleanup paths.
|
|
35
|
+
- Re-raised background pipeline process-control exceptions instead of treating
|
|
36
|
+
them as ordinary stream failures.
|
|
37
|
+
- Hardened Ractor map worker teardown notifications so secondary send failures
|
|
38
|
+
do not cascade during shutdown.
|
|
39
|
+
|
|
3
40
|
## 0.2.0 - 2026-06-05
|
|
4
41
|
|
|
5
42
|
### Added
|
data/README.md
CHANGED
|
@@ -27,11 +27,11 @@ FiberStream currently supports linear pipelines only.
|
|
|
27
27
|
|
|
28
28
|
Implemented capabilities:
|
|
29
29
|
|
|
30
|
-
- in-memory, IO,
|
|
31
|
-
- lazy source concatenation and
|
|
30
|
+
- in-memory, IO, backpressure-aware Ractor port, and Ractor port merge sources
|
|
31
|
+
- lazy source concatenation, zipping, and scheduler-backed merging
|
|
32
32
|
- mapping, filtering, limiting, predicate-based limiting and dropping,
|
|
33
|
-
fixed-prefix dropping, line splitting, buffering, async
|
|
34
|
-
parallel mapping, and ordered Ractor-backed mapping
|
|
33
|
+
fixed-prefix dropping, fixed-size grouping, line splitting, buffering, async
|
|
34
|
+
boundaries, ordered parallel mapping, and ordered Ractor-backed mapping
|
|
35
35
|
- array, first-element, fold, foreach, and IO sinks
|
|
36
36
|
- reusable flow composition and runnable pipelines
|
|
37
37
|
- foreground and scheduler-backed background pipeline execution
|
|
@@ -78,7 +78,10 @@ result # => [[1, "a"], [2, "b"]]
|
|
|
78
78
|
```
|
|
79
79
|
|
|
80
80
|
IO sources read chunks on demand and require a scheduler-backed non-blocking
|
|
81
|
-
fiber
|
|
81
|
+
fiber. The `chunk_size` option is the maximum byte count passed to
|
|
82
|
+
`readpartial` for one downstream pull; very large values may cause the IO
|
|
83
|
+
implementation to attempt large allocations, so choose a bounded value
|
|
84
|
+
appropriate for the workload:
|
|
82
85
|
|
|
83
86
|
```ruby
|
|
84
87
|
require "async"
|
|
@@ -132,6 +135,25 @@ FiberStream::Source.ractor_port(data_port, ack_port: ack_port)
|
|
|
132
135
|
producer.value
|
|
133
136
|
```
|
|
134
137
|
|
|
138
|
+
`RactorPort::Failure` cause metadata is producer-provided and is surfaced on
|
|
139
|
+
`RactorPortSourceError`. Redact internal paths, secrets, tenant data, or other
|
|
140
|
+
sensitive details before sending failures across trust boundaries.
|
|
141
|
+
|
|
142
|
+
Multiple producer Ractors can be merged directly without a scheduler-backed
|
|
143
|
+
`Source#merge`. Each producer still receives at most one outstanding ack:
|
|
144
|
+
|
|
145
|
+
```ruby
|
|
146
|
+
source =
|
|
147
|
+
FiberStream::Source.ractor_merge_ports(
|
|
148
|
+
[
|
|
149
|
+
{ port: data_port_a, ack_port: ack_port_a },
|
|
150
|
+
{ port: data_port_b, ack_port: ack_port_b }
|
|
151
|
+
]
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
values = source.run_with(FiberStream::Sink.to_a)
|
|
155
|
+
```
|
|
156
|
+
|
|
135
157
|
Streaming HTTP response bodies that implement `#each`, such as
|
|
136
158
|
`async-http` response bodies, can be used with `Source.each` without buffering
|
|
137
159
|
the full body first. Use the HTTP client's block form or an explicit `ensure`
|
|
@@ -194,6 +216,30 @@ FiberStream::Source.each([" a ", "", " b "])
|
|
|
194
216
|
# => ["a", "b"]
|
|
195
217
|
```
|
|
196
218
|
|
|
219
|
+
Use `parallel_map` for ordered scheduler-backed mapping when each element
|
|
220
|
+
waits on non-blocking IO. It preserves input order while allowing up to
|
|
221
|
+
`concurrency` mapping operations to be in flight:
|
|
222
|
+
|
|
223
|
+
```ruby
|
|
224
|
+
require "async"
|
|
225
|
+
require "fiber_stream"
|
|
226
|
+
|
|
227
|
+
def fetch_profile(user_id)
|
|
228
|
+
# Example: perform scheduler-aware HTTP, database, or socket IO here.
|
|
229
|
+
sleep 0.05
|
|
230
|
+
{ id: user_id, name: "user-#{user_id}" }
|
|
231
|
+
end
|
|
232
|
+
|
|
233
|
+
profiles =
|
|
234
|
+
Sync do
|
|
235
|
+
FiberStream::Source.each([1, 2, 3, 4])
|
|
236
|
+
.parallel_map(concurrency: 4) { |user_id| fetch_profile(user_id) }
|
|
237
|
+
.run_with(FiberStream::Sink.to_a)
|
|
238
|
+
end
|
|
239
|
+
|
|
240
|
+
profiles.map { |profile| profile.fetch(:id) } # => [1, 2, 3, 4]
|
|
241
|
+
```
|
|
242
|
+
|
|
197
243
|
Use `ractor_map` for ordered CPU-bound mapping in Ractor workers. The mapper
|
|
198
244
|
must be shareable, usually by creating it with `Ractor.shareable_proc`.
|
|
199
245
|
|
|
@@ -326,6 +372,18 @@ tail =
|
|
|
326
372
|
tail # => [3, 4]
|
|
327
373
|
```
|
|
328
374
|
|
|
375
|
+
`Flow.grouped` batches adjacent elements into arrays and emits the final
|
|
376
|
+
partial group:
|
|
377
|
+
|
|
378
|
+
```ruby
|
|
379
|
+
batches =
|
|
380
|
+
FiberStream::Source.each([1, 2, 3, 4, 5])
|
|
381
|
+
.grouped(2)
|
|
382
|
+
.run_with(FiberStream::Sink.to_a)
|
|
383
|
+
|
|
384
|
+
batches # => [[1, 2], [3, 4], [5]]
|
|
385
|
+
```
|
|
386
|
+
|
|
329
387
|
`Flow.take_while` emits the leading prefix while a predicate is truthy, then
|
|
330
388
|
closes upstream at the first false or nil result:
|
|
331
389
|
|
|
@@ -376,11 +434,30 @@ first =
|
|
|
376
434
|
first # => [1, 2]
|
|
377
435
|
```
|
|
378
436
|
|
|
437
|
+
`Source#merge` emits values from either input source in scheduler-observed
|
|
438
|
+
ready order while preserving each input's own order:
|
|
439
|
+
|
|
440
|
+
```ruby
|
|
441
|
+
merged =
|
|
442
|
+
Sync do
|
|
443
|
+
FiberStream::Source.each([1, 2])
|
|
444
|
+
.merge(FiberStream::Source.each(["a", "b"]))
|
|
445
|
+
.run_with(FiberStream::Sink.to_a)
|
|
446
|
+
end
|
|
447
|
+
|
|
448
|
+
# Example result: [1, "a", 2, "b"]
|
|
449
|
+
```
|
|
450
|
+
|
|
451
|
+
`merge` does not make scheduler-unaware blocking source work non-blocking and
|
|
452
|
+
does not provide CPU parallelism. Use producer ractors with
|
|
453
|
+
`Source.ractor_port` or `Source.ractor_merge_ports` when producer work needs
|
|
454
|
+
true isolation.
|
|
455
|
+
|
|
379
456
|
`Flow.buffer(count)` allows bounded prefetch. `Flow.async`, `Flow.buffer`,
|
|
380
|
-
`Flow.parallel_map`, `Source.io`, `Sink.io`, and
|
|
381
|
-
installed `Fiber.scheduler` and a non-blocking
|
|
382
|
-
started. FiberStream does not install a
|
|
383
|
-
at runtime.
|
|
457
|
+
`Flow.parallel_map`, `Source.io`, `Source#merge`, `Sink.io`, and
|
|
458
|
+
`Pipeline#run_async` require an installed `Fiber.scheduler` and a non-blocking
|
|
459
|
+
current fiber when demanded or started. FiberStream does not install a
|
|
460
|
+
scheduler and does not depend on Async at runtime.
|
|
384
461
|
|
|
385
462
|
## API Surface
|
|
386
463
|
|
|
@@ -389,23 +466,27 @@ Sources:
|
|
|
389
466
|
- `FiberStream::Source.each(enumerable)`
|
|
390
467
|
- `FiberStream::Source.io(io, chunk_size: 16 * 1024, close: false)`
|
|
391
468
|
- `FiberStream::Source.ractor_port(port, ack_port:, ack_transfer: :copy, cancel: true)`
|
|
469
|
+
- `FiberStream::Source.ractor_merge_ports(ports, ack_transfer: :copy, cancel: true)`
|
|
392
470
|
|
|
393
471
|
Source convenience methods:
|
|
394
472
|
|
|
395
473
|
- `Source#via(flow)`
|
|
396
474
|
- `Source#concat(source)`
|
|
397
475
|
- `Source#zip(source)`
|
|
476
|
+
- `Source#merge(source)`
|
|
398
477
|
- `Source#map { |element| ... }`
|
|
399
478
|
- `Source#parallel_map(concurrency:) { |element| ... }`
|
|
400
479
|
- `Source#ractor_map(workers:, input_transfer: :copy, output_transfer: :copy) { |element| ... }`
|
|
401
480
|
- `Source#select { |element| ... }`
|
|
402
481
|
- `Source#take(count)`
|
|
403
482
|
- `Source#drop(count)`
|
|
483
|
+
- `Source#grouped(count)`
|
|
404
484
|
- `Source#take_while { |element| ... }`
|
|
405
485
|
- `Source#drop_while { |element| ... }`
|
|
406
486
|
- `Source#async`
|
|
407
487
|
- `Source#buffer(count)`
|
|
408
488
|
- `Source#lines(chomp: true, max_length: nil)`
|
|
489
|
+
- `Source#split(separator, keep_separator: false, max_length: nil)`
|
|
409
490
|
- `Source#to(sink)`
|
|
410
491
|
- `Source#run_with(sink)`
|
|
411
492
|
|
|
@@ -417,14 +498,20 @@ Flows:
|
|
|
417
498
|
- `FiberStream::Flow.select { |element| ... }`
|
|
418
499
|
- `FiberStream::Flow.take(count)`
|
|
419
500
|
- `FiberStream::Flow.drop(count)`
|
|
501
|
+
- `FiberStream::Flow.grouped(count)`
|
|
420
502
|
- `FiberStream::Flow.take_while { |element| ... }`
|
|
421
503
|
- `FiberStream::Flow.drop_while { |element| ... }`
|
|
422
504
|
- `FiberStream::Flow.async`
|
|
423
505
|
- `FiberStream::Flow.buffer(count)`
|
|
424
506
|
- `FiberStream::Flow.lines(chomp: true, max_length: nil)`
|
|
507
|
+
- `FiberStream::Flow.split(separator, keep_separator: false, max_length: nil)`
|
|
425
508
|
- `Flow#via(flow)`
|
|
426
509
|
- `Flow#to(sink)`
|
|
427
510
|
|
|
511
|
+
`lines` and `split` default to `max_length: nil`, which allows one
|
|
512
|
+
unterminated line or frame to buffer without bound. Set a positive
|
|
513
|
+
`max_length` for untrusted, network-facing, or otherwise unbounded inputs.
|
|
514
|
+
|
|
428
515
|
Sinks:
|
|
429
516
|
|
|
430
517
|
- `FiberStream::Sink.to_a`
|
|
@@ -455,6 +542,7 @@ bundle exec ruby examples/backpressure_buffer.rb
|
|
|
455
542
|
bundle exec ruby examples/background_execution.rb
|
|
456
543
|
bundle exec ruby examples/ractor_map_hashing.rb
|
|
457
544
|
bundle exec ruby examples/ractor_port_source.rb
|
|
545
|
+
bundle exec ruby examples/ractor_merge_ports_and_map.rb
|
|
458
546
|
bundle exec ruby examples/async_http_requests.rb
|
|
459
547
|
bundle exec ruby examples/async_http_streaming_body.rb
|
|
460
548
|
```
|
|
@@ -468,6 +556,10 @@ with a shareable mapper proc and `input_transfer: :move`.
|
|
|
468
556
|
`examples/ractor_port_source.rb` demonstrates a producer Ractor that waits for
|
|
469
557
|
`RactorPort::Ack` before sending each `RactorPort::Element`.
|
|
470
558
|
|
|
559
|
+
`examples/ractor_merge_ports_and_map.rb` demonstrates CPU-bound producer
|
|
560
|
+
Ractors merged with `Source.ractor_merge_ports`, followed by CPU-bound
|
|
561
|
+
verification in `ractor_map` workers.
|
|
562
|
+
|
|
471
563
|
`examples/async_http_requests.rb` starts a local HTTP server and shows
|
|
472
564
|
FiberStream overlapping independent HTTP request waits with `parallel_map`.
|
|
473
565
|
|
|
@@ -480,6 +572,7 @@ Benchmark scripts live under `benchmarks/`.
|
|
|
480
572
|
```sh
|
|
481
573
|
bundle exec ruby benchmarks/stream_transform.rb
|
|
482
574
|
bundle exec ruby benchmarks/latency_overlap.rb
|
|
575
|
+
bundle exec ruby benchmarks/async_io_fanout.rb
|
|
483
576
|
bundle exec ruby benchmarks/heavy_cpu_map.rb
|
|
484
577
|
```
|
|
485
578
|
|
data/examples/README.md
CHANGED
|
@@ -11,6 +11,7 @@ bundle exec ruby examples/backpressure_buffer.rb
|
|
|
11
11
|
bundle exec ruby examples/background_execution.rb
|
|
12
12
|
bundle exec ruby examples/ractor_map_hashing.rb
|
|
13
13
|
bundle exec ruby examples/ractor_port_source.rb
|
|
14
|
+
bundle exec ruby examples/ractor_merge_ports_and_map.rb
|
|
14
15
|
bundle exec ruby examples/async_http_requests.rb
|
|
15
16
|
bundle exec ruby examples/async_http_streaming_body.rb
|
|
16
17
|
```
|
|
@@ -47,6 +48,10 @@ pipeline runs.
|
|
|
47
48
|
`RactorPort::Ack`, and sends one typed `RactorPort::Element` per downstream
|
|
48
49
|
demand.
|
|
49
50
|
|
|
51
|
+
`ractor_merge_ports_and_map.rb` runs CPU-bound work in multiple producer
|
|
52
|
+
Ractors, merges their port outputs with `Source.ractor_merge_ports`, then runs
|
|
53
|
+
another CPU-bound verification stage with `ractor_map`.
|
|
54
|
+
|
|
50
55
|
`async_http_requests.rb` starts a local HTTP server and compares serial
|
|
51
56
|
requests with FiberStream `parallel_map` requests. It keeps responses ordered
|
|
52
57
|
while overlapping independent network waits.
|
|
data/examples/ractor_merge_ports_and_map.rb
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
$LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
|
|
4
|
+
|
|
5
|
+
require "digest"
|
|
6
|
+
require "fiber_stream"
|
|
7
|
+
|
|
8
|
+
PRODUCER_JOBS = [
|
|
9
|
+
[
|
|
10
|
+
"producer-a",
|
|
11
|
+
[
|
|
12
|
+
{ name: "alpha.bin", payload: +"A" * 180_000, seed_rounds: 80, verify_rounds: 60 },
|
|
13
|
+
{ name: "bravo.bin", payload: +"B" * 140_000, seed_rounds: 70, verify_rounds: 55 }
|
|
14
|
+
]
|
|
15
|
+
],
|
|
16
|
+
[
|
|
17
|
+
"producer-b",
|
|
18
|
+
[
|
|
19
|
+
{ name: "charlie.bin", payload: +"C" * 220_000, seed_rounds: 85, verify_rounds: 65 },
|
|
20
|
+
{ name: "delta.bin", payload: +"D" * 120_000, seed_rounds: 75, verify_rounds: 50 }
|
|
21
|
+
]
|
|
22
|
+
]
|
|
23
|
+
].freeze
|
|
24
|
+
|
|
25
|
+
VERIFY_RECORD =
|
|
26
|
+
Ractor.shareable_proc do |record|
|
|
27
|
+
digest = record.fetch(:seed_sha256)
|
|
28
|
+
|
|
29
|
+
record.fetch(:verify_rounds).times do |index|
|
|
30
|
+
digest = Digest::SHA256.hexdigest("#{digest}:verify:#{index}")
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
record.merge(final_sha256: digest)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def spawn_digest_producer(data_port, producer_name, jobs)
|
|
37
|
+
setup_port = Ractor::Port.new
|
|
38
|
+
producer =
|
|
39
|
+
Ractor.new(data_port, setup_port, producer_name, jobs) do |outbox, setup, name, producer_jobs|
|
|
40
|
+
ack_port = Ractor::Port.new
|
|
41
|
+
setup.send(ack_port)
|
|
42
|
+
|
|
43
|
+
enumerator = producer_jobs.to_enum
|
|
44
|
+
sent = 0
|
|
45
|
+
|
|
46
|
+
loop do
|
|
47
|
+
case ack_port.receive
|
|
48
|
+
in FiberStream::RactorPort::Ack
|
|
49
|
+
begin
|
|
50
|
+
job = enumerator.next
|
|
51
|
+
digest = job.fetch(:payload)
|
|
52
|
+
|
|
53
|
+
job.fetch(:seed_rounds).times do |index|
|
|
54
|
+
digest = Digest::SHA256.hexdigest("#{digest}:#{name}:#{index}")
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
sent += 1
|
|
58
|
+
outbox.send(
|
|
59
|
+
FiberStream::RactorPort::Element.new(
|
|
60
|
+
{
|
|
61
|
+
producer: name,
|
|
62
|
+
name: job.fetch(:name),
|
|
63
|
+
bytes: job.fetch(:payload).bytesize,
|
|
64
|
+
seed_sha256: digest,
|
|
65
|
+
verify_rounds: job.fetch(:verify_rounds)
|
|
66
|
+
}
|
|
67
|
+
),
|
|
68
|
+
move: true
|
|
69
|
+
)
|
|
70
|
+
rescue StopIteration
|
|
71
|
+
outbox.send(FiberStream::RactorPort::Complete.new)
|
|
72
|
+
break [:completed, name, sent]
|
|
73
|
+
end
|
|
74
|
+
in FiberStream::RactorPort::Cancel[reason]
|
|
75
|
+
break [:cancelled, name, sent, reason]
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
[producer, setup_port.receive]
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
port_pairs = []
|
|
84
|
+
producers =
|
|
85
|
+
PRODUCER_JOBS.map do |producer_name, jobs|
|
|
86
|
+
data_port = Ractor::Port.new
|
|
87
|
+
producer, ack_port = spawn_digest_producer(data_port, producer_name, jobs)
|
|
88
|
+
port_pairs << { port: data_port, ack_port: ack_port }
|
|
89
|
+
producer
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
records =
|
|
93
|
+
FiberStream::Source.ractor_merge_ports(port_pairs)
|
|
94
|
+
.ractor_map(workers: 2, input_transfer: :move, output_transfer: :move, &VERIFY_RECORD)
|
|
95
|
+
.run_with(FiberStream::Sink.to_a)
|
|
96
|
+
|
|
97
|
+
puts "Merged producer Ractors, then verified in ractor_map workers"
|
|
98
|
+
records.each do |record|
|
|
99
|
+
puts format(
|
|
100
|
+
"- %-10<producer>s %-11<name>s %7<bytes>d bytes %<final_sha256>s",
|
|
101
|
+
producer: record.fetch(:producer),
|
|
102
|
+
name: record.fetch(:name),
|
|
103
|
+
bytes: record.fetch(:bytes),
|
|
104
|
+
final_sha256: record.fetch(:final_sha256)
|
|
105
|
+
)
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
puts
|
|
109
|
+
puts "Producer statuses:"
|
|
110
|
+
producers.each do |producer|
|
|
111
|
+
puts "- #{producer.value.inspect}"
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
puts
|
|
115
|
+
puts "Source.ractor_merge_ports emits producers in ready order."
|
|
116
|
+
puts "ractor_map preserves that merged input order while running verification in Ractor workers."
|
data/lib/fiber_stream/errors.rb
CHANGED
|
@@ -9,7 +9,10 @@ module FiberStream
|
|
|
9
9
|
#
|
|
10
10
|
# Producer failures, invalid protocol messages, and source-side Ractor port
|
|
11
11
|
# failures use this stable error shape so callers do not need to depend on
|
|
12
|
-
# Ruby's Ractor transport exceptions.
|
|
12
|
+
# Ruby's Ractor transport exceptions. For producer failures,
|
|
13
|
+
# `cause_class_name` and `cause_message` come from the producer's
|
|
14
|
+
# `RactorPort::Failure` envelope and are included in this error's public
|
|
15
|
+
# message.
|
|
13
16
|
class RactorPortSourceError < RuntimeError
|
|
14
17
|
attr_reader :kind, :cause_class_name, :cause_message, :original_cause
|
|
15
18
|
|
data/lib/fiber_stream/flow.rb
CHANGED
|
@@ -87,6 +87,19 @@ module FiberStream
|
|
|
87
87
|
new { |upstream| Pull.drop(upstream, count) }
|
|
88
88
|
end
|
|
89
89
|
|
|
90
|
+
# Creates a fixed-size grouping flow.
|
|
91
|
+
#
|
|
92
|
+
# The flow emits arrays containing up to `count` adjacent upstream elements.
|
|
93
|
+
# Full groups contain exactly `count` elements; normal upstream completion
|
|
94
|
+
# emits one final partial group when one exists. `count` must be a positive
|
|
95
|
+
# Integer.
|
|
96
|
+
def self.grouped(count)
|
|
97
|
+
raise TypeError, "count must be an Integer" unless count.is_a?(Integer)
|
|
98
|
+
raise ArgumentError, "count must be positive" unless count.positive?
|
|
99
|
+
|
|
100
|
+
new { |upstream| Pull.grouped(upstream, count) }
|
|
101
|
+
end
|
|
102
|
+
|
|
90
103
|
# Creates a predicate-based limiting flow.
|
|
91
104
|
#
|
|
92
105
|
# The flow emits leading elements while the block result is truthy. The
|
|
@@ -142,7 +155,9 @@ module FiberStream
|
|
|
142
155
|
#
|
|
143
156
|
# The flow accepts String chunks and emits lines split on "\n". By default
|
|
144
157
|
# it chomps the trailing newline and one preceding "\r". `max_length` is an
|
|
145
|
-
# optional per-line bytesize limit.
|
|
158
|
+
# optional per-line bytesize limit. With `max_length: nil`, one
|
|
159
|
+
# unterminated line can buffer without bound. Set a positive `max_length`
|
|
160
|
+
# for untrusted, network-facing, or otherwise unbounded streams.
|
|
146
161
|
def self.lines(chomp: true, max_length: nil)
|
|
147
162
|
raise TypeError, "chomp must be true or false" unless [true, false].include?(chomp)
|
|
148
163
|
unless max_length.nil? || max_length.is_a?(Integer)
|
|
@@ -153,6 +168,27 @@ module FiberStream
|
|
|
153
168
|
new { |upstream| Pull.lines(upstream, chomp, max_length) }
|
|
154
169
|
end
|
|
155
170
|
|
|
171
|
+
# Creates a delimiter-splitting flow.
|
|
172
|
+
#
|
|
173
|
+
# The flow accepts String chunks and emits frames split on the non-empty
|
|
174
|
+
# String `separator`. Separator matching is byte-oriented. By default
|
|
175
|
+
# emitted frames exclude the separator; `keep_separator: true` preserves it
|
|
176
|
+
# on separator-terminated frames. `max_length` is an optional per-frame body
|
|
177
|
+
# bytesize limit. With `max_length: nil`, one unterminated frame can buffer
|
|
178
|
+
# without bound. Set a positive `max_length` for untrusted, network-facing,
|
|
179
|
+
# or otherwise unbounded streams.
|
|
180
|
+
def self.split(separator, keep_separator: false, max_length: nil)
|
|
181
|
+
raise TypeError, "separator must be String" unless separator.is_a?(String)
|
|
182
|
+
raise ArgumentError, "separator must not be empty" if separator.empty?
|
|
183
|
+
raise TypeError, "keep_separator must be true or false" unless [true, false].include?(keep_separator)
|
|
184
|
+
unless max_length.nil? || max_length.is_a?(Integer)
|
|
185
|
+
raise TypeError, "max_length must be nil or an Integer"
|
|
186
|
+
end
|
|
187
|
+
raise ArgumentError, "max_length must be positive" if max_length&.<= 0
|
|
188
|
+
|
|
189
|
+
new { |upstream| Pull.split(upstream, separator, keep_separator, max_length) }
|
|
190
|
+
end
|
|
191
|
+
|
|
156
192
|
def self.validate_ractor_transfer_policy!(name, value)
|
|
157
193
|
return if [:copy, :move].include?(value)
|
|
158
194
|
|
|
data/lib/fiber_stream/pull/async_boundary.rb
CHANGED
|
@@ -9,12 +9,18 @@ module FiberStream
|
|
|
9
9
|
# time back to the downstream caller, so it adds an async boundary without
|
|
10
10
|
# adding prefetch.
|
|
11
11
|
class AsyncBoundary
|
|
12
|
+
ValueMessage = Data.define(:value)
|
|
13
|
+
DoneMessage = Data.define
|
|
14
|
+
ErrorMessage = Data.define(:error)
|
|
15
|
+
private_constant :ValueMessage, :DoneMessage, :ErrorMessage
|
|
16
|
+
|
|
12
17
|
def initialize(upstream)
|
|
13
18
|
@upstream = upstream
|
|
14
19
|
@producer = nil
|
|
15
20
|
@started = false
|
|
16
21
|
@closed = false
|
|
17
22
|
@done = false
|
|
23
|
+
@upstream_closed = false
|
|
18
24
|
end
|
|
19
25
|
|
|
20
26
|
def next
|
|
@@ -23,14 +29,14 @@ module FiberStream
|
|
|
23
29
|
start
|
|
24
30
|
message = @producer.resume
|
|
25
31
|
|
|
26
|
-
case message
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
32
|
+
case message
|
|
33
|
+
in ValueMessage[value:]
|
|
34
|
+
value
|
|
35
|
+
in DoneMessage
|
|
30
36
|
complete
|
|
31
|
-
|
|
37
|
+
in ErrorMessage[error:]
|
|
32
38
|
@done = true
|
|
33
|
-
raise
|
|
39
|
+
raise error
|
|
34
40
|
end
|
|
35
41
|
end
|
|
36
42
|
|
|
@@ -39,7 +45,7 @@ module FiberStream
|
|
|
39
45
|
|
|
40
46
|
@closed = true
|
|
41
47
|
@done = true
|
|
42
|
-
|
|
48
|
+
close_upstream
|
|
43
49
|
ensure
|
|
44
50
|
cancel_producer
|
|
45
51
|
end
|
|
@@ -60,16 +66,16 @@ module FiberStream
|
|
|
60
66
|
|
|
61
67
|
value = @upstream.next
|
|
62
68
|
if Pull.done?(value)
|
|
63
|
-
Fiber.yield(
|
|
69
|
+
Fiber.yield(DoneMessage.new)
|
|
64
70
|
break
|
|
65
71
|
end
|
|
66
72
|
|
|
67
|
-
Fiber.yield(
|
|
73
|
+
Fiber.yield(ValueMessage.new(value:))
|
|
68
74
|
end
|
|
69
75
|
rescue StandardError => exception
|
|
70
|
-
Fiber.yield(
|
|
76
|
+
Fiber.yield(ErrorMessage.new(error: exception)) unless @closed
|
|
71
77
|
ensure
|
|
72
|
-
|
|
78
|
+
close_upstream
|
|
73
79
|
end
|
|
74
80
|
|
|
75
81
|
def complete
|
|
@@ -77,7 +83,18 @@ module FiberStream
|
|
|
77
83
|
DONE
|
|
78
84
|
end
|
|
79
85
|
|
|
86
|
+
def close_upstream
|
|
87
|
+
return if @upstream_closed
|
|
88
|
+
|
|
89
|
+
@upstream_closed = true
|
|
90
|
+
@upstream.close
|
|
91
|
+
end
|
|
92
|
+
|
|
80
93
|
def cancel_producer
|
|
94
|
+
return unless @producer&.alive?
|
|
95
|
+
|
|
96
|
+
@producer.kill
|
|
97
|
+
rescue StandardError
|
|
81
98
|
nil
|
|
82
99
|
end
|
|
83
100
|
end
|
|
data/lib/fiber_stream/pull/buffer_boundary.rb
CHANGED
|
@@ -9,10 +9,17 @@ module FiberStream
|
|
|
9
9
|
# queue capacity plus in-flight producer/consumer work. Close is responsible
|
|
10
10
|
# for closing upstream and waking any producer blocked on a full queue.
|
|
11
11
|
class BufferBoundary
|
|
12
|
+
CancellationError = Class.new(StandardError)
|
|
13
|
+
ValueMessage = Data.define(:value)
|
|
14
|
+
DoneMessage = Data.define
|
|
15
|
+
ErrorMessage = Data.define(:error)
|
|
16
|
+
private_constant :CancellationError, :ValueMessage, :DoneMessage, :ErrorMessage
|
|
17
|
+
|
|
12
18
|
def initialize(upstream, count)
|
|
13
19
|
@upstream = upstream
|
|
14
20
|
@queue = Thread::SizedQueue.new(count)
|
|
15
21
|
@producer = nil
|
|
22
|
+
@scheduler = nil
|
|
16
23
|
@started = false
|
|
17
24
|
@closed = false
|
|
18
25
|
@done = false
|
|
@@ -27,14 +34,14 @@ module FiberStream
|
|
|
27
34
|
message = @queue.pop
|
|
28
35
|
return complete if message.nil?
|
|
29
36
|
|
|
30
|
-
case message
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
37
|
+
case message
|
|
38
|
+
in ValueMessage[value:]
|
|
39
|
+
value
|
|
40
|
+
in DoneMessage
|
|
34
41
|
complete
|
|
35
|
-
|
|
42
|
+
in ErrorMessage[error:]
|
|
36
43
|
@done = true
|
|
37
|
-
raise
|
|
44
|
+
raise error
|
|
38
45
|
end
|
|
39
46
|
end
|
|
40
47
|
|
|
@@ -58,7 +65,9 @@ module FiberStream
|
|
|
58
65
|
raise SchedulerRequiredError, "Flow.buffer requires Fiber.scheduler" unless Fiber.scheduler
|
|
59
66
|
|
|
60
67
|
@started = true
|
|
68
|
+
@scheduler = Fiber.scheduler
|
|
61
69
|
@producer = Fiber.schedule { run_producer }
|
|
70
|
+
cancel_producer if @closed
|
|
62
71
|
end
|
|
63
72
|
|
|
64
73
|
def run_producer
|
|
@@ -67,8 +76,10 @@ module FiberStream
|
|
|
67
76
|
|
|
68
77
|
message = pull_message
|
|
69
78
|
break unless deliver(message)
|
|
70
|
-
break unless message.
|
|
79
|
+
break unless message.is_a?(ValueMessage)
|
|
71
80
|
end
|
|
81
|
+
rescue CancellationError
|
|
82
|
+
nil
|
|
72
83
|
ensure
|
|
73
84
|
@upstream_close_error ||= close_upstream unless @upstream_closed
|
|
74
85
|
end
|
|
@@ -77,15 +88,17 @@ module FiberStream
|
|
|
77
88
|
value = @upstream.next
|
|
78
89
|
return terminal_done_message if Pull.done?(value)
|
|
79
90
|
|
|
80
|
-
|
|
91
|
+
ValueMessage.new(value:)
|
|
92
|
+
rescue CancellationError
|
|
93
|
+
raise
|
|
81
94
|
rescue StandardError => error
|
|
82
95
|
close_upstream(record_error: false)
|
|
83
|
-
|
|
96
|
+
ErrorMessage.new(error:)
|
|
84
97
|
end
|
|
85
98
|
|
|
86
99
|
def terminal_done_message
|
|
87
100
|
close_error = close_upstream
|
|
88
|
-
close_error ?
|
|
101
|
+
close_error ? ErrorMessage.new(error: close_error) : DoneMessage.new
|
|
89
102
|
end
|
|
90
103
|
|
|
91
104
|
def deliver(message)
|
|
@@ -116,6 +129,11 @@ module FiberStream
|
|
|
116
129
|
end
|
|
117
130
|
|
|
118
131
|
def cancel_producer
|
|
132
|
+
return unless @producer&.alive?
|
|
133
|
+
return unless @scheduler.respond_to?(:fiber_interrupt)
|
|
134
|
+
|
|
135
|
+
@scheduler.fiber_interrupt(@producer, CancellationError.new)
|
|
136
|
+
rescue NotImplementedError, StandardError
|
|
119
137
|
nil
|
|
120
138
|
end
|
|
121
139
|
end
|
|
data/lib/fiber_stream/pull/concat.rb
CHANGED
|
@@ -8,7 +8,7 @@ module FiberStream
|
|
|
8
8
|
def initialize(left_materializer, right_materializer)
|
|
9
9
|
@left_materializer = left_materializer
|
|
10
10
|
@right_materializer = right_materializer
|
|
11
|
-
@left =
|
|
11
|
+
@left = nil
|
|
12
12
|
@right = nil
|
|
13
13
|
@phase = :left
|
|
14
14
|
@closed = false
|
|
@@ -38,6 +38,7 @@ module FiberStream
|
|
|
38
38
|
private
|
|
39
39
|
|
|
40
40
|
def next_left
|
|
41
|
+
materialize_left
|
|
41
42
|
value = @left.next
|
|
42
43
|
return value unless Pull.done?(value)
|
|
43
44
|
|
|
@@ -56,6 +57,13 @@ module FiberStream
|
|
|
56
57
|
DONE
|
|
57
58
|
end
|
|
58
59
|
|
|
60
|
+
def materialize_left
|
|
61
|
+
return if @left
|
|
62
|
+
|
|
63
|
+
stream = @left_materializer.call
|
|
64
|
+
@left = stream
|
|
65
|
+
end
|
|
66
|
+
|
|
59
67
|
def close_left
|
|
60
68
|
stream = @left
|
|
61
69
|
return unless stream
|