fiber_stream 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +57 -0
- data/README.md +283 -12
- data/examples/README.md +10 -0
- data/examples/async_http_streaming_body.rb +115 -0
- data/examples/ractor_merge_ports_and_map.rb +116 -0
- data/lib/fiber_stream/errors.rb +4 -1
- data/lib/fiber_stream/flow.rb +74 -1
- data/lib/fiber_stream/pull/async_boundary.rb +28 -11
- data/lib/fiber_stream/pull/buffer_boundary.rb +28 -10
- data/lib/fiber_stream/pull/concat.rb +103 -0
- data/lib/fiber_stream/pull/drop.rb +58 -0
- data/lib/fiber_stream/pull/drop_while.rb +61 -0
- data/lib/fiber_stream/pull/grouped.rb +46 -0
- data/lib/fiber_stream/pull/merge.rb +230 -0
- data/lib/fiber_stream/pull/parallel_map_boundary.rb +28 -24
- data/lib/fiber_stream/pull/ractor_map_boundary.rb +103 -79
- data/lib/fiber_stream/pull/ractor_merge_ports_source.rb +358 -0
- data/lib/fiber_stream/pull/ractor_port_source.rb +14 -14
- data/lib/fiber_stream/pull/split.rb +134 -0
- data/lib/fiber_stream/pull/take_while.rb +42 -0
- data/lib/fiber_stream/pull/zip.rb +83 -0
- data/lib/fiber_stream/pull.rb +48 -3
- data/lib/fiber_stream/ractor_port.rb +3 -1
- data/lib/fiber_stream/running_pipeline.rb +18 -8
- data/lib/fiber_stream/sink.rb +24 -0
- data/lib/fiber_stream/source.rb +190 -7
- data/lib/fiber_stream/version.rb +1 -1
- data/sig/fiber_stream.rbs +18 -1
- metadata +27 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2401496eff99cd4792deda8fa927688fc3ba0e97e8a35637db4395895ab04cd9
|
|
4
|
+
data.tar.gz: 9f9674e2bc7dc9ce49b899727a02b9158ce6c9629c744ec6d626b16923e14160
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e7237c7c15b66105b09cdccf1e52980f3c030c74acbd2dc7cc3b5cfb720a716836925b5801bdc59351131dcd7ecab72ebdedb6b50593f5258a1cff2d0ebb39a9
|
|
7
|
+
data.tar.gz: d9bc7c85992c344bef4a36b0f234840c154c06274c450ef0696246d3042a017f8764c22ab1a2df43e3059420c52af7827852e63b4a5e91d668f4c9bb6a191d57
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,62 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.3.0 - 2026-06-06
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
|
|
7
|
+
- `Flow.grouped(count)` and `Source#grouped(count)` for fixed-size batches
|
|
8
|
+
with final partial-group emission.
|
|
9
|
+
- `Source#merge(source)` for scheduler-backed ready-order merging of two
|
|
10
|
+
sources while preserving each input source's own order.
|
|
11
|
+
- `Source.ractor_merge_ports(ports)` for backpressure-aware merging of
|
|
12
|
+
multiple producer Ractor ports without requiring a `Fiber.scheduler`.
|
|
13
|
+
- `Flow.split(separator)` and `Source#split(separator)` for delimiter-based
|
|
14
|
+
framing with optional separator retention and per-frame length limits.
|
|
15
|
+
- Benchmarks and examples for async IO fanout, stream lifecycle probes, and
|
|
16
|
+
Ractor port merge workflows.
|
|
17
|
+
|
|
18
|
+
### Changed
|
|
19
|
+
|
|
20
|
+
- Reworked flow operator tests into focused per-operator test files.
|
|
21
|
+
- Expanded README and repository documentation for source merging, Ractor port
|
|
22
|
+
merging, split framing, grouped batches, and runtime safety guidance.
|
|
23
|
+
- Clarified that `Flow.lines(max_length: nil)` and
|
|
24
|
+
`Flow.split(max_length: nil)` may buffer one unterminated frame without
|
|
25
|
+
bound, and documented explicit `max_length` usage for untrusted streams.
|
|
26
|
+
- Clarified `Source.io` `chunk_size` allocation behavior and Ractor failure
|
|
27
|
+
metadata exposure.
|
|
28
|
+
|
|
29
|
+
### Fixed
|
|
30
|
+
|
|
31
|
+
- Deferred `Source#concat` receiver materialization until downstream demand
|
|
32
|
+
reaches the concatenated source.
|
|
33
|
+
- Cancelled async and buffer producers when downstream closes early.
|
|
34
|
+
- Removed polling from Ractor map enqueue and cleanup paths.
|
|
35
|
+
- Re-raised background pipeline process-control exceptions instead of treating
|
|
36
|
+
them as ordinary stream failures.
|
|
37
|
+
- Hardened Ractor map worker teardown notifications so secondary send failures
|
|
38
|
+
do not cascade during shutdown.
|
|
39
|
+
|
|
40
|
+
## 0.2.0 - 2026-06-05
|
|
41
|
+
|
|
42
|
+
### Added
|
|
43
|
+
|
|
44
|
+
- `Source#zip(source)` for element-wise pairing of two sources with
|
|
45
|
+
demand-driven materialization and shortest-source completion.
|
|
46
|
+
- `Source#concat(source)` for lazy source concatenation.
|
|
47
|
+
- `Sink.foreach { |element| ... }` for side-effecting stream consumption
|
|
48
|
+
without accumulating elements.
|
|
49
|
+
- `Flow.drop(count)` and `Source#drop(count)` for fixed-prefix dropping.
|
|
50
|
+
- `Flow.take_while { |element| ... }` and `Source#take_while { |element| ... }`
|
|
51
|
+
for predicate-based prefix limiting.
|
|
52
|
+
- `Flow.drop_while { |element| ... }` and
|
|
53
|
+
`Source#drop_while { |element| ... }` for predicate-based prefix dropping.
|
|
54
|
+
|
|
55
|
+
### Changed
|
|
56
|
+
|
|
57
|
+
- Clarified documentation around FiberStream's linear roadmap and Ractor port
|
|
58
|
+
cancellation contract.
|
|
59
|
+
|
|
3
60
|
## 0.1.0 - 2026-06-03
|
|
4
61
|
|
|
5
62
|
Initial release.
|
data/README.md
CHANGED
|
@@ -27,17 +27,18 @@ FiberStream currently supports linear pipelines only.
|
|
|
27
27
|
|
|
28
28
|
Implemented capabilities:
|
|
29
29
|
|
|
30
|
-
- in-memory, IO,
|
|
31
|
-
-
|
|
32
|
-
|
|
33
|
-
-
|
|
30
|
+
- in-memory, IO, backpressure-aware Ractor port, and Ractor port merge sources
|
|
31
|
+
- lazy source concatenation, zipping, and scheduler-backed merging
|
|
32
|
+
- mapping, filtering, limiting, predicate-based limiting and dropping,
|
|
33
|
+
fixed-prefix dropping, fixed-size grouping, line and delimiter splitting, buffering, async
|
|
34
|
+
boundaries, ordered parallel mapping, and ordered Ractor-backed mapping
|
|
35
|
+
- array, first-element, fold, foreach, and IO sinks
|
|
34
36
|
- reusable flow composition and runnable pipelines
|
|
35
37
|
- foreground and scheduler-backed background pipeline execution
|
|
36
38
|
- public RBS signatures
|
|
37
39
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
- graph DSLs
|
|
40
|
+
FiberStream intentionally keeps the public model linear: one source, an
|
|
41
|
+
ordered chain of flows, and one sink.
|
|
41
42
|
|
|
42
43
|
## Core Concepts
|
|
43
44
|
|
|
@@ -52,8 +53,35 @@ source = FiberStream::Source.each([1, 2, 3])
|
|
|
52
53
|
source.run_with(FiberStream::Sink.to_a) # => [1, 2, 3]
|
|
53
54
|
```
|
|
54
55
|
|
|
56
|
+
Sources can be concatenated without materializing the appended source until the
|
|
57
|
+
first source completes:
|
|
58
|
+
|
|
59
|
+
```ruby
|
|
60
|
+
result =
|
|
61
|
+
FiberStream::Source.each([1, 2])
|
|
62
|
+
.concat(FiberStream::Source.each([3, 4]))
|
|
63
|
+
.run_with(FiberStream::Sink.to_a)
|
|
64
|
+
|
|
65
|
+
result # => [1, 2, 3, 4]
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Sources can also be zipped element-by-element. The zipped source emits pairs
|
|
69
|
+
and completes when either input source completes:
|
|
70
|
+
|
|
71
|
+
```ruby
|
|
72
|
+
result =
|
|
73
|
+
FiberStream::Source.each([1, 2, 3])
|
|
74
|
+
.zip(FiberStream::Source.each(["a", "b"]))
|
|
75
|
+
.run_with(FiberStream::Sink.to_a)
|
|
76
|
+
|
|
77
|
+
result # => [[1, "a"], [2, "b"]]
|
|
78
|
+
```
|
|
79
|
+
|
|
55
80
|
IO sources read chunks on demand and require a scheduler-backed non-blocking
|
|
56
|
-
fiber
|
|
81
|
+
fiber. The `chunk_size` option is the maximum byte count passed to
|
|
82
|
+
`readpartial` for one downstream pull; very large values may cause the IO
|
|
83
|
+
implementation to attempt large allocations, so choose a bounded value
|
|
84
|
+
appropriate for the workload:
|
|
57
85
|
|
|
58
86
|
```ruby
|
|
59
87
|
require "async"
|
|
@@ -107,6 +135,58 @@ FiberStream::Source.ractor_port(data_port, ack_port: ack_port)
|
|
|
107
135
|
producer.value
|
|
108
136
|
```
|
|
109
137
|
|
|
138
|
+
`RactorPort::Failure` cause metadata is producer-provided and is surfaced on
|
|
139
|
+
`RactorPortSourceError`. Redact internal paths, secrets, tenant data, or other
|
|
140
|
+
sensitive details before sending failures across trust boundaries.
|
|
141
|
+
|
|
142
|
+
Multiple producer Ractors can be merged directly with `Source.ractor_merge_ports`,
|
|
143
|
+
which does not need the scheduler-backed `Source#merge`. Each producer still receives at most one outstanding ack:
|
|
144
|
+
|
|
145
|
+
```ruby
|
|
146
|
+
source =
|
|
147
|
+
FiberStream::Source.ractor_merge_ports(
|
|
148
|
+
[
|
|
149
|
+
{ port: data_port_a, ack_port: ack_port_a },
|
|
150
|
+
{ port: data_port_b, ack_port: ack_port_b }
|
|
151
|
+
]
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
values = source.run_with(FiberStream::Sink.to_a)
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
Streaming HTTP response bodies that implement `#each`, such as
|
|
158
|
+
`async-http` response bodies, can be used with `Source.each` without buffering
|
|
159
|
+
the full body first. Use the HTTP client's block form or an explicit `ensure`
|
|
160
|
+
close because `Source.each` does not own the response body:
|
|
161
|
+
|
|
162
|
+
```ruby
|
|
163
|
+
require "async"
|
|
164
|
+
require "async/http/internet/instance"
|
|
165
|
+
require "fiber_stream"
|
|
166
|
+
|
|
167
|
+
url = "https://raw.githubusercontent.com/elastic/examples/master/" \
|
|
168
|
+
"Common%20Data%20Formats/nginx_logs/nginx_logs"
|
|
169
|
+
|
|
170
|
+
status_counts = Hash.new(0)
|
|
171
|
+
|
|
172
|
+
processed =
|
|
173
|
+
Sync do
|
|
174
|
+
Async::HTTP::Internet.get(url) do |response|
|
|
175
|
+
raise "unexpected status #{response.status}" unless response.status == 200
|
|
176
|
+
|
|
177
|
+
FiberStream::Source.each(response.body)
|
|
178
|
+
.lines(max_length: 16 * 1024)
|
|
179
|
+
.map { |line| line.split.fetch(8, nil) }
|
|
180
|
+
.select { |status| status&.match?(/\A\d{3}\z/) }
|
|
181
|
+
.run_with(
|
|
182
|
+
FiberStream::Sink.foreach do |status|
|
|
183
|
+
status_counts[status] += 1
|
|
184
|
+
end
|
|
185
|
+
)
|
|
186
|
+
end
|
|
187
|
+
end
|
|
188
|
+
```
|
|
189
|
+
|
|
110
190
|
### Flows
|
|
111
191
|
|
|
112
192
|
Flows transform a stream lazily. Convenience methods on `Source` delegate to
|
|
@@ -136,6 +216,64 @@ FiberStream::Source.each([" a ", "", " b "])
|
|
|
136
216
|
# => ["a", "b"]
|
|
137
217
|
```
|
|
138
218
|
|
|
219
|
+
Use `parallel_map` for ordered scheduler-backed mapping when each element
|
|
220
|
+
waits on non-blocking IO. It preserves input order while allowing up to
|
|
221
|
+
`concurrency` mapping operations to be in flight:
|
|
222
|
+
|
|
223
|
+
```ruby
|
|
224
|
+
require "async"
|
|
225
|
+
require "fiber_stream"
|
|
226
|
+
|
|
227
|
+
def fetch_profile(user_id)
|
|
228
|
+
# Example: perform scheduler-aware HTTP, database, or socket IO here.
|
|
229
|
+
sleep 0.05
|
|
230
|
+
{ id: user_id, name: "user-#{user_id}" }
|
|
231
|
+
end
|
|
232
|
+
|
|
233
|
+
profiles =
|
|
234
|
+
Sync do
|
|
235
|
+
FiberStream::Source.each([1, 2, 3, 4])
|
|
236
|
+
.parallel_map(concurrency: 4) { |user_id| fetch_profile(user_id) }
|
|
237
|
+
.run_with(FiberStream::Sink.to_a)
|
|
238
|
+
end
|
|
239
|
+
|
|
240
|
+
profiles.map { |profile| profile.fetch(:id) } # => [1, 2, 3, 4]
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
Use `ractor_map` for ordered CPU-bound mapping in Ractor workers. The mapper
|
|
244
|
+
must be shareable, usually by creating it with `Ractor.shareable_proc`:
|
|
245
|
+
|
|
246
|
+
```ruby
|
|
247
|
+
require "digest"
|
|
248
|
+
require "fiber_stream"
|
|
249
|
+
|
|
250
|
+
records = [
|
|
251
|
+
{ name: "alpha.bin", payload: +"A" * 200_000 },
|
|
252
|
+
{ name: "bravo.bin", payload: +"B" * 120_000 }
|
|
253
|
+
]
|
|
254
|
+
|
|
255
|
+
HASH_RECORD =
|
|
256
|
+
Ractor.shareable_proc do |record|
|
|
257
|
+
payload = record.fetch(:payload)
|
|
258
|
+
|
|
259
|
+
{
|
|
260
|
+
name: record.fetch(:name),
|
|
261
|
+
bytes: payload.bytesize,
|
|
262
|
+
sha256: Digest::SHA256.hexdigest(payload)
|
|
263
|
+
}
|
|
264
|
+
end
|
|
265
|
+
|
|
266
|
+
digests =
|
|
267
|
+
FiberStream::Source.each(records)
|
|
268
|
+
.ractor_map(workers: 2, input_transfer: :move, &HASH_RECORD)
|
|
269
|
+
.run_with(FiberStream::Sink.to_a)
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
`ractor_map` preserves input order, limits pulled-but-unemitted work to
|
|
273
|
+
`workers`, and does not require `Fiber.scheduler`. Use `input_transfer: :move`
|
|
274
|
+
or `output_transfer: :move` only when the moved object will not be reused by
|
|
275
|
+
the sender.
|
|
276
|
+
|
|
139
277
|
### Sinks
|
|
140
278
|
|
|
141
279
|
A `Sink` consumes the stream and returns a materialized value.
|
|
@@ -146,6 +284,17 @@ FiberStream::Source.each([1, 2, 3])
|
|
|
146
284
|
# => 6
|
|
147
285
|
```
|
|
148
286
|
|
|
287
|
+
Use `Sink.foreach` when the terminal operation is a side effect and the stream
|
|
288
|
+
values should not be accumulated:
|
|
289
|
+
|
|
290
|
+
```ruby
|
|
291
|
+
count =
|
|
292
|
+
FiberStream::Source.each(["a", "b", "c"])
|
|
293
|
+
.run_with(FiberStream::Sink.foreach { |value| puts value })
|
|
294
|
+
|
|
295
|
+
count # => 3
|
|
296
|
+
```
|
|
297
|
+
|
|
149
298
|
### Pipelines
|
|
150
299
|
|
|
151
300
|
`Source#to(sink)` creates a reusable runnable pipeline.
|
|
@@ -212,11 +361,103 @@ limited =
|
|
|
212
361
|
limited # => [1, 2]
|
|
213
362
|
```
|
|
214
363
|
|
|
364
|
+
`Flow.drop` skips a fixed prefix and then passes later elements through:
|
|
365
|
+
|
|
366
|
+
```ruby
|
|
367
|
+
tail =
|
|
368
|
+
FiberStream::Source.each([1, 2, 3, 4])
|
|
369
|
+
.drop(2)
|
|
370
|
+
.run_with(FiberStream::Sink.to_a)
|
|
371
|
+
|
|
372
|
+
tail # => [3, 4]
|
|
373
|
+
```
|
|
374
|
+
|
|
375
|
+
`Flow.grouped` batches adjacent elements into arrays and emits the final
|
|
376
|
+
partial group:
|
|
377
|
+
|
|
378
|
+
```ruby
|
|
379
|
+
batches =
|
|
380
|
+
FiberStream::Source.each([1, 2, 3, 4, 5])
|
|
381
|
+
.grouped(2)
|
|
382
|
+
.run_with(FiberStream::Sink.to_a)
|
|
383
|
+
|
|
384
|
+
batches # => [[1, 2], [3, 4], [5]]
|
|
385
|
+
```
|
|
386
|
+
|
|
387
|
+
`Flow.take_while` emits the leading prefix while a predicate is truthy, then
|
|
388
|
+
closes upstream at the first false or nil result:
|
|
389
|
+
|
|
390
|
+
```ruby
|
|
391
|
+
prefix =
|
|
392
|
+
FiberStream::Source.each([1, 2, 3, 1])
|
|
393
|
+
.take_while { |number| number < 3 }
|
|
394
|
+
.run_with(FiberStream::Sink.to_a)
|
|
395
|
+
|
|
396
|
+
prefix # => [1, 2]
|
|
397
|
+
```
|
|
398
|
+
|
|
399
|
+
`Flow.drop_while` skips the leading prefix while a predicate is truthy, then
|
|
400
|
+
passes the first false or nil result and all later elements through:
|
|
401
|
+
|
|
402
|
+
```ruby
|
|
403
|
+
tail =
|
|
404
|
+
FiberStream::Source.each([1, 2, 3, 1])
|
|
405
|
+
.drop_while { |number| number < 3 }
|
|
406
|
+
.run_with(FiberStream::Sink.to_a)
|
|
407
|
+
|
|
408
|
+
tail # => [3, 1]
|
|
409
|
+
```
|
|
410
|
+
|
|
411
|
+
`Source#concat` preserves pull-driven demand across source boundaries. The
|
|
412
|
+
appended source is not materialized while the first source can still satisfy
|
|
413
|
+
downstream demand:
|
|
414
|
+
|
|
415
|
+
```ruby
|
|
416
|
+
first =
|
|
417
|
+
FiberStream::Source.each([1])
|
|
418
|
+
.concat(FiberStream::Source.each([2]))
|
|
419
|
+
.run_with(FiberStream::Sink.first)
|
|
420
|
+
|
|
421
|
+
first # => 1
|
|
422
|
+
```
|
|
423
|
+
|
|
424
|
+
`Source#zip` keeps input source materialization behind downstream demand. The
|
|
425
|
+
other source is not materialized until the receiver has produced an element for
|
|
426
|
+
a pair:
|
|
427
|
+
|
|
428
|
+
```ruby
|
|
429
|
+
first =
|
|
430
|
+
FiberStream::Source.each([1])
|
|
431
|
+
.zip(FiberStream::Source.each([2]))
|
|
432
|
+
.run_with(FiberStream::Sink.first)
|
|
433
|
+
|
|
434
|
+
first # => [1, 2]
|
|
435
|
+
```
|
|
436
|
+
|
|
437
|
+
`Source#merge` emits values from either input source in scheduler-observed
|
|
438
|
+
ready order while preserving each input's own order:
|
|
439
|
+
|
|
440
|
+
```ruby
|
|
441
|
+
merged =
|
|
442
|
+
Sync do
|
|
443
|
+
FiberStream::Source.each([1, 2])
|
|
444
|
+
.merge(FiberStream::Source.each(["a", "b"]))
|
|
445
|
+
.run_with(FiberStream::Sink.to_a)
|
|
446
|
+
end
|
|
447
|
+
|
|
448
|
+
# Example result: [1, "a", 2, "b"]
|
|
449
|
+
```
|
|
450
|
+
|
|
451
|
+
`merge` does not make scheduler-unaware blocking source work non-blocking and
|
|
452
|
+
does not provide CPU parallelism. Use producer Ractors with
|
|
453
|
+
`Source.ractor_port` or `Source.ractor_merge_ports` when producer work needs
|
|
454
|
+
true isolation.
|
|
455
|
+
|
|
215
456
|
`Flow.buffer(count)` allows bounded prefetch. `Flow.async`, `Flow.buffer`,
|
|
216
|
-
`Flow.parallel_map`, `Source.io`, `Sink.io`, and
|
|
217
|
-
installed `Fiber.scheduler` and a non-blocking
|
|
218
|
-
started. FiberStream does not install a
|
|
219
|
-
at runtime.
|
|
457
|
+
`Flow.parallel_map`, `Source.io`, `Source#merge`, `Sink.io`, and
|
|
458
|
+
`Pipeline#run_async` require an installed `Fiber.scheduler` and a non-blocking
|
|
459
|
+
current fiber when demanded or started. FiberStream does not install a
|
|
460
|
+
scheduler and does not depend on Async at runtime.
|
|
220
461
|
|
|
221
462
|
## API Surface
|
|
222
463
|
|
|
@@ -225,18 +466,27 @@ Sources:
|
|
|
225
466
|
- `FiberStream::Source.each(enumerable)`
|
|
226
467
|
- `FiberStream::Source.io(io, chunk_size: 16 * 1024, close: false)`
|
|
227
468
|
- `FiberStream::Source.ractor_port(port, ack_port:, ack_transfer: :copy, cancel: true)`
|
|
469
|
+
- `FiberStream::Source.ractor_merge_ports(ports, ack_transfer: :copy, cancel: true)`
|
|
228
470
|
|
|
229
471
|
Source convenience methods:
|
|
230
472
|
|
|
231
473
|
- `Source#via(flow)`
|
|
474
|
+
- `Source#concat(source)`
|
|
475
|
+
- `Source#zip(source)`
|
|
476
|
+
- `Source#merge(source)`
|
|
232
477
|
- `Source#map { |element| ... }`
|
|
233
478
|
- `Source#parallel_map(concurrency:) { |element| ... }`
|
|
234
479
|
- `Source#ractor_map(workers:, input_transfer: :copy, output_transfer: :copy) { |element| ... }`
|
|
235
480
|
- `Source#select { |element| ... }`
|
|
236
481
|
- `Source#take(count)`
|
|
482
|
+
- `Source#drop(count)`
|
|
483
|
+
- `Source#grouped(count)`
|
|
484
|
+
- `Source#take_while { |element| ... }`
|
|
485
|
+
- `Source#drop_while { |element| ... }`
|
|
237
486
|
- `Source#async`
|
|
238
487
|
- `Source#buffer(count)`
|
|
239
488
|
- `Source#lines(chomp: true, max_length: nil)`
|
|
489
|
+
- `Source#split(separator, keep_separator: false, max_length: nil)`
|
|
240
490
|
- `Source#to(sink)`
|
|
241
491
|
- `Source#run_with(sink)`
|
|
242
492
|
|
|
@@ -247,17 +497,27 @@ Flows:
|
|
|
247
497
|
- `FiberStream::Flow.ractor_map(workers:, input_transfer: :copy, output_transfer: :copy) { |element| ... }`
|
|
248
498
|
- `FiberStream::Flow.select { |element| ... }`
|
|
249
499
|
- `FiberStream::Flow.take(count)`
|
|
500
|
+
- `FiberStream::Flow.drop(count)`
|
|
501
|
+
- `FiberStream::Flow.grouped(count)`
|
|
502
|
+
- `FiberStream::Flow.take_while { |element| ... }`
|
|
503
|
+
- `FiberStream::Flow.drop_while { |element| ... }`
|
|
250
504
|
- `FiberStream::Flow.async`
|
|
251
505
|
- `FiberStream::Flow.buffer(count)`
|
|
252
506
|
- `FiberStream::Flow.lines(chomp: true, max_length: nil)`
|
|
507
|
+
- `FiberStream::Flow.split(separator, keep_separator: false, max_length: nil)`
|
|
253
508
|
- `Flow#via(flow)`
|
|
254
509
|
- `Flow#to(sink)`
|
|
255
510
|
|
|
511
|
+
`lines` and `split` default to `max_length: nil`, which allows one
|
|
512
|
+
unterminated line or frame to buffer without bound. Set a positive
|
|
513
|
+
`max_length` for untrusted, network-facing, or otherwise unbounded inputs.
|
|
514
|
+
|
|
256
515
|
Sinks:
|
|
257
516
|
|
|
258
517
|
- `FiberStream::Sink.to_a`
|
|
259
518
|
- `FiberStream::Sink.first`
|
|
260
519
|
- `FiberStream::Sink.fold(initial) { |accumulator, element| ... }`
|
|
520
|
+
- `FiberStream::Sink.foreach { |element| ... }`
|
|
261
521
|
- `FiberStream::Sink.io(io, close: false, flush: false)`
|
|
262
522
|
|
|
263
523
|
Pipelines:
|
|
@@ -282,7 +542,9 @@ bundle exec ruby examples/backpressure_buffer.rb
|
|
|
282
542
|
bundle exec ruby examples/background_execution.rb
|
|
283
543
|
bundle exec ruby examples/ractor_map_hashing.rb
|
|
284
544
|
bundle exec ruby examples/ractor_port_source.rb
|
|
545
|
+
bundle exec ruby examples/ractor_merge_ports_and_map.rb
|
|
285
546
|
bundle exec ruby examples/async_http_requests.rb
|
|
547
|
+
bundle exec ruby examples/async_http_streaming_body.rb
|
|
286
548
|
```
|
|
287
549
|
|
|
288
550
|
`examples/backpressure_buffer.rb` prints timestamped producer and consumer
|
|
@@ -294,14 +556,23 @@ with a shareable mapper proc and `input_transfer: :move`.
|
|
|
294
556
|
`examples/ractor_port_source.rb` demonstrates a producer Ractor that waits for
|
|
295
557
|
`RactorPort::Ack` before sending each `RactorPort::Element`.
|
|
296
558
|
|
|
559
|
+
`examples/ractor_merge_ports_and_map.rb` demonstrates CPU-bound producer
|
|
560
|
+
Ractors merged with `Source.ractor_merge_ports`, followed by CPU-bound
|
|
561
|
+
verification in `ractor_map` workers.
|
|
562
|
+
|
|
297
563
|
`examples/async_http_requests.rb` starts a local HTTP server and shows
|
|
298
564
|
FiberStream overlapping independent HTTP request waits with `parallel_map`.
|
|
299
565
|
|
|
566
|
+
`examples/async_http_streaming_body.rb` streams a public nginx access log with
|
|
567
|
+
`async-http`, feeds the response body through `Source.each(response.body)`, and
|
|
568
|
+
aggregates lines without storing the full body.
|
|
569
|
+
|
|
300
570
|
Benchmark scripts live under `benchmarks/`.
|
|
301
571
|
|
|
302
572
|
```sh
|
|
303
573
|
bundle exec ruby benchmarks/stream_transform.rb
|
|
304
574
|
bundle exec ruby benchmarks/latency_overlap.rb
|
|
575
|
+
bundle exec ruby benchmarks/async_io_fanout.rb
|
|
305
576
|
bundle exec ruby benchmarks/heavy_cpu_map.rb
|
|
306
577
|
```
|
|
307
578
|
|
data/examples/README.md
CHANGED
|
@@ -11,7 +11,9 @@ bundle exec ruby examples/backpressure_buffer.rb
|
|
|
11
11
|
bundle exec ruby examples/background_execution.rb
|
|
12
12
|
bundle exec ruby examples/ractor_map_hashing.rb
|
|
13
13
|
bundle exec ruby examples/ractor_port_source.rb
|
|
14
|
+
bundle exec ruby examples/ractor_merge_ports_and_map.rb
|
|
14
15
|
bundle exec ruby examples/async_http_requests.rb
|
|
16
|
+
bundle exec ruby examples/async_http_streaming_body.rb
|
|
15
17
|
```
|
|
16
18
|
|
|
17
19
|
`basic_pipeline.rb` uses only in-memory values and does not require an async
|
|
@@ -46,6 +48,14 @@ pipeline runs.
|
|
|
46
48
|
`RactorPort::Ack`, and sends one typed `RactorPort::Element` per downstream
|
|
47
49
|
demand.
|
|
48
50
|
|
|
51
|
+
`ractor_merge_ports_and_map.rb` runs CPU-bound work in multiple producer
|
|
52
|
+
Ractors, merges their port outputs with `Source.ractor_merge_ports`, then runs
|
|
53
|
+
another CPU-bound verification stage with `ractor_map`.
|
|
54
|
+
|
|
49
55
|
`async_http_requests.rb` starts a local HTTP server and compares serial
|
|
50
56
|
requests with FiberStream `parallel_map` requests. It keeps responses ordered
|
|
51
57
|
while overlapping independent network waits.
|
|
58
|
+
|
|
59
|
+
`async_http_streaming_body.rb` downloads a public nginx access log with
|
|
60
|
+
`async-http` and streams `response.body` through `Source.each`, `Flow.lines`,
|
|
61
|
+
and `Sink.foreach` so the full HTTP body is not buffered in memory.
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
$LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
|
|
4
|
+
|
|
5
|
+
require "async"
|
|
6
|
+
require "async/http/internet/instance"
|
|
7
|
+
require "fiber_stream"
|
|
8
|
+
|
|
9
|
+
DEFAULT_URL =
|
|
10
|
+
"https://raw.githubusercontent.com/elastic/examples/master/" \
|
|
11
|
+
"Common%20Data%20Formats/nginx_logs/nginx_logs"
|
|
12
|
+
|
|
13
|
+
URL = ENV.fetch("FIBER_STREAM_HTTP_LOG_URL", DEFAULT_URL)
|
|
14
|
+
PROGRESS_EVERY = Integer(ENV.fetch("FIBER_STREAM_HTTP_PROGRESS_EVERY", "10_000"))
|
|
15
|
+
|
|
16
|
+
LOG_LINE =
|
|
17
|
+
/
|
|
18
|
+
\A
|
|
19
|
+
(?<remote_addr>\S+)\s+\S+\s+\S+\s+
|
|
20
|
+
\[[^\]]+\]\s+
|
|
21
|
+
"[^"]+"\s+
|
|
22
|
+
(?<status>\d{3})\s+
|
|
23
|
+
(?<bytes>\d+|-)\s
|
|
24
|
+
/x
|
|
25
|
+
|
|
26
|
+
def monotonic_time
|
|
27
|
+
Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
def parse_access_log(line)
|
|
31
|
+
match = LOG_LINE.match(line)
|
|
32
|
+
return nil unless match
|
|
33
|
+
|
|
34
|
+
{
|
|
35
|
+
remote_addr: match[:remote_addr],
|
|
36
|
+
status: match[:status],
|
|
37
|
+
bytes: match[:bytes] == "-" ? 0 : match[:bytes].to_i
|
|
38
|
+
}
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def empty_stats
|
|
42
|
+
{
|
|
43
|
+
lines: 0,
|
|
44
|
+
parsed: 0,
|
|
45
|
+
payload_bytes: 0,
|
|
46
|
+
statuses: Hash.new(0),
|
|
47
|
+
remote_addrs: Hash.new(0),
|
|
48
|
+
started_at: monotonic_time
|
|
49
|
+
}
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def record_entry(stats, entry)
|
|
53
|
+
stats[:lines] += 1
|
|
54
|
+
|
|
55
|
+
if entry
|
|
56
|
+
stats[:parsed] += 1
|
|
57
|
+
stats[:payload_bytes] += entry.fetch(:bytes)
|
|
58
|
+
stats[:statuses][entry.fetch(:status)] += 1
|
|
59
|
+
stats[:remote_addrs][entry.fetch(:remote_addr)] += 1
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
if (stats[:lines] % PROGRESS_EVERY).zero?
|
|
63
|
+
elapsed = monotonic_time - stats.fetch(:started_at)
|
|
64
|
+
puts format(
|
|
65
|
+
"processed %<lines>d lines in %<elapsed>.2fs",
|
|
66
|
+
lines: stats.fetch(:lines),
|
|
67
|
+
elapsed: elapsed
|
|
68
|
+
)
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
stats
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def print_summary(stats)
|
|
75
|
+
elapsed = monotonic_time - stats.fetch(:started_at)
|
|
76
|
+
mib = stats.fetch(:payload_bytes).fdiv(1024 * 1024)
|
|
77
|
+
|
|
78
|
+
puts
|
|
79
|
+
puts "Streaming HTTP body summary"
|
|
80
|
+
puts "URL: #{URL}"
|
|
81
|
+
puts format("lines parsed: %<parsed>d/%<lines>d", stats)
|
|
82
|
+
puts format("logged payload bytes: %<mib>.2f MiB", mib: mib)
|
|
83
|
+
puts format("unique remote addresses: %<count>d", count: stats.fetch(:remote_addrs).length)
|
|
84
|
+
puts format("elapsed: %<elapsed>.2fs", elapsed: elapsed)
|
|
85
|
+
|
|
86
|
+
puts
|
|
87
|
+
puts "HTTP status counts"
|
|
88
|
+
stats.fetch(:statuses).sort.each do |status, count|
|
|
89
|
+
puts format("- %<status>s: %<count>d", status: status, count: count)
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
stats = empty_stats
|
|
94
|
+
|
|
95
|
+
processed =
|
|
96
|
+
Sync do
|
|
97
|
+
Async::HTTP::Internet.get(URL) do |response|
|
|
98
|
+
unless response.status == 200
|
|
99
|
+
raise "unexpected HTTP status #{response.status} for #{URL}"
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
FiberStream::Source.each(response.body)
|
|
103
|
+
.lines(max_length: 16 * 1024)
|
|
104
|
+
.map { |line| parse_access_log(line) }
|
|
105
|
+
.run_with(
|
|
106
|
+
FiberStream::Sink.foreach do |entry|
|
|
107
|
+
record_entry(stats, entry)
|
|
108
|
+
end
|
|
109
|
+
)
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
raise "processed count mismatch" unless processed == stats.fetch(:lines)
|
|
114
|
+
|
|
115
|
+
print_summary(stats)
|