fiber_stream 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +25 -0
- data/LICENSE +19 -0
- data/README.md +361 -0
- data/examples/README.md +51 -0
- data/examples/async_http_requests.rb +132 -0
- data/examples/background_execution.rb +31 -0
- data/examples/backpressure_buffer.rb +66 -0
- data/examples/basic_pipeline.rb +28 -0
- data/examples/composable_pipeline.rb +43 -0
- data/examples/file_copy.rb +33 -0
- data/examples/line_processing.rb +20 -0
- data/examples/ractor_map_hashing.rb +43 -0
- data/examples/ractor_port_source.rb +45 -0
- data/lib/fiber_stream/errors.rb +44 -0
- data/lib/fiber_stream/flow.rb +190 -0
- data/lib/fiber_stream/pipeline.rb +49 -0
- data/lib/fiber_stream/pull/async_boundary.rb +85 -0
- data/lib/fiber_stream/pull/buffer_boundary.rb +123 -0
- data/lib/fiber_stream/pull/each.rb +31 -0
- data/lib/fiber_stream/pull/io_source.rb +89 -0
- data/lib/fiber_stream/pull/lines.rb +121 -0
- data/lib/fiber_stream/pull/map.rb +37 -0
- data/lib/fiber_stream/pull/parallel_map_boundary.rb +299 -0
- data/lib/fiber_stream/pull/ractor_map_boundary.rb +500 -0
- data/lib/fiber_stream/pull/ractor_port_source.rb +242 -0
- data/lib/fiber_stream/pull/select.rb +40 -0
- data/lib/fiber_stream/pull/take.rb +47 -0
- data/lib/fiber_stream/pull.rb +85 -0
- data/lib/fiber_stream/ractor_port.rb +17 -0
- data/lib/fiber_stream/running_pipeline.rb +156 -0
- data/lib/fiber_stream/sink.rb +176 -0
- data/lib/fiber_stream/source.rb +184 -0
- data/lib/fiber_stream/version.rb +5 -0
- data/lib/fiber_stream.rb +15 -0
- data/sig/fiber_stream.rbs +97 -0
- metadata +154 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: ab4449558105eb57805e40970a28971e32a37e748176df0875de75816bf53d2c
|
|
4
|
+
data.tar.gz: 6d1998ce477a4602a6f23d60817f2f49b9acc6efde64b18ee90ebd7084905bd5
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 4242c44baa5c1db6f7cb6c39ac217729603838dba31876180f4fcc2382e72308cad4d5197d8687b522317dfc9ad964bcd6f85b9449f580f88324098536b8906c
|
|
7
|
+
data.tar.gz: c2cc8091ec14b27eef4a60a82c1c22503118eee9da0a1a53e24a3e137058e1dd8c5e1536e89c5ba13c7f725cdc0ad241d48ade86cbe2162f6ac5ae2d9ce8f96f
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## 0.1.0 - 2026-06-03
|
|
4
|
+
|
|
5
|
+
Initial release.
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
|
|
9
|
+
- Lazy linear `Source`, `Flow`, `Sink`, and `Pipeline` APIs.
|
|
10
|
+
- Pull-based backpressure with `Source.each`, `Flow.map`, `Flow.select`,
|
|
11
|
+
`Flow.take`, `Sink.to_a`, `Sink.first`, and `Sink.fold`.
|
|
12
|
+
- Scheduler-aware IO source and sink support.
|
|
13
|
+
- Line framing with `Flow.lines`.
|
|
14
|
+
- Scheduler-backed async and bounded buffer boundaries.
|
|
15
|
+
- Ordered `Flow.parallel_map` and `Flow.ractor_map`.
|
|
16
|
+
- Backpressure-aware `Source.ractor_port` with typed Ractor protocol envelopes.
|
|
17
|
+
- Background pipeline execution with cancellation support.
|
|
18
|
+
- Public RBS signatures.
|
|
19
|
+
|
|
20
|
+
### Known Limitations
|
|
21
|
+
|
|
22
|
+
- Only linear pipelines are supported.
|
|
23
|
+
- IO and scheduler-backed stages require the caller to provide a Ruby
|
|
24
|
+
`Fiber.scheduler`; FiberStream does not install one.
|
|
25
|
+
- Ractor APIs are experimental in Ruby and may change in future Ruby releases.
|
data/LICENSE
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Copyright (c) 2026 Dai Akatsuka
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
5
|
+
in the Software without restriction, including without limitation the rights
|
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
8
|
+
furnished to do so, subject to the following conditions:
|
|
9
|
+
|
|
10
|
+
The above copyright notice and this permission notice shall be included in
|
|
11
|
+
all copies or substantial portions of the Software.
|
|
12
|
+
|
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
19
|
+
THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
# FiberStream
|
|
2
|
+
|
|
3
|
+
FiberStream is an early-stage Ruby library for linear, pull-based stream
|
|
4
|
+
processing with backpressure.
|
|
5
|
+
|
|
6
|
+
Build a lazy `Source`, transform it with `Flow` stages, and materialize it with
|
|
7
|
+
a `Sink`.
|
|
8
|
+
|
|
9
|
+
## Quick Start
|
|
10
|
+
|
|
11
|
+
```ruby
|
|
12
|
+
require "fiber_stream"
|
|
13
|
+
|
|
14
|
+
result =
|
|
15
|
+
FiberStream::Source.each([1, 2, 3, 4])
|
|
16
|
+
.map { |number| number * 2 }
|
|
17
|
+
.select(&:even?)
|
|
18
|
+
.take(2)
|
|
19
|
+
.run_with(FiberStream::Sink.to_a)
|
|
20
|
+
|
|
21
|
+
result # => [2, 4]
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Status
|
|
25
|
+
|
|
26
|
+
FiberStream currently supports linear pipelines only.
|
|
27
|
+
|
|
28
|
+
Implemented capabilities:
|
|
29
|
+
|
|
30
|
+
- in-memory, IO, and backpressure-aware Ractor port sources
|
|
31
|
+
- mapping, filtering, limiting, line splitting, buffering, async boundaries,
|
|
32
|
+
ordered parallel mapping, and ordered Ractor-backed mapping
|
|
33
|
+
- array, first-element, fold, and IO sinks
|
|
34
|
+
- reusable flow composition and runnable pipelines
|
|
35
|
+
- foreground and scheduler-backed background pipeline execution
|
|
36
|
+
- public RBS signatures
|
|
37
|
+
|
|
38
|
+
Not yet implemented:
|
|
39
|
+
|
|
40
|
+
- graph DSLs
|
|
41
|
+
|
|
42
|
+
## Core Concepts
|
|
43
|
+
|
|
44
|
+
### Sources
|
|
45
|
+
|
|
46
|
+
A `Source` is a lazy stream definition. It is not consumed until the source is
|
|
47
|
+
run with a sink.
|
|
48
|
+
|
|
49
|
+
```ruby
|
|
50
|
+
source = FiberStream::Source.each([1, 2, 3])
|
|
51
|
+
|
|
52
|
+
source.run_with(FiberStream::Sink.to_a) # => [1, 2, 3]
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
IO sources read chunks on demand and require a scheduler-backed non-blocking
|
|
56
|
+
fiber:
|
|
57
|
+
|
|
58
|
+
```ruby
|
|
59
|
+
require "async"
|
|
60
|
+
require "fiber_stream"
|
|
61
|
+
|
|
62
|
+
chunks =
|
|
63
|
+
Async do
|
|
64
|
+
File.open("input.txt", "rb") do |file|
|
|
65
|
+
FiberStream::Source.io(file)
|
|
66
|
+
.run_with(FiberStream::Sink.to_a)
|
|
67
|
+
end
|
|
68
|
+
end.wait
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Ractor port sources connect a producer Ractor with an explicit ack handshake.
|
|
72
|
+
The producer creates its acknowledgment port, waits for `RactorPort::Ack`, and
|
|
73
|
+
then sends one typed message back to the FiberStream data port:
|
|
74
|
+
|
|
75
|
+
```ruby
|
|
76
|
+
data_port = Ractor::Port.new
|
|
77
|
+
setup_port = Ractor::Port.new
|
|
78
|
+
|
|
79
|
+
producer =
|
|
80
|
+
Ractor.new(data_port, setup_port) do |outbox, setup|
|
|
81
|
+
ack_port = Ractor::Port.new
|
|
82
|
+
setup.send(ack_port)
|
|
83
|
+
|
|
84
|
+
values = [1, 2, 3].to_enum
|
|
85
|
+
|
|
86
|
+
loop do
|
|
87
|
+
case ack_port.receive
|
|
88
|
+
in FiberStream::RactorPort::Ack
|
|
89
|
+
begin
|
|
90
|
+
outbox.send(FiberStream::RactorPort::Element.new(values.next))
|
|
91
|
+
rescue StopIteration
|
|
92
|
+
outbox.send(FiberStream::RactorPort::Complete.new)
|
|
93
|
+
break
|
|
94
|
+
end
|
|
95
|
+
in FiberStream::RactorPort::Cancel
|
|
96
|
+
break
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
ack_port = setup_port.receive
|
|
102
|
+
|
|
103
|
+
FiberStream::Source.ractor_port(data_port, ack_port: ack_port)
|
|
104
|
+
.run_with(FiberStream::Sink.to_a)
|
|
105
|
+
# => [1, 2, 3]
|
|
106
|
+
|
|
107
|
+
producer.value
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### Flows
|
|
111
|
+
|
|
112
|
+
Flows transform a stream lazily. Convenience methods on `Source` delegate to
|
|
113
|
+
the matching `FiberStream::Flow` constructors.
|
|
114
|
+
|
|
115
|
+
```ruby
|
|
116
|
+
result =
|
|
117
|
+
FiberStream::Source.each(["a\nb", "\nc"])
|
|
118
|
+
.lines
|
|
119
|
+
.select { |line| line != "b" }
|
|
120
|
+
.map(&:upcase)
|
|
121
|
+
.run_with(FiberStream::Sink.to_a)
|
|
122
|
+
|
|
123
|
+
result # => ["A", "C"]
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Reusable flows can be composed with `Flow#via`:
|
|
127
|
+
|
|
128
|
+
```ruby
|
|
129
|
+
normalize =
|
|
130
|
+
FiberStream::Flow.map(&:strip)
|
|
131
|
+
.via(FiberStream::Flow.select { |line| !line.empty? })
|
|
132
|
+
|
|
133
|
+
FiberStream::Source.each([" a ", "", " b "])
|
|
134
|
+
.via(normalize)
|
|
135
|
+
.run_with(FiberStream::Sink.to_a)
|
|
136
|
+
# => ["a", "b"]
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### Sinks
|
|
140
|
+
|
|
141
|
+
A `Sink` consumes the stream and returns a materialized value.
|
|
142
|
+
|
|
143
|
+
```ruby
|
|
144
|
+
FiberStream::Source.each([1, 2, 3])
|
|
145
|
+
.run_with(FiberStream::Sink.fold(0) { |sum, value| sum + value })
|
|
146
|
+
# => 6
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Pipelines
|
|
150
|
+
|
|
151
|
+
`Source#to(sink)` creates a reusable runnable pipeline.
|
|
152
|
+
|
|
153
|
+
```ruby
|
|
154
|
+
pipeline =
|
|
155
|
+
FiberStream::Source.each([1, 2, 3])
|
|
156
|
+
.map { |number| number * 2 }
|
|
157
|
+
.to(FiberStream::Sink.to_a)
|
|
158
|
+
|
|
159
|
+
pipeline.run # => [2, 4, 6]
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
`Pipeline#run_async` starts a pipeline in a scheduler-backed background fiber
|
|
163
|
+
and returns a handle:
|
|
164
|
+
|
|
165
|
+
```ruby
|
|
166
|
+
require "async"
|
|
167
|
+
require "fiber_stream"
|
|
168
|
+
|
|
169
|
+
result =
|
|
170
|
+
Async do
|
|
171
|
+
running =
|
|
172
|
+
FiberStream::Source.each([1, 2, 3])
|
|
173
|
+
.map { |number| number * 2 }
|
|
174
|
+
.to(FiberStream::Sink.to_a)
|
|
175
|
+
.run_async
|
|
176
|
+
|
|
177
|
+
# Foreground scheduler-managed work can continue here.
|
|
178
|
+
|
|
179
|
+
running.wait
|
|
180
|
+
end.wait
|
|
181
|
+
|
|
182
|
+
result # => [2, 4, 6]
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
The handle supports `wait`, `cancel`, `done?`, and `cancel_requested?`.
|
|
186
|
+
|
|
187
|
+
## Backpressure
|
|
188
|
+
|
|
189
|
+
The initial runtime is pull-based. A sink asks for one element, each flow pulls
|
|
190
|
+
only what it needs from upstream, and the source advances only when downstream
|
|
191
|
+
demands a value.
|
|
192
|
+
|
|
193
|
+
`Sink.first` demonstrates sink-side early completion:
|
|
194
|
+
|
|
195
|
+
```ruby
|
|
196
|
+
first =
|
|
197
|
+
FiberStream::Source.each([1, 2, 3])
|
|
198
|
+
.run_with(FiberStream::Sink.first)
|
|
199
|
+
|
|
200
|
+
first # => 1
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
`Flow.take` demonstrates flow-side early completion and closes upstream after
|
|
204
|
+
the requested number of elements:
|
|
205
|
+
|
|
206
|
+
```ruby
|
|
207
|
+
limited =
|
|
208
|
+
FiberStream::Source.each([1, 2, 3])
|
|
209
|
+
.take(2)
|
|
210
|
+
.run_with(FiberStream::Sink.to_a)
|
|
211
|
+
|
|
212
|
+
limited # => [1, 2]
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
`Flow.buffer(count)` allows bounded prefetch. `Flow.async`, `Flow.buffer`,
|
|
216
|
+
`Flow.parallel_map`, `Source.io`, `Sink.io`, and `Pipeline#run_async` require an
|
|
217
|
+
installed `Fiber.scheduler` and a non-blocking current fiber when demanded or
|
|
218
|
+
started. FiberStream does not install a scheduler and does not depend on Async
|
|
219
|
+
at runtime.
|
|
220
|
+
|
|
221
|
+
## API Surface
|
|
222
|
+
|
|
223
|
+
Sources:
|
|
224
|
+
|
|
225
|
+
- `FiberStream::Source.each(enumerable)`
|
|
226
|
+
- `FiberStream::Source.io(io, chunk_size: 16 * 1024, close: false)`
|
|
227
|
+
- `FiberStream::Source.ractor_port(port, ack_port:, ack_transfer: :copy, cancel: true)`
|
|
228
|
+
|
|
229
|
+
Source convenience methods:
|
|
230
|
+
|
|
231
|
+
- `Source#via(flow)`
|
|
232
|
+
- `Source#map { |element| ... }`
|
|
233
|
+
- `Source#parallel_map(concurrency:) { |element| ... }`
|
|
234
|
+
- `Source#ractor_map(workers:, input_transfer: :copy, output_transfer: :copy) { |element| ... }`
|
|
235
|
+
- `Source#select { |element| ... }`
|
|
236
|
+
- `Source#take(count)`
|
|
237
|
+
- `Source#async`
|
|
238
|
+
- `Source#buffer(count)`
|
|
239
|
+
- `Source#lines(chomp: true, max_length: nil)`
|
|
240
|
+
- `Source#to(sink)`
|
|
241
|
+
- `Source#run_with(sink)`
|
|
242
|
+
|
|
243
|
+
Flows:
|
|
244
|
+
|
|
245
|
+
- `FiberStream::Flow.map { |element| ... }`
|
|
246
|
+
- `FiberStream::Flow.parallel_map(concurrency:) { |element| ... }`
|
|
247
|
+
- `FiberStream::Flow.ractor_map(workers:, input_transfer: :copy, output_transfer: :copy) { |element| ... }`
|
|
248
|
+
- `FiberStream::Flow.select { |element| ... }`
|
|
249
|
+
- `FiberStream::Flow.take(count)`
|
|
250
|
+
- `FiberStream::Flow.async`
|
|
251
|
+
- `FiberStream::Flow.buffer(count)`
|
|
252
|
+
- `FiberStream::Flow.lines(chomp: true, max_length: nil)`
|
|
253
|
+
- `Flow#via(flow)`
|
|
254
|
+
- `Flow#to(sink)`
|
|
255
|
+
|
|
256
|
+
Sinks:
|
|
257
|
+
|
|
258
|
+
- `FiberStream::Sink.to_a`
|
|
259
|
+
- `FiberStream::Sink.first`
|
|
260
|
+
- `FiberStream::Sink.fold(initial) { |accumulator, element| ... }`
|
|
261
|
+
- `FiberStream::Sink.io(io, close: false, flush: false)`
|
|
262
|
+
|
|
263
|
+
Pipelines:
|
|
264
|
+
|
|
265
|
+
- `FiberStream::Pipeline#run`
|
|
266
|
+
- `FiberStream::Pipeline#run_async`
|
|
267
|
+
- `FiberStream::RunningPipeline#wait`
|
|
268
|
+
- `FiberStream::RunningPipeline#cancel`
|
|
269
|
+
- `FiberStream::RunningPipeline#done?`
|
|
270
|
+
- `FiberStream::RunningPipeline#cancel_requested?`
|
|
271
|
+
|
|
272
|
+
## Examples
|
|
273
|
+
|
|
274
|
+
Runnable examples live under `examples/`.
|
|
275
|
+
|
|
276
|
+
```sh
|
|
277
|
+
bundle exec ruby examples/basic_pipeline.rb
|
|
278
|
+
bundle exec ruby examples/composable_pipeline.rb
|
|
279
|
+
bundle exec ruby examples/line_processing.rb
|
|
280
|
+
bundle exec ruby examples/file_copy.rb
|
|
281
|
+
bundle exec ruby examples/backpressure_buffer.rb
|
|
282
|
+
bundle exec ruby examples/background_execution.rb
|
|
283
|
+
bundle exec ruby examples/ractor_map_hashing.rb
|
|
284
|
+
bundle exec ruby examples/ractor_port_source.rb
|
|
285
|
+
bundle exec ruby examples/async_http_requests.rb
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
`examples/backpressure_buffer.rb` prints timestamped producer and consumer
|
|
289
|
+
events so the difference between direct demand and bounded prefetch is visible.
|
|
290
|
+
|
|
291
|
+
`examples/ractor_map_hashing.rb` demonstrates ordered Ractor-backed hashing
|
|
292
|
+
with a shareable mapper proc and `input_transfer: :move`.
|
|
293
|
+
|
|
294
|
+
`examples/ractor_port_source.rb` demonstrates a producer Ractor that waits for
|
|
295
|
+
`RactorPort::Ack` before sending each `RactorPort::Element`.
|
|
296
|
+
|
|
297
|
+
`examples/async_http_requests.rb` starts a local HTTP server and shows
|
|
298
|
+
FiberStream overlapping independent HTTP request waits with `parallel_map`.
|
|
299
|
+
|
|
300
|
+
Benchmark scripts live under `benchmarks/`.
|
|
301
|
+
|
|
302
|
+
```sh
|
|
303
|
+
bundle exec ruby benchmarks/stream_transform.rb
|
|
304
|
+
bundle exec ruby benchmarks/latency_overlap.rb
|
|
305
|
+
bundle exec ruby benchmarks/heavy_cpu_map.rb
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
## Development
|
|
309
|
+
|
|
310
|
+
This project targets Ruby 4.x. The repository currently pins Ruby 4.0.3 in
|
|
311
|
+
`mise.toml`.
|
|
312
|
+
|
|
313
|
+
Install dependencies:
|
|
314
|
+
|
|
315
|
+
```sh
|
|
316
|
+
bundle install
|
|
317
|
+
```
|
|
318
|
+
|
|
319
|
+
Run the test suite:
|
|
320
|
+
|
|
321
|
+
```sh
|
|
322
|
+
bundle exec rake test
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
Run RBS validation:
|
|
326
|
+
|
|
327
|
+
```sh
|
|
328
|
+
bundle exec rbs validate
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
Run RuboCop:
|
|
332
|
+
|
|
333
|
+
```sh
|
|
334
|
+
bundle exec rubocop
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
Run all default checks:
|
|
338
|
+
|
|
339
|
+
```sh
|
|
340
|
+
bundle exec rake
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
Build the gem:
|
|
344
|
+
|
|
345
|
+
```sh
|
|
346
|
+
bundle exec gem build fiber_stream.gemspec
|
|
347
|
+
```
|
|
348
|
+
|
|
349
|
+
Release uses RubyGems Trusted Publishing from the `Release` GitHub Actions
|
|
350
|
+
workflow. Configure a pending trusted publisher for the `fiber_stream` gem with
|
|
351
|
+
workflow filename `release.yml` and environment `release`, then publish by
|
|
352
|
+
pushing a version tag such as `v0.1.0`.
|
|
353
|
+
|
|
354
|
+
## Documentation
|
|
355
|
+
|
|
356
|
+
Design and planning documents live under `docs/`:
|
|
357
|
+
|
|
358
|
+
- `docs/product-specs/`
|
|
359
|
+
- `docs/design-docs/`
|
|
360
|
+
- `docs/exec-plans/`
|
|
361
|
+
- `docs/references/`
|
data/examples/README.md
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# FiberStream Examples
|
|
2
|
+
|
|
3
|
+
Run examples from the repository root with Bundler:
|
|
4
|
+
|
|
5
|
+
```sh
|
|
6
|
+
bundle exec ruby examples/basic_pipeline.rb
|
|
7
|
+
bundle exec ruby examples/composable_pipeline.rb
|
|
8
|
+
bundle exec ruby examples/line_processing.rb
|
|
9
|
+
bundle exec ruby examples/file_copy.rb
|
|
10
|
+
bundle exec ruby examples/backpressure_buffer.rb
|
|
11
|
+
bundle exec ruby examples/background_execution.rb
|
|
12
|
+
bundle exec ruby examples/ractor_map_hashing.rb
|
|
13
|
+
bundle exec ruby examples/ractor_port_source.rb
|
|
14
|
+
bundle exec ruby examples/async_http_requests.rb
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
`basic_pipeline.rb` uses only in-memory values and does not require an async
|
|
18
|
+
scheduler.
|
|
19
|
+
|
|
20
|
+
`composable_pipeline.rb` demonstrates reusable flow pipelines, sink
|
|
21
|
+
composition, and runnable `Source#to(...).run` pipelines.
|
|
22
|
+
|
|
23
|
+
`line_processing.rb` demonstrates `Source#lines` over arbitrary String chunks.
|
|
24
|
+
|
|
25
|
+
`file_copy.rb` uses Ruby core `File` objects with `Source.io` and `Sink.io`.
|
|
26
|
+
IO examples require a scheduler-backed non-blocking fiber, so they run inside
|
|
27
|
+
an `Async do ... end.wait` block provided by the `async` gem.
|
|
28
|
+
|
|
29
|
+
`backpressure_buffer.rb` prints timestamped producer and consumer events. The
|
|
30
|
+
unbuffered run stays demand-driven, while the buffered run allows bounded
|
|
31
|
+
prefetch. The `produced_ahead` counter includes queued values plus in-flight
|
|
32
|
+
producer and consumer work, so it can be larger than the configured queue size
|
|
33
|
+
without becoming unbounded.
|
|
34
|
+
|
|
35
|
+
`background_execution.rb` starts a runnable pipeline with `Pipeline#run_async`
|
|
36
|
+
and uses the returned handle to wait for the background materialized value while
|
|
37
|
+
the foreground fiber keeps doing scheduler-managed work.
|
|
38
|
+
|
|
39
|
+
`ractor_map_hashing.rb` hashes independent payloads in Ractor workers. It uses
|
|
40
|
+
`Ractor.shareable_proc`, preserves input order, and opts into
|
|
41
|
+
`input_transfer: :move` because the input records are not reused after the
|
|
42
|
+
pipeline runs.
|
|
43
|
+
|
|
44
|
+
`ractor_port_source.rb` demonstrates a producer Ractor connected to
|
|
45
|
+
`Source.ractor_port`. The producer creates its acknowledgment port, waits for
|
|
46
|
+
`RactorPort::Ack`, and sends one typed `RactorPort::Element` per downstream
|
|
47
|
+
demand.
|
|
48
|
+
|
|
49
|
+
`async_http_requests.rb` starts a local HTTP server and compares serial
|
|
50
|
+
requests with FiberStream `parallel_map` requests. It keeps responses ordered
|
|
51
|
+
while overlapping independent network waits.
|
|
data/examples/async_http_requests.rb
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
$LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
|
|
4
|
+
|
|
5
|
+
require "async"
|
|
6
|
+
require "fiber_stream"
|
|
7
|
+
require "socket"
|
|
8
|
+
|
|
9
|
+
# Routes served by the local demo HTTP server in this script.
# Each request path maps to a two-element array: [delay, body].
# handle_connection sleeps for `delay` seconds and then sends `body` as the
# response text. The hash is frozen, so it cannot be modified at runtime.
ROUTES = {
|
|
10
|
+
"/profile" => [0.18, "profile loaded"],
|
|
11
|
+
"/orders" => [0.12, "orders loaded"],
|
|
12
|
+
"/recommendations" => [0.16, "recommendations loaded"]
|
|
13
|
+
}.freeze
|
|
14
|
+
|
|
15
|
+
# A request target: `path` is the URL path to GET, `label` is the name shown in
# the printed report. Instances are built with keyword arguments.
Endpoint = Struct.new(:path, :label, keyword_init: true)
|
|
16
|
+
|
|
17
|
+
# Returns the current Process::CLOCK_MONOTONIC reading.
# Callers in this script subtract two readings to get an elapsed duration.
def monotonic_time
|
|
18
|
+
Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
# Accept loop for the demo server.
#
# Repeatedly calls `server.accept` and hands every accepted socket to
# handle_connection inside its own Async task, so one connection's handling
# does not hold up the next accept.
#
# The loop ends when accept raises IOError or Errno::EBADF.
# NOTE(review): presumably that happens once the script's ensure block closes
# the server socket; confirm against the scheduler in use.
#
# server - the listening socket; must respond to `accept`.
def run_server(server)
|
|
22
|
+
loop do
|
|
23
|
+
socket = server.accept
|
|
24
|
+
Async { handle_connection(socket) }
|
|
25
|
+
rescue IOError, Errno::EBADF
|
|
26
|
+
break
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
# Serves a single request on `socket` and then closes it.
#
# Steps:
#   1. Read up to 1024 bytes of the request.
#   2. Take the path from the "GET <path>" request line, defaulting to "/"
#      when the pattern does not match.
#   3. Look the path up in ROUTES. Unknown paths get a 0.02 delay, the body
#      "not found" and a "404 Not Found" status; known paths get "200 OK".
#   4. Sleep for the route's delay, which simulates a slow endpoint.
#   5. Write a plain-text HTTP/1.1 response with Content-Length set to the
#      body's byte size and "Connection: close".
#
# The ensure clause closes the socket even when reading or writing raises.
# Exceptions raised by readpartial or write are not rescued here.
#
# socket - the accepted client connection.
def handle_connection(socket)
|
|
31
|
+
request = socket.readpartial(1024)
|
|
32
|
+
path = request[/GET\s+(\S+)/, 1] || "/"
|
|
33
|
+
delay, body = ROUTES.fetch(path, [0.02, "not found"])
|
|
34
|
+
status = ROUTES.key?(path) ? "200 OK" : "404 Not Found"
|
|
35
|
+
|
|
36
|
+
sleep delay
|
|
37
|
+
|
|
38
|
+
socket.write(
|
|
39
|
+
"HTTP/1.1 #{status}\r\n" \
|
|
40
|
+
"Content-Type: text/plain\r\n" \
|
|
41
|
+
"Content-Length: #{body.bytesize}\r\n" \
|
|
42
|
+
"Connection: close\r\n\r\n" \
|
|
43
|
+
"#{body}"
|
|
44
|
+
)
|
|
45
|
+
ensure
|
|
46
|
+
socket&.close
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
# Performs one HTTP GET over a fresh TCPSocket and returns a summary Hash.
#
# host     - host name or address to connect to.
# port     - TCP port to connect to.
# endpoint - object responding to `path` (request path) and `label`
#            (name copied into the result).
#
# The request is sent with "Connection: close", and the response is gathered
# with readpartial until EOFError signals that the peer closed the connection.
# The response is split at the first blank line into headers and body, and the
# second whitespace-separated token of the first header line is used as the
# status (for example "200").
#
# Returns { label:, status:, body:, elapsed: }. `elapsed` is the
# monotonic_time difference from just before connecting to just after the
# response was parsed.
#
# The ensure clause closes the socket. `socket&.close` is used because
# `socket` is still nil if TCPSocket.new itself raised.
#
# NOTE(review): an empty response leaves `headers` nil, so the status line
# lookup would raise NoMethodError. Acceptable for a demo against the local
# server, but worth knowing before reusing this elsewhere.
def http_get(host, port, endpoint)
|
|
50
|
+
response = +""
|
|
51
|
+
started_at = monotonic_time
|
|
52
|
+
socket = TCPSocket.new(host, port)
|
|
53
|
+
|
|
54
|
+
socket.write(
|
|
55
|
+
"GET #{endpoint.path} HTTP/1.1\r\n" \
|
|
56
|
+
"Host: #{host}:#{port}\r\n" \
|
|
57
|
+
"Connection: close\r\n\r\n"
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
loop do
|
|
61
|
+
response << socket.readpartial(1024)
|
|
62
|
+
rescue EOFError
|
|
63
|
+
break
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
headers, body = response.split("\r\n\r\n", 2)
|
|
67
|
+
status = headers.lines.first.split.fetch(1)
|
|
68
|
+
elapsed = monotonic_time - started_at
|
|
69
|
+
|
|
70
|
+
{
|
|
71
|
+
label: endpoint.label,
|
|
72
|
+
status: status,
|
|
73
|
+
body: body,
|
|
74
|
+
elapsed: elapsed
|
|
75
|
+
}
|
|
76
|
+
ensure
|
|
77
|
+
socket&.close
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# Times the given block using monotonic_time.
#
# label - text identifying the run; passed through unchanged.
#
# Returns a three-element array [label, elapsed, result], where `elapsed` is
# the difference between the two monotonic_time readings and `result` is the
# block's return value. The array shape matches print_run's parameters, so
# callers in this script splat it straight into print_run.
def measure(label)
|
|
81
|
+
started_at = monotonic_time
|
|
82
|
+
result = yield
|
|
83
|
+
elapsed = monotonic_time - started_at
|
|
84
|
+
[label, elapsed, result]
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
# Prints a report for one measured run.
#
# label     - heading text for the run.
# elapsed   - total duration of the run; printed with three decimals.
# responses - enumerable of Hashes with :label, :status, :elapsed and :body
#             keys. Each key is read with `fetch`, so a missing key raises
#             KeyError.
#
# Output is the heading line, one formatted line per response (label
# left-aligned in a 15-character column), then a blank line.
def print_run(label, elapsed, responses)
|
|
88
|
+
puts "#{label}: #{format('%.3f', elapsed)}s"
|
|
89
|
+
responses.each do |response|
|
|
90
|
+
puts format(
|
|
91
|
+
"- %-15<label>s status=%<status>s request=%<elapsed>.3fs body=%<body>s",
|
|
92
|
+
label: response.fetch(:label),
|
|
93
|
+
status: response.fetch(:status),
|
|
94
|
+
elapsed: response.fetch(:elapsed),
|
|
95
|
+
body: response.fetch(:body)
|
|
96
|
+
)
|
|
97
|
+
end
|
|
98
|
+
puts
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
# The three endpoints requested by both the serial run and the FiberStream
# parallel run. Each path is a key of ROUTES.
endpoints = [
|
|
102
|
+
Endpoint.new(path: "/profile", label: "profile"),
|
|
103
|
+
Endpoint.new(path: "/orders", label: "orders"),
|
|
104
|
+
Endpoint.new(path: "/recommendations", label: "recommendations")
|
|
105
|
+
]
|
|
106
|
+
|
|
107
|
+
# Script entry point; everything runs inside a Sync block from the async gem.
#
# 1. Bind a TCPServer on 127.0.0.1 with port 0 and read the port actually
#    bound back from `server.addr`.
# 2. Start run_server in a background Async task.
# 3. Measure a serial run: plain `endpoints.map`, one request after another.
# 4. Measure a FiberStream run: `parallel_map` with concurrency equal to the
#    number of endpoints, collected with Sink.to_a.
# 5. Print both reports and a closing explanation.
#
# The ensure clause closes the server socket and stops the server task even if
# a request fails.
Sync do
|
|
108
|
+
server = TCPServer.new("127.0.0.1", 0)
|
|
109
|
+
host = "127.0.0.1"
|
|
110
|
+
port = server.addr.fetch(1)
|
|
111
|
+
server_task = Async { run_server(server) }
|
|
112
|
+
|
|
113
|
+
begin
|
|
114
|
+
serial = measure("Serial HTTP requests") do
|
|
115
|
+
endpoints.map { |endpoint| http_get(host, port, endpoint) }
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
parallel = measure("FiberStream parallel HTTP requests") do
|
|
119
|
+
FiberStream::Source.each(endpoints)
|
|
120
|
+
.parallel_map(concurrency: endpoints.length) { |endpoint| http_get(host, port, endpoint) }
|
|
121
|
+
.run_with(FiberStream::Sink.to_a)
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
print_run(*serial)
|
|
125
|
+
print_run(*parallel)
|
|
126
|
+
|
|
127
|
+
puts "Serial waits add up. FiberStream starts all requests together and keeps the responses ordered."
|
|
128
|
+
ensure
|
|
129
|
+
server.close
|
|
130
|
+
server_task.stop
|
|
131
|
+
end
|
|
132
|
+
end
|
|
data/examples/background_execution.rb
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
$LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
|
|
4
|
+
|
|
5
|
+
require "async"
|
|
6
|
+
require "fiber_stream"
|
|
7
|
+
|
|
8
|
+
# Demo jobs fed into the pipeline. Each Hash has an :id (used in the result
# text) and a :delay (passed to `sleep` by the parallel_map block in this
# script to simulate work).
jobs = [
|
|
9
|
+
{ id: "import", delay: 0.12 },
|
|
10
|
+
{ id: "validate", delay: 0.05 },
|
|
11
|
+
{ id: "publish", delay: 0.08 }
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
# Script entry point; runs inside an Async block and waits for it to finish.
#
# 1. Build a pipeline over `jobs` whose parallel_map stage (concurrency 2)
#    sleeps for each job's :delay and yields "<id> complete", then start it
#    with run_async, which returns a handle instead of the result.
# 2. While the pipeline runs, the foreground prints three ticks, sleeping
#    0.03 before each one.
# 3. Call `running.wait` to obtain the collected array and print each entry.
Async do
|
|
15
|
+
running =
|
|
16
|
+
FiberStream::Source.each(jobs)
|
|
17
|
+
.parallel_map(concurrency: 2) do |job|
|
|
18
|
+
sleep job.fetch(:delay)
|
|
19
|
+
"#{job.fetch(:id)} complete"
|
|
20
|
+
end
|
|
21
|
+
.to(FiberStream::Sink.to_a)
|
|
22
|
+
.run_async
|
|
23
|
+
|
|
24
|
+
3.times do |tick|
|
|
25
|
+
sleep 0.03
|
|
26
|
+
puts "foreground tick #{tick + 1}"
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
puts "Background result"
|
|
30
|
+
running.wait.each { |line| puts "- #{line}" }
|
|
31
|
+
end.wait
|
|
data/examples/backpressure_buffer.rb
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
$LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
|
|
4
|
+
|
|
5
|
+
require "async"
|
|
6
|
+
require "fiber_stream"
|
|
7
|
+
|
|
8
|
+
# Tunables for the backpressure demo.
#   ITEM_COUNT     - the source emits the integers 1..ITEM_COUNT.
#   BUFFER_SIZE    - count passed to `buffer` in the buffered run.
#   PRODUCER_DELAY - seconds slept per item in the map ("produce") stage.
#   CONSUMER_DELAY - seconds slept per item in the fold ("consume") block.
# The consumer delay is larger than the producer delay, so the producer can
# get ahead whenever a buffer allows it.
ITEM_COUNT = 8
|
|
9
|
+
BUFFER_SIZE = 3
|
|
10
|
+
PRODUCER_DELAY = 0.05
|
|
11
|
+
CONSUMER_DELAY = 0.20
|
|
12
|
+
|
|
13
|
+
# Runs one demo pipeline and prints a timestamped trace of it.
#
# label       - prefix for the two summary lines printed at the end.
# buffer_size - when given (truthy), a `buffer(buffer_size)` stage is inserted
#               between the producer and the consumer; when nil the pipeline
#               is run without a buffer stage.
#
# Pipeline shape:
#   Source.each(1..ITEM_COUNT)
#     -> map  (the "producer": counts the item, logs "produce", sleeps
#              PRODUCER_DELAY, passes the item through unchanged)
#     -> optional buffer
#     -> Sink.fold([]) (the "consumer": logs "consume", sleeps CONSUMER_DELAY,
#              counts the item, appends it to the accumulator)
#
# The `log` lambda prints the seconds elapsed since the run started, the event
# name, the item, and `produced - consumed`. It also tracks the largest such
# gap seen. `consumed` is incremented only after the consumer's sleep, so an
# item that is currently being consumed still counts as "ahead".
#
# Prints the folded result and the maximum gap, followed by a blank line.
# The method's own return value is that of the final `puts`.
def run_pipeline(label, buffer_size: nil)
|
|
14
|
+
started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
15
|
+
produced = 0
|
|
16
|
+
consumed = 0
|
|
17
|
+
max_produced_ahead = 0
|
|
18
|
+
|
|
19
|
+
log = lambda do |event, item|
|
|
20
|
+
elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
|
|
21
|
+
produced_ahead = produced - consumed
|
|
22
|
+
max_produced_ahead = [max_produced_ahead, produced_ahead].max
|
|
23
|
+
|
|
24
|
+
puts format(
|
|
25
|
+
"%<elapsed>6.2fs %-10<event>s item=%<item>2d produced_ahead=%<ahead>d",
|
|
26
|
+
elapsed: elapsed,
|
|
27
|
+
event: event,
|
|
28
|
+
item: item,
|
|
29
|
+
ahead: produced_ahead
|
|
30
|
+
)
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
source =
|
|
34
|
+
FiberStream::Source.each(1..ITEM_COUNT)
|
|
35
|
+
.map do |item|
|
|
36
|
+
produced += 1
|
|
37
|
+
log.call("produce", item)
|
|
38
|
+
sleep PRODUCER_DELAY
|
|
39
|
+
item
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
source = source.buffer(buffer_size) if buffer_size
|
|
43
|
+
|
|
44
|
+
result =
|
|
45
|
+
source.run_with(
|
|
46
|
+
FiberStream::Sink.fold([]) do |items, item|
|
|
47
|
+
log.call("consume", item)
|
|
48
|
+
sleep CONSUMER_DELAY
|
|
49
|
+
consumed += 1
|
|
50
|
+
items << item
|
|
51
|
+
end
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
puts "#{label}: result=#{result.inspect}"
|
|
55
|
+
puts "#{label}: max produced ahead=#{max_produced_ahead}"
|
|
56
|
+
puts
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# Script entry point; runs inside an Async block and waits for it to finish.
# Runs the pipeline twice so the two traces can be compared: first without a
# buffer stage, then with `buffer(BUFFER_SIZE)`. An explanatory line is
# printed before each run.
Async do
|
|
60
|
+
puts "Unbuffered: downstream demand gates upstream one item at a time."
|
|
61
|
+
run_pipeline("unbuffered")
|
|
62
|
+
|
|
63
|
+
puts "buffer(#{BUFFER_SIZE}): upstream can prefetch, but backpressure keeps it bounded."
|
|
64
|
+
puts "produced_ahead includes queued values plus producer/consumer in-flight work."
|
|
65
|
+
run_pipeline("buffered", buffer_size: BUFFER_SIZE)
|
|
66
|
+
end.wait
|