fiber_stream 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +25 -0
  3. data/LICENSE +19 -0
  4. data/README.md +361 -0
  5. data/examples/README.md +51 -0
  6. data/examples/async_http_requests.rb +132 -0
  7. data/examples/background_execution.rb +31 -0
  8. data/examples/backpressure_buffer.rb +66 -0
  9. data/examples/basic_pipeline.rb +28 -0
  10. data/examples/composable_pipeline.rb +43 -0
  11. data/examples/file_copy.rb +33 -0
  12. data/examples/line_processing.rb +20 -0
  13. data/examples/ractor_map_hashing.rb +43 -0
  14. data/examples/ractor_port_source.rb +45 -0
  15. data/lib/fiber_stream/errors.rb +44 -0
  16. data/lib/fiber_stream/flow.rb +190 -0
  17. data/lib/fiber_stream/pipeline.rb +49 -0
  18. data/lib/fiber_stream/pull/async_boundary.rb +85 -0
  19. data/lib/fiber_stream/pull/buffer_boundary.rb +123 -0
  20. data/lib/fiber_stream/pull/each.rb +31 -0
  21. data/lib/fiber_stream/pull/io_source.rb +89 -0
  22. data/lib/fiber_stream/pull/lines.rb +121 -0
  23. data/lib/fiber_stream/pull/map.rb +37 -0
  24. data/lib/fiber_stream/pull/parallel_map_boundary.rb +299 -0
  25. data/lib/fiber_stream/pull/ractor_map_boundary.rb +500 -0
  26. data/lib/fiber_stream/pull/ractor_port_source.rb +242 -0
  27. data/lib/fiber_stream/pull/select.rb +40 -0
  28. data/lib/fiber_stream/pull/take.rb +47 -0
  29. data/lib/fiber_stream/pull.rb +85 -0
  30. data/lib/fiber_stream/ractor_port.rb +17 -0
  31. data/lib/fiber_stream/running_pipeline.rb +156 -0
  32. data/lib/fiber_stream/sink.rb +176 -0
  33. data/lib/fiber_stream/source.rb +184 -0
  34. data/lib/fiber_stream/version.rb +5 -0
  35. data/lib/fiber_stream.rb +15 -0
  36. data/sig/fiber_stream.rbs +97 -0
  37. metadata +154 -0
@@ -0,0 +1,123 @@
1
+ # frozen_string_literal: true
2
+
3
module FiberStream
  module Pull
    # Prefetching stage that backs `Flow.buffer(count)`.
    #
    # On first demand a producer fiber is scheduled. It pulls from upstream and
    # hands tagged messages (`[:value, v]`, `[:done]`, `[:error, e]`) to the
    # consumer through a `Thread::SizedQueue` of capacity `count`, so upstream
    # can run ahead of downstream only by the queue capacity plus whatever is
    # in flight. `#close` closes upstream and the queue; a producer that is
    # blocked pushing into the closed queue gets `ClosedQueueError` and stops.
    class BufferBoundary
      # @param upstream [#next, #close] stage to prefetch from
      # @param count [Integer] capacity of the hand-off queue
      def initialize(upstream, count)
        @upstream = upstream
        @queue = Thread::SizedQueue.new(count)
        @producer = nil
        @started = @closed = @done = false
        @upstream_closed = false
        @upstream_close_error = nil
      end

      # Returns the next prefetched element, or `DONE` once the stream has
      # finished or was closed. Re-raises an upstream failure delivered by the
      # producer.
      #
      # @raise [SchedulerRequiredError] when no fiber scheduler is installed
      def next
        return DONE if @closed || @done

        start
        message = @queue.pop
        # A nil pop means the queue was closed and drained.
        return complete unless message

        tag, payload = message
        if tag == :value
          payload
        elsif tag == :done
          complete
        elsif tag == :error
          @done = true
          raise payload
        end
      end

      # Closes upstream and the hand-off queue. Idempotent. Raises the
      # upstream close failure, whether it happened here or was recorded
      # earlier by the producer.
      def close
        return if @closed

        @closed = @done = true
        failure = close_upstream
        close_queue
        failure ||= @upstream_close_error
        raise failure if failure
      ensure
        cancel_producer
      end

      private

      # Schedules the producer fiber on first demand.
      #
      # NOTE(review): only the presence of a scheduler is checked here, not
      # whether the calling fiber is non-blocking - confirm that is intended.
      def start
        return if @started

        unless Fiber.scheduler
          raise SchedulerRequiredError, "Flow.buffer requires Fiber.scheduler"
        end

        @started = true
        @producer = Fiber.schedule { run_producer }
      end

      # Producer body: keeps pulling until closed, until the queue rejects a
      # message, or until a terminal (non-`:value`) message has been delivered.
      def run_producer
        until @closed
          message = pull_message
          break unless deliver(message) && message.first == :value
        end
      ensure
        @upstream_close_error ||= close_upstream unless @upstream_closed
      end

      # Pulls one element and wraps it as a message. An upstream failure
      # becomes an `:error` message; upstream is closed without recording any
      # close failure so the pull failure stays the one that is reported.
      def pull_message
        element = @upstream.next
        Pull.done?(element) ? terminal_done_message : [:value, element]
      rescue StandardError => failure
        close_upstream(record_error: false)
        [:error, failure]
      end

      # Closes upstream at end of stream; a close failure replaces `:done`.
      def terminal_done_message
        failure = close_upstream
        return [:error, failure] if failure

        [:done]
      end

      # Pushes a message to the consumer; false when the queue is closed.
      def deliver(message)
        @queue.push(message)
        true
      rescue ClosedQueueError
        false
      end

      def close_queue
        @queue.close
      end

      # Closes upstream at most once.
      #
      # @return [StandardError, nil] the close failure, if any
      def close_upstream(record_error: true)
        return nil if @upstream_closed

        @upstream_closed = true
        @upstream.close
        nil
      rescue StandardError => failure
        @upstream_close_error ||= failure if record_error
        failure
      end

      # Marks the stream finished and returns the sentinel.
      def complete
        @done = true
        DONE
      end

      # Currently a no-op hook run at the end of every `#close`.
      def cancel_producer
        nil
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
module FiberStream
  module Pull
    # Pull stream behind `Source.each`.
    #
    # Wraps the supplied enumerable in an external Enumerator and hands out one
    # element per `#next`. Only that Enumerator belongs to the stream; `#close`
    # never touches the enumerable itself.
    class Each
      # @param enumerable [#each] caller-owned collection to iterate
      def initialize(enumerable)
        @closed = false
        @iterator = enumerable.to_enum(:each)
      end

      # @return [Object] the next element, or `DONE` when the stream is closed
      #   or the enumerator is exhausted
      def next
        @closed ? DONE : @iterator.next
      rescue StopIteration
        DONE
      end

      # Marks the stream closed; later calls are no-ops.
      def close
        @closed = true unless @closed
      end
    end
  end
end
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
module FiberStream
  module Pull
    # Pull stream behind `Source.io`.
    #
    # Each `#next` issues at most one `readpartial(chunk_size)` on the IO. The
    # IO is closed by this stream only when it was constructed with a truthy
    # `close_io`. When a read (or validation) failure and a close failure
    # coincide, the read failure is the one that is raised.
    class IOSource
      # @param io [#readpartial, #close] object to read from
      # @param chunk_size [Integer] maximum size passed to `readpartial`
      # @param close_io [Boolean] whether this stream closes `io`
      def initialize(io, chunk_size, close_io)
        @io = io
        @chunk_size = chunk_size
        @close_io = close_io
        @closed = @done = @io_closed = false
      end

      # @return [String, Object] the next chunk, or `DONE` at end of input or
      #   after close
      # @raise [SchedulerRequiredError] without a scheduler or on a blocking fiber
      # @raise [TypeError] when `readpartial` returns a non-String
      def next
        return DONE if @closed || @done

        validate_scheduler!

        data = read_chunk
        if Pull.done?(data)
          DONE
        elsif data.is_a?(String)
          data
        else
          fail_with_primary(TypeError.new("readpartial must return a String"))
        end
      end

      # Closes the stream (and the IO when owned). Idempotent; raises the IO
      # close failure if there is one.
      def close
        return if @closed

        @closed = @done = true
        failure = close_io
        raise failure if failure
      end

      private

      # Requires an installed scheduler and a non-blocking current fiber.
      def validate_scheduler!
        scheduler = Fiber.scheduler
        return if scheduler && !Fiber.current.blocking?

        reason =
          if scheduler
            "Source.io requires a non-blocking fiber"
          else
            "Source.io requires Fiber.scheduler"
          end
        fail_with_primary(SchedulerRequiredError.new(reason))
      end

      # One demand-driven read; EOF completes the stream, any other
      # StandardError is treated as the primary failure.
      def read_chunk
        @io.readpartial(@chunk_size)
      rescue EOFError
        complete
      rescue StandardError => failure
        fail_with_primary(failure)
      end

      # Ends the stream at EOF; a close failure here is raised to the caller.
      def complete
        @done = true
        failure = close_io
        return DONE unless failure

        raise failure
      end

      # Ends the stream with `error`, discarding any failure from closing the IO.
      def fail_with_primary(error)
        @done = true
        close_io
        raise error
      end

      # Closes the IO at most once, and only when this stream owns it.
      #
      # @return [StandardError, nil] the close failure, if any
      def close_io
        return nil if !@close_io || @io_closed

        @io_closed = true
        @io.close
        nil
      rescue StandardError => failure
        failure
      end
    end
  end
end
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
module FiberStream
  module Pull
    # Line-framing stage for `Flow.lines`.
    #
    # Upstream chunks are appended to an internal binary buffer because a line
    # can span several chunks. Every emitted frame is a binary String. The
    # `max_length` limit is applied per frame (terminator included), never to
    # the whole buffer, so complete valid lines already buffered are emitted
    # before a later over-limit line fails.
    class Lines
      NEWLINE = "\n".b
      CARRIAGE_RETURN = "\r".b

      # @param upstream [#next, #close] stage producing String chunks
      # @param chomp [Boolean] strip the trailing "\n" / "\r\n" from terminated lines
      # @param max_length [Integer, nil] per-frame byte limit; nil disables the check
      def initialize(upstream, chomp, max_length)
        @upstream = upstream
        @chomp = chomp
        @max_length = max_length
        @buffer = +"".b
        @closed = false
        @upstream_done = false
      end

      # Returns the next line, pulling upstream chunks until one is complete.
      # At end of upstream a non-empty remainder is emitted as a final,
      # unterminated frame.
      #
      # @return [String, Object] the next frame, or `DONE`
      # @raise [FrameTooLongError] when a frame exceeds `max_length`
      # @raise [TypeError] when upstream yields a non-String element
      def next
        return DONE if @closed

        # Deliberately not `Kernel#loop`: it rescues StopIteration, which would
        # swallow a StopIteration (or ClosedQueueError) raised by upstream and
        # hand its `result` downstream as if it were a line.
        until (line = next_buffered_line)
          validate_pending_frame_length!
          return complete_from_buffer if @upstream_done

          append_next_chunk
        end
        line
      end

      # Drops buffered bytes and closes upstream. Idempotent.
      def close
        return if @closed

        @closed = true
        @buffer.clear
        @upstream.close
      end

      private

      # Cuts the first newline-terminated frame off the buffer.
      #
      # @return [String, nil] the formatted line, or nil when none is complete
      def next_buffered_line
        newline_index = @buffer.index(NEWLINE)
        return nil unless newline_index

        frame = @buffer.slice!(0, newline_index + 1)
        validate_frame_length!(frame)
        format_frame(frame, terminated: true)
      end

      # Emits whatever is left once upstream is done; `DONE` when nothing is.
      def complete_from_buffer
        return DONE if @buffer.empty?

        frame = @buffer
        @buffer = +"".b
        validate_frame_length!(frame)
        format_frame(frame, terminated: false)
      end

      # Pulls one chunk and appends it (as binary) to the buffer, or records
      # that upstream is finished.
      def append_next_chunk
        chunk = @upstream.next
        if Pull.done?(chunk)
          @upstream_done = true
          return
        end

        raise TypeError, "Flow.lines elements must be String" unless chunk.is_a?(String)

        @buffer << chunk.b
        validate_pending_frame_length!
      end

      # Fails early when the buffer holds no newline yet is already over the
      # limit, so an unterminated frame cannot grow without bound.
      def validate_pending_frame_length!
        return unless @max_length
        return if @buffer.include?(NEWLINE)
        return if @buffer.bytesize <= @max_length

        fail_frame_too_long
      end

      # Checks a cut frame (terminator included) against the limit.
      def validate_frame_length!(frame)
        return unless @max_length
        return if frame.bytesize <= @max_length

        fail_frame_too_long
      end

      # Closes the stage and upstream, then raises. A failure from closing
      # upstream is discarded so the length error is the one reported.
      def fail_frame_too_long
        @closed = true
        close_upstream
        raise FrameTooLongError, "frame exceeded max_length #{@max_length}"
      end

      # @return [StandardError, nil] the close failure, if any
      def close_upstream
        @upstream.close
        nil
      rescue StandardError => error
        error
      end

      # Applies `chomp` to terminated frames: drops "\n" and a preceding "\r".
      # Unterminated frames are returned untouched.
      def format_frame(frame, terminated:)
        return frame unless @chomp && terminated

        frame = frame.byteslice(0, frame.bytesize - 1)
        frame = frame.byteslice(0, frame.bytesize - 1) if frame.end_with?(CARRIAGE_RETURN)
        frame
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
module FiberStream
  module Pull
    # Element-wise mapping stage.
    #
    # Each downstream demand pulls exactly one upstream element and returns the
    # transform's result for it. The `DONE` sentinel is passed through as-is
    # and is never handed to the transform.
    class Map
      # @param upstream [#next, #close] stage to pull from
      # @param transform [#call] callable applied to every element
      def initialize(upstream, transform)
        @upstream = upstream
        @transform = transform
        @closed = @done = false
      end

      # @return [Object] the transformed element, or `DONE` once upstream has
      #   finished or this stage was closed
      def next
        return DONE if @closed || @done

        element = @upstream.next
        return @transform.call(element) unless Pull.done?(element)

        # Remember completion so upstream is not pulled again.
        @done = true
        DONE
      end

      # Closes upstream on the first call; later calls are no-ops.
      def close
        return if @closed

        @closed = true
        @upstream.close
      end
    end
  end
end
@@ -0,0 +1,299 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FiberStream
4
+ module Pull
5
+ # Ordered scheduler-backed worker boundary for `Flow.parallel_map`.
6
+ #
7
+ # A single dispatcher pulls upstream and assigns sequence numbers while a
8
+ # bounded worker pool maps values. Downstream emits results in input order,
9
+ # so admission is permit-based to keep queued, running, and completed
10
+ # pulled-but-unemitted work bounded by the configured concurrency.
11
+ class ParallelMapBoundary
12
+ TERMINAL_RESULT_CAPACITY = 1
13
+ CancellationError = Class.new(StandardError)
14
+
15
+ def initialize(upstream, concurrency, transform)
16
+ @upstream = upstream
17
+ @concurrency = concurrency
18
+ @transform = transform
19
+ @permits = Thread::SizedQueue.new(concurrency)
20
+ @jobs = Thread::SizedQueue.new(concurrency)
21
+ @results = Thread::SizedQueue.new(concurrency + TERMINAL_RESULT_CAPACITY)
22
+ @workers = []
23
+ @dispatcher = nil
24
+ @pending = {}
25
+ @next_sequence = 0
26
+ @next_emit_sequence = 0
27
+ @failure_sequence = nil
28
+ @started = false
29
+ @closed = false
30
+ @done = false
31
+ @admission_closed = false
32
+ @upstream_closed = false
33
+ @upstream_close_error = nil
34
+
35
+ concurrency.times { @permits << true }
36
+ end
37
+
38
+ def next
39
+ return DONE if @closed || @done
40
+
41
+ start
42
+ next_message
43
+ end
44
+
45
+ def close
46
+ return if @closed
47
+
48
+ @closed = true
49
+ @done = true
50
+ close_error = close_upstream
51
+ close_internal_queues
52
+ close_error ||= @upstream_close_error
53
+ raise close_error if close_error
54
+ ensure
55
+ cancel_fibers
56
+ end
57
+
58
+ private
59
+
60
+ def start
61
+ return if @started
62
+
63
+ validate_scheduler!
64
+
65
+ @started = true
66
+ @concurrency.times do
67
+ @workers << Fiber.schedule { run_worker }
68
+ end
69
+ @dispatcher = Fiber.schedule { run_dispatcher }
70
+ end
71
+
72
+ def next_message
73
+ loop do
74
+ ready = @pending.delete(@next_emit_sequence)
75
+ if ready
76
+ drain_available_results
77
+ return emit(ready)
78
+ end
79
+
80
+ message = @results.pop
81
+ return complete if message.nil?
82
+
83
+ record_result(message)
84
+ end
85
+ end
86
+
87
+ def emit(message)
88
+ case message.fetch(0)
89
+ when :value
90
+ emit_value(message)
91
+ when :done
92
+ complete
93
+ when :error
94
+ fail_with_ordered_error(message)
95
+ end
96
+ end
97
+
98
+ def emit_value(message)
99
+ sequence = message.fetch(1)
100
+ value = message.fetch(2)
101
+ @next_emit_sequence = sequence + 1
102
+ return_permit unless @admission_closed
103
+ value
104
+ end
105
+
106
+ def fail_with_ordered_error(message)
107
+ sequence = message.fetch(1)
108
+ error = message.fetch(2)
109
+
110
+ if @failure_sequence && sequence > @failure_sequence
111
+ @next_emit_sequence = sequence + 1
112
+ return next_message
113
+ end
114
+
115
+ @done = true
116
+ close_result_queue
117
+ cancel_fibers
118
+ raise error
119
+ end
120
+
121
+ def complete
122
+ @done = true
123
+ close_result_queue
124
+ DONE
125
+ end
126
+
127
+ def run_dispatcher
128
+ loop do
129
+ break if @closed || @admission_closed
130
+ break unless take_permit
131
+
132
+ message = pull_job_message
133
+ if message.fetch(0) == :job
134
+ break unless deliver_job(message)
135
+ else
136
+ close_admission(close_upstream: false)
137
+ deliver_result(message)
138
+ break
139
+ end
140
+ end
141
+ rescue CancellationError
142
+ nil
143
+ ensure
144
+ close_upstream unless @upstream_closed || @closed
145
+ close_job_queue
146
+ end
147
+
148
+ def pull_job_message
149
+ value = @upstream.next
150
+ return terminal_done_message if Pull.done?(value)
151
+
152
+ sequence = @next_sequence
153
+ @next_sequence += 1
154
+ [:job, sequence, value]
155
+ rescue StandardError => error
156
+ close_upstream(record_error: false)
157
+ [:error, @next_sequence, error]
158
+ end
159
+
160
+ def terminal_done_message
161
+ close_error = close_upstream
162
+ close_error ? [:error, @next_sequence, close_error] : [:done, @next_sequence]
163
+ end
164
+
165
+ def run_worker
166
+ loop do
167
+ break if @closed
168
+
169
+ message = @jobs.pop
170
+ break if message.nil?
171
+
172
+ deliver_result(map_job(message))
173
+ end
174
+ rescue CancellationError
175
+ nil
176
+ end
177
+
178
+ def map_job(message)
179
+ sequence = message.fetch(1)
180
+ value = message.fetch(2)
181
+ [:value, sequence, @transform.call(value)]
182
+ rescue CancellationError
183
+ raise
184
+ rescue StandardError => error
185
+ [:error, sequence, error]
186
+ end
187
+
188
+ def record_result(message)
189
+ if message.fetch(0) == :error
190
+ sequence = message.fetch(1)
191
+ @failure_sequence = sequence if @failure_sequence.nil? || sequence < @failure_sequence
192
+ close_admission
193
+ end
194
+
195
+ @pending[message.fetch(1)] = message
196
+ end
197
+
198
+ def drain_available_results
199
+ loop do
200
+ message = @results.pop(true)
201
+ break if message.nil?
202
+
203
+ record_result(message)
204
+ rescue ThreadError
205
+ break
206
+ end
207
+ end
208
+
209
+ def close_admission(close_upstream: true)
210
+ return if @admission_closed
211
+
212
+ @admission_closed = true
213
+ close_upstream(record_error: false) if close_upstream
214
+ close_permit_queue
215
+ close_job_queue
216
+ end
217
+
218
+ def take_permit
219
+ @permits.pop
220
+ rescue ClosedQueueError
221
+ nil
222
+ end
223
+
224
+ def return_permit
225
+ @permits << true
226
+ rescue ClosedQueueError
227
+ nil
228
+ end
229
+
230
+ def deliver_job(message)
231
+ @jobs << message
232
+ true
233
+ rescue ClosedQueueError
234
+ false
235
+ end
236
+
237
+ def deliver_result(message)
238
+ @results << message
239
+ true
240
+ rescue ClosedQueueError
241
+ false
242
+ end
243
+
244
+ def close_internal_queues
245
+ close_permit_queue
246
+ close_job_queue
247
+ close_result_queue
248
+ end
249
+
250
+ def close_permit_queue
251
+ @permits.close
252
+ end
253
+
254
+ def close_job_queue
255
+ @jobs.close
256
+ end
257
+
258
+ def close_result_queue
259
+ @results.close
260
+ end
261
+
262
+ def close_upstream(record_error: true)
263
+ return nil if @upstream_closed
264
+
265
+ @upstream_closed = true
266
+ @upstream.close
267
+ nil
268
+ rescue StandardError => error
269
+ @upstream_close_error ||= error if record_error
270
+ error
271
+ end
272
+
273
+ def cancel_fibers
274
+ scheduler = Fiber.scheduler
275
+ return unless scheduler.respond_to?(:fiber_interrupt)
276
+
277
+ (@workers + [@dispatcher]).compact.each do |fiber|
278
+ next unless fiber.alive?
279
+
280
+ scheduler.fiber_interrupt(fiber, CancellationError.new)
281
+ rescue StandardError
282
+ nil
283
+ end
284
+ end
285
+
286
+ def validate_scheduler!
287
+ return if Fiber.scheduler && !Fiber.current.blocking?
288
+
289
+ message =
290
+ if Fiber.scheduler
291
+ "Flow.parallel_map requires a non-blocking fiber"
292
+ else
293
+ "Flow.parallel_map requires Fiber.scheduler"
294
+ end
295
+ raise SchedulerRequiredError, message
296
+ end
297
+ end
298
+ end
299
+ end