fiber_stream 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +25 -0
  3. data/LICENSE +19 -0
  4. data/README.md +361 -0
  5. data/examples/README.md +51 -0
  6. data/examples/async_http_requests.rb +132 -0
  7. data/examples/background_execution.rb +31 -0
  8. data/examples/backpressure_buffer.rb +66 -0
  9. data/examples/basic_pipeline.rb +28 -0
  10. data/examples/composable_pipeline.rb +43 -0
  11. data/examples/file_copy.rb +33 -0
  12. data/examples/line_processing.rb +20 -0
  13. data/examples/ractor_map_hashing.rb +43 -0
  14. data/examples/ractor_port_source.rb +45 -0
  15. data/lib/fiber_stream/errors.rb +44 -0
  16. data/lib/fiber_stream/flow.rb +190 -0
  17. data/lib/fiber_stream/pipeline.rb +49 -0
  18. data/lib/fiber_stream/pull/async_boundary.rb +85 -0
  19. data/lib/fiber_stream/pull/buffer_boundary.rb +123 -0
  20. data/lib/fiber_stream/pull/each.rb +31 -0
  21. data/lib/fiber_stream/pull/io_source.rb +89 -0
  22. data/lib/fiber_stream/pull/lines.rb +121 -0
  23. data/lib/fiber_stream/pull/map.rb +37 -0
  24. data/lib/fiber_stream/pull/parallel_map_boundary.rb +299 -0
  25. data/lib/fiber_stream/pull/ractor_map_boundary.rb +500 -0
  26. data/lib/fiber_stream/pull/ractor_port_source.rb +242 -0
  27. data/lib/fiber_stream/pull/select.rb +40 -0
  28. data/lib/fiber_stream/pull/take.rb +47 -0
  29. data/lib/fiber_stream/pull.rb +85 -0
  30. data/lib/fiber_stream/ractor_port.rb +17 -0
  31. data/lib/fiber_stream/running_pipeline.rb +156 -0
  32. data/lib/fiber_stream/sink.rb +176 -0
  33. data/lib/fiber_stream/source.rb +184 -0
  34. data/lib/fiber_stream/version.rb +5 -0
  35. data/lib/fiber_stream.rb +15 -0
  36. data/sig/fiber_stream.rbs +97 -0
  37. metadata +154 -0
@@ -0,0 +1,242 @@
1
+ # frozen_string_literal: true
2
+
3
module FiberStream
  module Pull
    # Pull stream for `Source.ractor_port`.
    #
    # Downstream demand is converted into one `RactorPort::Ack` sent to the
    # producer-owned acknowledgment port. Blocking Ractor waits are isolated in
    # a coordinator thread so scheduler-managed fibers do not call Ractor wait
    # APIs directly.
    #
    # The consumer side (`#next` / `#close`) and the coordinator thread talk
    # through two single-slot queues: `@demands` carries one `:next` token per
    # downstream pull, `@results` carries the tagged outcome back
    # (`[:message, msg]`, `[:error, err]` or `[:closed]`).
    class RactorPortSource
      # Polling interval (seconds) used while waiting for the coordinator
      # thread to exit in `#close`.
      WAIT_INTERVAL = 0.001

      # @param port         data port the coordinator selects on
      # @param ack_port     receiver of `RactorPort::Ack` / `RactorPort::Cancel` (via `#send`)
      # @param ack_transfer `:move` sends control messages with `move: true`;
      #                     any other value uses a plain `send`
      # @param cancel       when truthy, `#close` sends `RactorPort::Cancel`
      #                     unless the producer already reached a terminal message
      def initialize(port, ack_port, ack_transfer, cancel)
        @port = port
        @ack_port = ack_port
        @ack_transfer = ack_transfer
        @cancel_enabled = cancel
        @demands = Thread::SizedQueue.new(1)
        @results = Thread::SizedQueue.new(1)
        @shutdown_port = nil
        @coordinator = nil
        @state_mutex = Mutex.new
        @started = false
        @closed = false
        @done = false
        @producer_terminal = false
        @cancel_sent = false
      end

      # Pulls the next element.
      #
      # Lazily starts the coordinator thread on first demand.
      #
      # @return the element value, or `DONE` once the stream is closed,
      #   completed, or has failed
      # @raise [RactorPortSourceError] on producer failure, invalid protocol
      #   message, acknowledgment transfer failure, or receive failure
      def next
        return DONE if closed_or_done?

        start
        request_next_message
        result = @results.pop
        # A closed, drained result queue pops nil: `#close` ran concurrently.
        return DONE if result.nil?

        handle_result(result)
      end

      # Closes the stream and stops the coordinator thread. Idempotent.
      #
      # Sends `RactorPort::Cancel` first when cancellation is enabled and the
      # producer has not already sent a terminal message.
      #
      # @raise [RactorPortSourceError] when sending the cancel message failed;
      #   raised only after local cleanup has finished
      def close
        should_cancel =
          @state_mutex.synchronize do
            return if @closed

            @closed = true
            @done = true
            @cancel_enabled && !@producer_terminal && !@cancel_sent
          end

        cancel_error = send_cancel if should_cancel
        wake_coordinator
        close_result_queue
        wait_for_coordinator
        # Go through raise_error so the transfer failure is attached as
        # `cause`, exactly like errors surfaced by `#next`.
        raise_error(cancel_error) if cancel_error
      end

      private

      def closed_or_done?
        @state_mutex.synchronize { @closed || @done }
      end

      # Creates the shutdown port and coordinator thread exactly once.
      def start
        @state_mutex.synchronize do
          return if @started

          @started = true
          @shutdown_port = Ractor::Port.new
          @coordinator = Thread.new { run_coordinator }
        end
      end

      # Signals one unit of demand; a closed demand queue is ignored because
      # the following `@results.pop` then yields nil.
      def request_next_message
        @demands.push(:next)
      rescue ClosedQueueError
        nil
      end

      # Translates a tagged coordinator result into a return value or raise.
      def handle_result(result)
        tag = result.fetch(0)

        case tag
        when :message
          handle_protocol_message(result.fetch(1))
        when :error
          mark_done
          raise_error(result.fetch(1))
        when :closed
          DONE
        end
      end

      # Interprets one producer message.
      #
      # Envelopes are matched with keyword patterns, which only rely on
      # `deconstruct_keys` (provided by `Data`). Positional `Const[...]`
      # patterns require `#deconstruct`, which `Data.define` classes do not
      # appear to provide, so valid elements would fall through to the
      # invalid-message branch.
      # NOTE(review): confirm against the targeted Ruby version.
      def handle_protocol_message(message)
        case message
        in RactorPort::Element(value:)
          value
        in RactorPort::Complete
          mark_producer_terminal
          DONE
        in RactorPort::Failure(cause_class_name: String => cause_class_name, cause_message: String => cause_message)
          mark_producer_terminal
          raise RactorPortSourceError.new(
            kind: :producer_failure,
            cause_class_name: cause_class_name,
            cause_message: cause_message
          )
        in RactorPort::Failure
          raise_invalid_message(message, "Failure payloads must be Strings")
        else
          raise_invalid_message(message, "invalid RactorPort message")
        end
      end

      def raise_invalid_message(message, cause_message)
        mark_done
        raise RactorPortSourceError.new(
          kind: :invalid_message,
          cause_class_name: message.class.name,
          cause_message: cause_message
        )
      end

      def mark_done
        @state_mutex.synchronize { @done = true }
      end

      # Marks the stream done because the producer itself terminated, which
      # also suppresses the cancel message in `#close`.
      def mark_producer_terminal
        @state_mutex.synchronize do
          @done = true
          @producer_terminal = true
        end
      end

      # Raises `error`, attaching the wrapped original exception as `cause`
      # when the error carries one.
      def raise_error(error)
        if error.is_a?(RactorPortSourceError) && error.original_cause
          raise error, cause: error.original_cause
        end

        raise error
      end

      # Coordinator thread body: for each demand, acknowledge the producer and
      # wait for either a data message or the shutdown signal.
      def run_coordinator
        # Deliberately not `Kernel#loop`: `loop` rescues StopIteration, and
        # Ractor::ClosedError descends from StopIteration, so a closed port
        # would end the loop silently and leave `#next` blocked on `@results`.
        # `@demands.pop` returns nil once the demand queue has been closed.
        while @demands.pop
          break if closed?

          ack_error = send_ack
          if ack_error
            deliver_result([:error, ack_error])
            break
          end

          selected, message = select_message
          break if selected == @shutdown_port || closed?

          deliver_result([:message, message])
        end
      rescue StandardError => error
        deliver_result([:error, build_error(:receive, error)])
      ensure
        deliver_result([:closed]) if closed?
      end

      def select_message
        Ractor.select(@port, @shutdown_port)
      end

      # @return [RactorPortSourceError, nil] nil on success, otherwise the
      #   wrapped transfer failure (returned, not raised)
      def send_ack
        send_control(RactorPort::Ack.new)
        nil
      rescue StandardError => error
        build_error(:ack_transfer, error)
      end

      # @return [RactorPortSourceError, nil] nil on success, otherwise the
      #   wrapped transfer failure (returned, not raised)
      def send_cancel
        @state_mutex.synchronize { @cancel_sent = true }
        send_control(RactorPort::Cancel.new(:closed))
        nil
      rescue StandardError => error
        build_error(:cancel_transfer, error)
      end

      def send_control(message)
        if @ack_transfer == :move
          @ack_port.send(message, move: true)
        else
          @ack_port.send(message)
        end
      end

      def build_error(kind, error)
        RactorPortSourceError.new(
          kind: kind,
          cause_class_name: error.class.name,
          cause_message: error.message,
          cause: error
        )
      end

      def closed?
        @state_mutex.synchronize { @closed }
      end

      # Unblocks the coordinator wherever it may be waiting: closing the
      # demand queue wakes `@demands.pop`, the shutdown message wakes
      # `Ractor.select`. Failures to signal are ignored (best effort).
      def wake_coordinator
        close_demand_queue
        return unless @shutdown_port

        @shutdown_port.send(:shutdown)
      rescue StandardError
        nil
      end

      def wait_for_coordinator
        return unless @coordinator

        sleep WAIT_INTERVAL while @coordinator.alive?
        @coordinator.join
      end

      # Hands a result to the consumer; silently dropped once the result
      # queue has been closed by `#close`.
      def deliver_result(result)
        return if @results.closed?

        @results.push(result)
      rescue ClosedQueueError, ThreadError
        nil
      end

      def close_demand_queue
        @demands.close
      end

      def close_result_queue
        @results.close
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
module FiberStream
  module Pull
    # Filtering stage.
    #
    # A single downstream demand may pull multiple upstream elements until the
    # predicate accepts a value or upstream completes. Rejected elements are
    # discarded immediately and are not buffered.
    class Select
      # @param upstream  pull stream responding to `#next` and `#close`
      # @param predicate callable invoked with each element; a truthy result
      #                  lets the element through
      def initialize(upstream, predicate)
        @upstream = upstream
        @predicate = predicate
        @closed = false
        @done = false
      end

      # Pulls upstream until an element satisfies the predicate.
      #
      # Exceptions raised by upstream or by the predicate propagate unchanged.
      #
      # @return the first accepted element, or `DONE` when upstream completes
      #   or this stage was closed
      def next
        return DONE if @closed || @done

        # `Kernel#loop` is avoided on purpose: it rescues StopIteration, so a
        # StopIteration raised by upstream or by the predicate would be
        # swallowed and its `result` handed downstream as if it were an element.
        until Pull.done?(value = @upstream.next)
          return value if @predicate.call(value)
        end

        @done = true
        DONE
      end

      # Closes upstream once; later calls are no-ops.
      def close
        return if @closed

        @closed = true
        @upstream.close
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
module FiberStream
  module Pull
    # Limiting stage.
    #
    # Upstream is closed the moment the configured number of elements has been
    # handed downstream; for `take(0)` that happens on the very first demand.
    # Resource-owning sources and asynchronous boundaries therefore observe
    # early completion without waiting for an explicit `#close`.
    class Take
      # @param upstream pull stream responding to `#next` and `#close`
      # @param count    number of elements to let through
      def initialize(upstream, count)
        @upstream = upstream
        @remaining = count
        @closed = false
        @done = false
      end

      # @return the next upstream element while the limit has not been
      #   reached, otherwise `DONE`
      def next
        return DONE if finished?
        return finish_at_limit if @remaining.zero?

        element = @upstream.next
        if Pull.done?(element)
          @done = true
          DONE
        else
          @remaining -= 1
          # Release upstream right away when this was the last allowed element.
          close if @remaining.zero?
          element
        end
      end

      # Closes upstream once; repeated calls do nothing.
      def close
        unless @closed
          @closed = true
          @upstream.close
        end
      end

      private

      def finished?
        @closed || @done
      end

      # Limit already exhausted before pulling (the `take(0)` case).
      def finish_at_limit
        @done = true
        close
        DONE
      end
    end
  end
end
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
module FiberStream
  # Internal pull-stream runtime.
  #
  # The public `Source`, `Flow`, and `Sink` objects are only lazy definitions.
  # Materializing a source attaches those definitions together into private
  # pull stream objects built by the factories below. Each pull stream answers
  # `#next` and `#close`: `#next` yields an element or the private `DONE`
  # sentinel, and `#close` releases upstream resources and may raise cleanup
  # failures. The sentinel must never escape through public APIs.
  module Pull
    # End-of-stream marker; compared by identity only.
    DONE = Object.new.freeze

    class << self
      # True only for the `DONE` sentinel itself.
      def done?(value)
        value.equal?(DONE)
      end

      # --- sources ---------------------------------------------------------

      def each(enumerable)
        Each.new(enumerable)
      end

      def io(io, chunk_size, close_io)
        IOSource.new(io, chunk_size, close_io)
      end

      def ractor_port(port, ack_port, ack_transfer, cancel)
        RactorPortSource.new(port, ack_port, ack_transfer, cancel)
      end

      # --- stages wrapping an upstream pull stream -------------------------

      def map(upstream, transform)
        Map.new(upstream, transform)
      end

      def parallel_map(upstream, concurrency, transform)
        ParallelMapBoundary.new(upstream, concurrency, transform)
      end

      def ractor_map(upstream, workers, input_transfer, output_transfer, transform)
        RactorMapBoundary.new(upstream, workers, input_transfer, output_transfer, transform)
      end

      def select(upstream, predicate)
        Select.new(upstream, predicate)
      end

      def take(upstream, count)
        Take.new(upstream, count)
      end

      def async(upstream)
        AsyncBoundary.new(upstream)
      end

      def buffer(upstream, count)
        BufferBoundary.new(upstream, count)
      end

      def lines(upstream, chomp, max_length)
        Lines.new(upstream, chomp, max_length)
      end
    end

    private_constant :DONE
  end
end
66
+
67
+ require_relative "pull/each"
68
+ require_relative "pull/io_source"
69
+ require_relative "pull/ractor_port_source"
70
+ require_relative "pull/map"
71
+ require_relative "pull/select"
72
+ require_relative "pull/take"
73
+ require_relative "pull/lines"
74
+ require_relative "pull/async_boundary"
75
+ require_relative "pull/buffer_boundary"
76
+ require_relative "pull/parallel_map_boundary"
77
+ require_relative "pull/ractor_map_boundary"
78
+
79
module FiberStream
  module Pull
    # Hide every concrete stage class; they are reachable only through the
    # factory methods defined on this module.
    private_constant(
      *%i[
        Each IOSource RactorPortSource Map Select Take Lines
        AsyncBoundary BufferBoundary ParallelMapBoundary
        RactorMapBoundary
      ]
    )
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
module FiberStream
  # Typed message envelopes for `Source.ractor_port`.
  #
  # Two directions exist. On the data port, producers send `Element`,
  # `Complete`, and `Failure`. On the producer-owned acknowledgment port,
  # FiberStream sends `Ack` and `Cancel`. Wrapping everything in envelopes
  # keeps stream values distinct from control messages and lets receivers use
  # Ruby pattern matching.
  module RactorPort
    # -- control messages (FiberStream -> producer) --

    # Acknowledgment carrying no payload.
    Ack = ::Data.define
    # Cancellation request; `reason` describes why.
    Cancel = ::Data.define(:reason)

    # -- data-port messages (producer -> FiberStream) --

    # One stream element, carried in `value`.
    Element = ::Data.define(:value)
    # Normal end of the stream; carries no payload.
    Complete = ::Data.define
    # Producer-side failure described by class name and message.
    Failure = ::Data.define(:cause_class_name, :cause_message)
  end
end
@@ -0,0 +1,156 @@
1
+ # frozen_string_literal: true
2
+
3
module FiberStream
  # Handle for a pipeline running in a background fiber.
  #
  # The outcome is stored once as a tagged pair: `[:value, result]`,
  # `[:error, exception]` or `[:cancelled, exception]`. Instances cannot be
  # created through the public `new`.
  class RunningPipeline
    # Schedules `run` on a new fiber via `Fiber.schedule` and keeps
    # `scheduler` for later use by `#cancel`.
    def initialize(scheduler, &run)
      @scheduler = scheduler
      @completion = nil
      @waiters = []
      @mutex = Mutex.new
      @cancel_requested = false
      @cancellation_error = nil
      @fiber = Fiber.schedule { run_background(run) }
    end

    # Blocks until the background pipeline has finished.
    #
    # Returns the sink materialized value on success. A stream failure is
    # re-raised as the original exception, and a cancellation that interrupted
    # the background materialization surfaces as `PipelineCancelledError`.
    # Calling this before completion requires a scheduler-backed non-blocking
    # fiber; once the run has completed the stored result is replayed and no
    # scheduler is needed.
    def wait
      outcome, inbox =
        @mutex.synchronize do
          next [@completion, nil] if @completion

          validate_scheduler!("RunningPipeline#wait")
          queue = Thread::Queue.new
          @waiters << queue
          [nil, queue]
        end

      # Either the stored outcome, or whatever `complete` pushes to our queue.
      deliver(outcome || inbox.pop)
    end

    # Asks the background pipeline to stop.
    #
    # Cancellation is cooperative and goes through the scheduler captured when
    # `Pipeline#run_async` started the background fiber. Repeated calls are
    # harmless. When the captured scheduler cannot interrupt fibers, the
    # method raises `NotImplementedError` and no cancellation request is
    # recorded.
    def cancel
      target, reason =
        @mutex.synchronize do
          return self if @completion || @cancel_requested

          unless @scheduler.respond_to?(:fiber_interrupt)
            raise NotImplementedError, "scheduler does not support fiber_interrupt"
          end

          @cancellation_error = PipelineCancelledError.new("pipeline cancelled")
          @cancel_requested = true
          [@fiber, @cancellation_error]
        end

      interrupt(target, reason)
      self
    end

    # True once the background run has ended, whether by success, failure, or
    # cancellation.
    def done?
      @mutex.synchronize { @completion ? true : false }
    end

    # True after `cancel` has successfully recorded a cancellation request.
    def cancel_requested?
      @mutex.synchronize { @cancel_requested }
    end

    private_class_method :new

    private

    # Fiber body: runs the pipeline and records its outcome. Every exception
    # class is captured so waiters always receive a completion.
    def run_background(run)
      complete([:value, run.call])
    rescue Exception => failure # rubocop:disable Lint/RescueException
      complete(classify_error(failure))
    end

    # Tags `error` as `:cancelled` only when it is the exact exception object
    # created by `#cancel`.
    def classify_error(error)
      [cancellation_error?(error) ? :cancelled : :error, error]
    end

    def cancellation_error?(error)
      @mutex.synchronize { @cancellation_error.equal?(error) }
    end

    # Stores the first outcome and wakes every waiter; later calls are ignored.
    def complete(message)
      pending =
        @mutex.synchronize do
          return if @completion

          @completion = message
          @waiters.tap { @waiters = [] }
        end

      # Notify outside the mutex.
      pending.each { |queue| queue << message }
    end

    # Turns a stored outcome into a return value or a raise.
    def deliver(message)
      tag = message.fetch(0)
      return message.fetch(1) if tag == :value

      raise message.fetch(1) if %i[error cancelled].include?(tag)
    end

    # Interrupts a live fiber through the scheduler; if that fails the
    # cancellation request is rolled back and the failure re-raised.
    def interrupt(fiber, cancellation_error)
      @scheduler.fiber_interrupt(fiber, cancellation_error) if fiber&.alive?
    rescue NotImplementedError, StandardError
      clear_cancellation_request(cancellation_error)
      raise
    end

    # Undoes a recorded cancellation, unless the run has already completed or
    # a different cancellation error is on record.
    def clear_cancellation_request(cancellation_error)
      @mutex.synchronize do
        if @cancellation_error.equal?(cancellation_error) && !@completion
          @cancellation_error = nil
          @cancel_requested = false
        end
      end
    end

    # Raises `SchedulerRequiredError` unless a scheduler is set and the
    # current fiber is non-blocking.
    def validate_scheduler!(operation)
      scheduler = Fiber.scheduler
      return if scheduler && !Fiber.current.blocking?

      message = scheduler ? "#{operation} requires a non-blocking fiber" : "#{operation} requires Fiber.scheduler"
      raise SchedulerRequiredError, message
    end
  end
end