fiber_stream 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,349 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FiberStream
4
+ module Pull
5
# Setup adapter for high-level owned Ractor producer sources.
#
# Producer ractors and ports are created on first demand. Once every
# producer has returned its producer-owned ack port, this adapter delegates
# demand to the existing low-level Ractor port pull sources.
#
# Setup runs on a background thread and reports its outcome (SetupSuccess,
# SetupError or SetupClosed) through a one-slot queue; #next consumes that
# outcome to install the delegate.
class RactorProducerSource
  StartedProducer = Data.define(:side, :data_port, :setup_port, :ractor, :definition)
  ReadyProducer = Data.define(:side, :data_port, :ack_port, :ractor)
  PortPair = Data.define(:port, :ack_port, :producer_ractor)
  SetupSuccess = Data.define(:producers)
  SetupError = Data.define(:error)
  SetupClosed = Data.define
  private_constant :StartedProducer, :ReadyProducer, :PortPair, :SetupSuccess, :SetupError, :SetupClosed

  # definitions  - producer definitions; each must respond to #block,
  #                #transfer and #args (read when the producer is spawned).
  # ack_transfer - when :move, control messages are sent with move: true.
  # merge        - truthy: delegate to a merge source over all producers;
  #                otherwise delegate to a single-port source over the first.
  def initialize(definitions, ack_transfer, merge)
    @definitions = definitions
    @ack_transfer = ack_transfer
    @merge = merge
    @setup_results = Thread::SizedQueue.new(1)
    @state_mutex = Mutex.new
    @started = false
    @closed = false
    @done = false
    @shutdown_port = nil
    @setup_thread = nil
    @delegate = nil
    @started_producers = []
    @ready_producers = []
  end

  # Returns the next value from the delegate, or DONE once the source is
  # closed, finished, or setup was cancelled. Raises the setup error when
  # setup failed, and any error raised while building the delegate; in both
  # cases the source is marked done first, so later calls return DONE.
  def next
    return DONE if closed_or_done?

    start
    return DONE unless ensure_delegate

    value = @delegate.next
    mark_done if Pull.done?(value)
    value
  end

  # Idempotent shutdown: wakes and joins the setup thread, closes the result
  # queue and the delegate, cancels every ready producer and waits for their
  # ractors. A StandardError raised by the delegate's #close is re-raised
  # only after the remaining cleanup has run.
  def close
    already_closed = mark_closed
    return if already_closed

    wake_setup
    wait_for_setup
    close_setup_queue
    close_error = close_delegate
    cancel_ready_producers
    wait_for_ractors(@ready_producers.map(&:ractor))
    raise close_error if close_error
  end

  private

  def closed_or_done?
    @state_mutex.synchronize { @closed || @done }
  end

  def mark_done
    @state_mutex.synchronize { @done = true }
  end

  # Marks the source closed and done; returns whether it was already closed.
  def mark_closed
    @state_mutex.synchronize do
      already_closed = @closed
      @closed = true
      @done = true
      already_closed
    end
  end

  # Spawns the producers and the setup thread exactly once. If spawning
  # fails, a cleanup thread is started instead and the failure is delivered
  # as a SetupError through the same queue.
  def start
    @state_mutex.synchronize do
      return if @started

      @started = true
      @shutdown_port = Ractor::Port.new
      spawn_producers
      @setup_thread = Thread.new { run_setup }
    end
  rescue Exception => error # rubocop:disable Lint/RescueException
    setup_error = build_error(:producer_setup, error)
    @setup_thread = Thread.new { cleanup_after_start_failure(setup_error) }
  end

  def cleanup_after_start_failure(setup_error)
    setup_ports = @started_producers.map(&:setup_port)
    ractors = @started_producers.map(&:ractor)
    producer_by_setup_port = @started_producers.to_h { |producer| [producer.setup_port, producer] }

    cleanup_remaining_setup(setup_ports, ractors, producer_by_setup_port)
    deliver_setup(SetupError.new(error: setup_error))
  end

  # Creates one data port, one setup port and one ractor per definition.
  def spawn_producers
    @started_producers = []

    @definitions.each_with_index do |definition, side|
      data_port = Ractor::Port.new
      setup_port = Ractor::Port.new
      ractor = self.class.spawn_producer(data_port, setup_port, definition)
      @started_producers << StartedProducer.new(side:, data_port:, setup_port:, ractor:, definition:)
    end
  end

  # Setup thread body: collects one ack port per producer, then delivers
  # SetupSuccess with the producers ordered by side. A message on the
  # shutdown port delivers SetupClosed; any failure delivers SetupError.
  def run_setup
    remaining_setup_ports = @started_producers.map(&:setup_port)
    remaining_ractors = @started_producers.map(&:ractor)
    producer_by_setup_port = @started_producers.to_h { |producer| [producer.setup_port, producer] }

    until remaining_setup_ports.empty?
      selected, message = Ractor.select(@shutdown_port, *remaining_setup_ports, *remaining_ractors)
      if selected == @shutdown_port
        deliver_setup(SetupClosed.new)
        cleanup_remaining_setup(remaining_setup_ports, remaining_ractors, producer_by_setup_port)
        return
      elsif producer_by_setup_port.key?(selected)
        producer = producer_by_setup_port.fetch(selected)
        validate_ack_port!(message)
        @ready_producers << ReadyProducer.new(
          side: producer.side,
          data_port: producer.data_port,
          ack_port: message,
          ractor: producer.ractor
        )
        remaining_setup_ports.delete(selected)
      else
        # NOTE(review): ractors of producers that already delivered their ack
        # port stay in remaining_ractors, so their termination while other
        # producers are still setting up also lands here - confirm intended.
        raise "producer exited before setup completed"
      end
    end

    deliver_setup(SetupSuccess.new(producers: @ready_producers.sort_by(&:side).freeze))
  rescue Exception => error # rubocop:disable Lint/RescueException
    setup_error = build_error(:producer_setup, error)
    cancel_ready_producers
    cleanup_remaining_setup(remaining_setup_ports, remaining_ractors, producer_by_setup_port)
    deliver_setup(SetupError.new(error: setup_error))
  end

  # Best-effort drain used on shutdown/failure: every producer that still
  # reports an ack port is cancelled immediately, producers whose ractor
  # finished are dropped, and finally all started ractors are awaited.
  # Errors are deliberately swallowed so cleanup never masks the outcome
  # the caller is about to deliver.
  def cleanup_remaining_setup(remaining_setup_ports, remaining_ractors, producer_by_setup_port)
    producer_by_ractor = @started_producers.to_h { |producer| [producer.ractor, producer] }

    until remaining_setup_ports.empty?
      selected, message = Ractor.select(*remaining_setup_ports, *remaining_ractors)
      if producer_by_setup_port.key?(selected)
        producer = producer_by_setup_port.fetch(selected)
        validate_ack_port!(message)
        ready = ReadyProducer.new(
          side: producer.side,
          data_port: producer.data_port,
          ack_port: message,
          ractor: producer.ractor
        )
        @ready_producers << ready
        send_cancel(ready)
        remaining_setup_ports.delete(selected)
      else
        producer = producer_by_ractor.fetch(selected)
        remaining_setup_ports.delete(producer.setup_port)
        remaining_ractors.delete(selected)
      end
    end

    wait_for_ractors(@started_producers.map(&:ractor))
  rescue Exception # rubocop:disable Lint/RescueException
    nil
  end

  # Rejects objects whose #send is only the generic Kernel#send.
  def validate_ack_port!(ack_port)
    return if ack_port.respond_to?(:send) && ack_port.method(:send).owner != Kernel

    raise TypeError, "producer setup did not return a Ractor-style ack port"
  end

  # Installs the delegate from the setup outcome. Returns true when a
  # delegate is available, false when the source ended without one, and
  # raises when setup failed.
  def ensure_delegate
    return true if delegate_installed?

    case setup_result
    in SetupSuccess[producers:]
      delegate = build_delegate_or_finish(producers)
      should_close_delegate = false
      @state_mutex.synchronize do
        @delegate = delegate
        should_close_delegate = @closed
        # A concurrent #close won the race: do not publish the delegate.
        @delegate = nil if should_close_delegate
      end
      if should_close_delegate
        close_delegate_suppressing(delegate)
        return false
      end

      true
    in SetupError[error:]
      mark_done
      raise_error(error)
    in SetupClosed
      mark_done
      false
    end
  end

  # Builds the delegate; on failure marks the source done before re-raising.
  # The setup result has already been consumed at this point, so without the
  # done flag a later #next would block forever on the empty result queue.
  def build_delegate_or_finish(producers)
    build_delegate(producers)
  rescue Exception # rubocop:disable Lint/RescueException
    mark_done
    raise
  end

  # Pops the setup outcome; a closed (or nil-yielding) queue means closed.
  def setup_result
    result = @setup_results.pop
    result || SetupClosed.new
  rescue ClosedQueueError
    SetupClosed.new
  end

  def build_delegate(producers)
    if @merge
      Pull.ractor_merge_ports(
        producers.map do |producer|
          PortPair.new(port: producer.data_port, ack_port: producer.ack_port, producer_ractor: producer.ractor)
        end,
        @ack_transfer,
        true
      )
    else
      producer = producers.fetch(0)
      Pull.ractor_port(producer.data_port, producer.ack_port, @ack_transfer, true, producer.ractor)
    end
  end

  # Sends a cancel to every ready producer, ignoring per-producer failures.
  def cancel_ready_producers
    @ready_producers.each do |producer|
      send_cancel(producer)
    rescue Exception # rubocop:disable Lint/RescueException
      nil
    end
  end

  def send_cancel(producer)
    send_control(producer.ack_port, RactorPort::Cancel.new(:closed))
  end

  def send_control(port, message)
    if @ack_transfer == :move
      port.send(message, move: true)
    else
      port.send(message)
    end
  end

  # Closes the installed delegate, returning (not raising) a StandardError
  # so that #close can finish its remaining cleanup first.
  def close_delegate
    delegate = @state_mutex.synchronize { @delegate }
    delegate&.close
    nil
  rescue StandardError => error
    error
  end

  def close_delegate_suppressing(delegate)
    delegate.close
  rescue StandardError
    nil
  end

  def delegate_installed?
    @state_mutex.synchronize { !@delegate.nil? }
  end

  # Nudges the setup thread out of Ractor.select; failures are ignored.
  def wake_setup
    @shutdown_port&.send(:shutdown)
  rescue Exception # rubocop:disable Lint/RescueException
    nil
  end

  def wait_for_setup
    @setup_thread&.join
  end

  # Waits for each ractor to finish, ignoring their failures. The wait runs
  # on a helper thread that is joined immediately - presumably to keep the
  # blocking Ractor#value calls off the calling fiber; confirm before changing.
  def wait_for_ractors(ractors)
    return if ractors.empty?

    Thread.new do
      ractors.each do |ractor|
        ractor.value
      rescue Exception # rubocop:disable Lint/RescueException
        nil
      end
    end.join
  end

  def close_setup_queue
    @setup_results.close
  end

  # Publishes the setup outcome; a closed queue simply drops it.
  def deliver_setup(result)
    @setup_results.push(result)
  rescue ClosedQueueError, ThreadError
    nil
  end

  def build_error(kind, error)
    RactorPortSourceError.new(
      kind: kind,
      cause_class_name: error.class.name,
      cause_message: error.message,
      cause: error
    )
  end

  def raise_error(error)
    if error.is_a?(RactorPortSourceError) && error.original_cause
      raise error, cause: error.original_cause
    end

    raise error
  end

  class << self
    # Starts a producer ractor. The ractor creates its own ack port, reports
    # it on the setup port, runs the definition block with a RactorProducer,
    # and finishes with a Producer* marker describing how it ended.
    def spawn_producer(data_port, setup_port, definition) # :nodoc:
      Ractor.new(
        data_port,
        setup_port,
        definition.block,
        definition.transfer,
        definition.args
      ) do |outbox, setup, block, transfer, args|
        ack_port = Ractor::Port.new
        setup.send(ack_port)
        producer = RactorProducer.new(outbox, ack_port, transfer)

        begin
          block.call(producer, *args)
          producer.complete unless producer.terminal? || producer.cancelled?
        rescue Exception => error # rubocop:disable Lint/RescueException
          producer.fail(error) unless producer.terminal? || producer.cancelled?
        end

        if producer.send_failed?
          ProducerSendFailed.new
        elsif producer.cancelled?
          ProducerCancelled.new
        else
          ProducerTerminal.new
        end
      end
    end
  end
end
348
+ end
349
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FiberStream
4
+ module Pull
5
# Complement filtering stage.
#
# A single downstream demand may pull multiple upstream elements until the
# predicate retains a value or upstream completes. Rejected elements are
# discarded immediately and are not buffered.
class Reject
  # upstream  - pull stage responding to #next and #close.
  # predicate - callable; elements for which it returns a truthy value are dropped.
  def initialize(upstream, predicate)
    @upstream = upstream
    @predicate = predicate
    @closed = false
    @done = false
  end

  # Returns the next element the predicate does not reject, or DONE once the
  # stage is closed or upstream has completed. Errors raised by upstream or
  # by the predicate propagate to the caller.
  def next
    return DONE if @closed || @done

    # Deliberately not Kernel#loop: it rescues StopIteration, which would
    # turn a StopIteration raised by the predicate or upstream into a bogus
    # emitted value instead of an error.
    until Pull.done?(value = @upstream.next)
      return value unless @predicate.call(value)
    end

    @done = true
    DONE
  end

  # Closes upstream once; later calls are no-ops.
  def close
    return if @closed

    @closed = true
    @upstream.close
  end
end
39
+ end
40
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FiberStream
4
+ module Pull
5
# Running-accumulator stage.
#
# Each downstream demand pulls exactly one upstream value, folds it into the
# accumulator with the reducer, and emits the new accumulator.
class Scan
  # upstream - pull stage responding to #next and #close.
  # initial  - starting accumulator value.
  # reducer  - callable invoked as reducer.call(accumulator, value).
  def initialize(upstream, initial, reducer)
    @upstream = upstream
    @reducer = reducer
    @accumulator = initial
    @done = false
    @closed = false
  end

  # Returns the updated accumulator, or DONE once closed or upstream is done.
  def next
    return DONE if @done || @closed

    element = @upstream.next
    unless Pull.done?(element)
      return @accumulator = @reducer.call(@accumulator, element)
    end

    @done = true
    DONE
  end

  # Closes upstream on the first call only.
  def close
    unless @closed
      @closed = true
      @upstream.close
    end
  end
end
37
+ end
38
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FiberStream
4
+ module Pull
5
# Stateless observing stage.
#
# Every downstream demand pulls one upstream element; real elements are
# shown to the observer and then passed through untouched.
class Tap
  # upstream - pull stage responding to #next and #close.
  # observer - callable invoked with each real element; its result is ignored.
  def initialize(upstream, observer)
    @upstream = upstream
    @observer = observer
    @done = false
    @closed = false
  end

  # Returns the upstream element unchanged, or DONE once closed or finished.
  def next
    return DONE if @done || @closed

    element = @upstream.next
    if Pull.done?(element)
      @done = true
    else
      @observer.call(element)
    end
    # When upstream is done, element is the DONE sentinel itself.
    element
  end

  # Closes upstream on the first call only.
  def close
    unless @closed
      @closed = true
      @upstream.close
    end
  end
end
37
+ end
38
+ end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FiberStream
4
+ module Pull
5
# Pull-driven rate-limiting stage.
#
# For each demand the stage pulls at most one upstream value, acquires a
# single permit from the limiter, and emits the value unless the stage was
# closed while it was waiting for the permit.
class Throttle
  # upstream - pull stage responding to #next and #close.
  # limiter  - object responding to acquire(permits:).
  def initialize(upstream, limiter)
    @upstream = upstream
    @limiter = limiter
    @done = false
    @closed = false
  end

  # Returns the upstream value after one permit was acquired, or DONE when
  # the stage is closed/finished, upstream completed, or the stage was
  # closed during the permit wait.
  def next
    return DONE if @done || @closed

    element = @upstream.next
    unless Pull.done?(element)
      @limiter.acquire(permits: 1)
      # Re-check: #close may have run while acquire was waiting.
      return element unless @closed
    end

    @done = true
    DONE
  end

  # Closes upstream on the first call only.
  def close
    unless @closed
      @closed = true
      @upstream.close
    end
  end
end
42
+ end
43
+ end
@@ -11,11 +11,40 @@ module FiberStream
11
11
  # The sentinel must never escape through public APIs.
12
12
  module Pull
13
13
  DONE = Object.new.freeze
14
# Field-less result markers. A producer ractor's block finishes with one of
# these (send failed / cancelled / otherwise terminal); they are private to Pull.
+ ProducerTerminal = Data.define
15
+ ProducerCancelled = Data.define
16
+ ProducerSendFailed = Data.define
17
+ private_constant :ProducerTerminal, :ProducerCancelled, :ProducerSendFailed
14
18
 
15
19
  def self.done?(value)
16
20
  value.equal?(DONE)
17
21
  end
18
22
 
23
# Pulls from +stream+ and yields every element until the DONE sentinel is
# returned; the sentinel itself is never yielded. Returns nil.
#
# stream - object responding to #next.
#
# Errors raised by +stream+ or by the block propagate to the caller. A plain
# `until` loop is used instead of Kernel#loop because Kernel#loop rescues
# StopIteration, which would silently end iteration if the block (or the
# stream) raised it.
def self.each_value(stream)
  until done?(value = stream.next)
    yield value
  end
end
31
+
32
# Builds the :producer_failure RactorPortSourceError describing a producer
# ractor that ended without delivering its message. The wording depends on
# whether +result+ is a ProducerSendFailed marker; the reported cause class
# name is always "RuntimeError".
def self.ractor_producer_termination_error(result)
  description =
    case result
    when ProducerSendFailed then "producer exited after failing to send the ack-permitted message"
    else "producer exited before sending the ack-permitted message"
    end

  RactorPortSourceError.new(kind: :producer_failure, cause_class_name: "RuntimeError", cause_message: description)
end
47
+
19
48
  def self.each(enumerable)
20
49
  Each.new(enumerable)
21
50
  end
@@ -24,14 +53,22 @@ module FiberStream
24
53
  IOSource.new(io, chunk_size, close_io)
25
54
  end
26
55
 
27
- def self.ractor_port(port, ack_port, ack_transfer, cancel)
28
- RactorPortSource.new(port, ack_port, ack_transfer, cancel)
56
# Factory: returns a new RactorPortSource. +producer_ractor+ is optional
# (defaults to nil) and is forwarded as the fifth constructor argument.
+ def self.ractor_port(port, ack_port, ack_transfer, cancel, producer_ractor = nil)
57
+ RactorPortSource.new(port, ack_port, ack_transfer, cancel, producer_ractor)
29
58
  end
30
59
 
31
60
  def self.ractor_merge_ports(port_pairs, ack_transfer, cancel)
32
61
  RactorMergePortsSource.new(port_pairs, ack_transfer, cancel)
33
62
  end
34
63
 
64
# Factory: returns a new RactorProducerSource with its merge flag (third
# argument) set to false.
+ def self.ractor_producer(definitions, ack_transfer)
65
+ RactorProducerSource.new(definitions, ack_transfer, false)
66
+ end
67
+
68
# Factory: returns a new RactorProducerSource with its merge flag (third
# argument) set to true.
+ def self.ractor_merge_producers(definitions, ack_transfer)
69
+ RactorProducerSource.new(definitions, ack_transfer, true)
70
+ end
71
+
35
72
  def self.concat(left_materializer, right_materializer)
36
73
  Concat.new(left_materializer, right_materializer)
37
74
  end
@@ -48,10 +85,30 @@ module FiberStream
48
85
  Map.new(upstream, transform)
49
86
  end
50
87
 
88
# Factory: returns a new FilterMap stage over +upstream+ using +transform+.
+ def self.filter_map(upstream, transform)
89
+ FilterMap.new(upstream, transform)
90
+ end
91
+
92
# Factory: returns a new Compact stage over +upstream+.
+ def self.compact(upstream)
93
+ Compact.new(upstream)
94
+ end
95
+
96
# Factory: returns a new MapConcat stage over +upstream+ using +transform+.
+ def self.map_concat(upstream, transform)
97
+ MapConcat.new(upstream, transform)
98
+ end
99
+
100
# Factory: returns a new Tap stage over +upstream+ that calls +observer+.
+ def self.tap(upstream, observer)
101
+ Tap.new(upstream, observer)
102
+ end
103
+
51
104
  def self.parallel_map(upstream, concurrency, transform)
52
105
  ParallelMapBoundary.new(upstream, concurrency, transform)
53
106
  end
54
107
 
108
# Factory: returns a new ParallelUnorderedMapBoundary over +upstream+.
+ def self.parallel_unordered_map(upstream, concurrency, transform)
109
+ ParallelUnorderedMapBoundary.new(upstream, concurrency, transform)
110
+ end
111
+
55
112
  def self.ractor_map(upstream, workers, input_transfer, output_transfer, transform)
56
113
  RactorMapBoundary.new(upstream, workers, input_transfer, output_transfer, transform)
57
114
  end
@@ -60,6 +117,10 @@ module FiberStream
60
117
  Select.new(upstream, predicate)
61
118
  end
62
119
 
120
# Factory: returns a new Reject stage over +upstream+ using +predicate+.
+ def self.reject(upstream, predicate)
121
+ Reject.new(upstream, predicate)
122
+ end
123
+
63
124
  def self.take(upstream, count)
64
125
  Take.new(upstream, count)
65
126
  end
@@ -72,6 +133,10 @@ module FiberStream
72
133
  Grouped.new(upstream, count)
73
134
  end
74
135
 
136
# Factory: returns a new Scan stage over +upstream+, seeded with +initial+
# and folding with +reducer+.
+ def self.scan(upstream, initial, reducer)
137
+ Scan.new(upstream, initial, reducer)
138
+ end
139
+
75
140
  def self.take_while(upstream, predicate)
76
141
  TakeWhile.new(upstream, predicate)
77
142
  end
@@ -88,6 +153,10 @@ module FiberStream
88
153
  BufferBoundary.new(upstream, count)
89
154
  end
90
155
 
156
# Factory: returns a new Throttle stage over +upstream+ gated by +limiter+.
+ def self.throttle(upstream, limiter)
157
+ Throttle.new(upstream, limiter)
158
+ end
159
+
91
160
  def self.lines(upstream, chomp, max_length)
92
161
  Lines.new(upstream, chomp, max_length)
93
162
  end
@@ -104,27 +173,37 @@ require_relative "pull/each"
104
173
  require_relative "pull/io_source"
105
174
  require_relative "pull/ractor_port_source"
106
175
  require_relative "pull/ractor_merge_ports_source"
176
+ require_relative "pull/ractor_producer_source"
107
177
  require_relative "pull/concat"
108
178
  require_relative "pull/zip"
109
179
  require_relative "pull/merge"
110
180
  require_relative "pull/map"
181
+ require_relative "pull/filter_map"
182
+ require_relative "pull/compact"
183
+ require_relative "pull/map_concat"
184
+ require_relative "pull/tap"
111
185
  require_relative "pull/select"
186
+ require_relative "pull/reject"
112
187
  require_relative "pull/take"
113
188
  require_relative "pull/drop"
114
189
  require_relative "pull/grouped"
190
+ require_relative "pull/scan"
115
191
  require_relative "pull/take_while"
116
192
  require_relative "pull/drop_while"
117
193
  require_relative "pull/lines"
118
194
  require_relative "pull/split"
119
195
  require_relative "pull/async_boundary"
120
196
  require_relative "pull/buffer_boundary"
197
+ require_relative "pull/throttle"
121
198
  require_relative "pull/parallel_map_boundary"
199
+ require_relative "pull/parallel_unordered_map_boundary"
122
200
  require_relative "pull/ractor_map_boundary"
123
201
 
124
202
  # Reopens Pull after the stage files are required and marks the listed
  # stage classes as private constants of Pull.
  module FiberStream
125
203
  module Pull
126
- private_constant :Each, :IOSource, :RactorPortSource, :RactorMergePortsSource, :Concat, :Zip, :Merge, :Map,
127
- :Select, :Take, :Drop, :Grouped, :TakeWhile, :DropWhile, :Lines, :Split, :AsyncBoundary,
128
- :BufferBoundary, :ParallelMapBoundary, :RactorMapBoundary
204
+ private_constant :Each, :IOSource, :RactorPortSource, :RactorMergePortsSource, :RactorProducerSource, :Concat,
205
+ :Zip, :Merge, :Map, :FilterMap, :Compact, :MapConcat, :Tap, :Select, :Reject, :Take, :Drop,
206
+ :Grouped, :Scan, :TakeWhile, :DropWhile, :Lines, :Split, :AsyncBoundary, :BufferBoundary,
207
+ :Throttle, :ParallelMapBoundary, :ParallelUnorderedMapBoundary, :RactorMapBoundary
129
208
  end
130
209
  end