fiber_stream 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +60 -0
- data/README.md +167 -43
- data/examples/README.md +11 -0
- data/examples/ractor_merge_ports_and_map.rb +116 -0
- data/examples/ractor_producer_sources.rb +43 -0
- data/lib/fiber_stream/errors.rb +4 -1
- data/lib/fiber_stream/flow.rb +75 -16
- data/lib/fiber_stream/internal/ractor_transfer_policy.rb +17 -0
- data/lib/fiber_stream/pipeline.rb +5 -1
- data/lib/fiber_stream/pull/async_boundary.rb +28 -11
- data/lib/fiber_stream/pull/buffer_boundary.rb +28 -10
- data/lib/fiber_stream/pull/concat.rb +9 -1
- data/lib/fiber_stream/pull/grouped.rb +46 -0
- data/lib/fiber_stream/pull/merge.rb +230 -0
- data/lib/fiber_stream/pull/parallel_map_boundary.rb +28 -24
- data/lib/fiber_stream/pull/parallel_unordered_map_boundary.rb +311 -0
- data/lib/fiber_stream/pull/ractor_map_boundary.rb +112 -89
- data/lib/fiber_stream/pull/ractor_merge_ports_source.rb +373 -0
- data/lib/fiber_stream/pull/ractor_port_source.rb +53 -20
- data/lib/fiber_stream/pull/ractor_producer_source.rb +349 -0
- data/lib/fiber_stream/pull/scan.rb +38 -0
- data/lib/fiber_stream/pull/split.rb +134 -0
- data/lib/fiber_stream/pull.rb +74 -5
- data/lib/fiber_stream/ractor_port.rb +3 -1
- data/lib/fiber_stream/ractor_producer.rb +167 -0
- data/lib/fiber_stream/running_pipeline.rb +22 -8
- data/lib/fiber_stream/sink.rb +9 -19
- data/lib/fiber_stream/source.rb +177 -19
- data/lib/fiber_stream/version.rb +1 -1
- data/lib/fiber_stream.rb +2 -0
- data/sig/fiber_stream.rbs +25 -1
- metadata +14 -3
|
@@ -11,6 +11,11 @@ module FiberStream
|
|
|
11
11
|
class ParallelMapBoundary
|
|
12
12
|
TERMINAL_RESULT_CAPACITY = 1
|
|
13
13
|
CancellationError = Class.new(StandardError)
|
|
14
|
+
JobMessage = Data.define(:sequence, :value)
|
|
15
|
+
ValueMessage = Data.define(:sequence, :value)
|
|
16
|
+
DoneMessage = Data.define(:sequence)
|
|
17
|
+
ErrorMessage = Data.define(:sequence, :error)
|
|
18
|
+
private_constant :JobMessage, :ValueMessage, :DoneMessage, :ErrorMessage
|
|
14
19
|
|
|
15
20
|
def initialize(upstream, concurrency, transform)
|
|
16
21
|
@upstream = upstream
|
|
@@ -85,28 +90,23 @@ module FiberStream
|
|
|
85
90
|
end
|
|
86
91
|
|
|
87
92
|
def emit(message)
|
|
88
|
-
case message
|
|
89
|
-
|
|
90
|
-
emit_value(
|
|
91
|
-
|
|
93
|
+
case message
|
|
94
|
+
in ValueMessage[sequence:, value:]
|
|
95
|
+
emit_value(sequence, value)
|
|
96
|
+
in DoneMessage
|
|
92
97
|
complete
|
|
93
|
-
|
|
94
|
-
fail_with_ordered_error(
|
|
98
|
+
in ErrorMessage[sequence:, error:]
|
|
99
|
+
fail_with_ordered_error(sequence, error)
|
|
95
100
|
end
|
|
96
101
|
end
|
|
97
102
|
|
|
98
|
-
def emit_value(
|
|
99
|
-
sequence = message.fetch(1)
|
|
100
|
-
value = message.fetch(2)
|
|
103
|
+
def emit_value(sequence, value)
|
|
101
104
|
@next_emit_sequence = sequence + 1
|
|
102
105
|
return_permit unless @admission_closed
|
|
103
106
|
value
|
|
104
107
|
end
|
|
105
108
|
|
|
106
|
-
def fail_with_ordered_error(
|
|
107
|
-
sequence = message.fetch(1)
|
|
108
|
-
error = message.fetch(2)
|
|
109
|
-
|
|
109
|
+
def fail_with_ordered_error(sequence, error)
|
|
110
110
|
if @failure_sequence && sequence > @failure_sequence
|
|
111
111
|
@next_emit_sequence = sequence + 1
|
|
112
112
|
return next_message
|
|
@@ -130,7 +130,7 @@ module FiberStream
|
|
|
130
130
|
break unless take_permit
|
|
131
131
|
|
|
132
132
|
message = pull_job_message
|
|
133
|
-
if message.
|
|
133
|
+
if message.is_a?(JobMessage)
|
|
134
134
|
break unless deliver_job(message)
|
|
135
135
|
else
|
|
136
136
|
close_admission(close_upstream: false)
|
|
@@ -151,15 +151,19 @@ module FiberStream
|
|
|
151
151
|
|
|
152
152
|
sequence = @next_sequence
|
|
153
153
|
@next_sequence += 1
|
|
154
|
-
|
|
154
|
+
JobMessage.new(sequence:, value:)
|
|
155
155
|
rescue StandardError => error
|
|
156
156
|
close_upstream(record_error: false)
|
|
157
|
-
|
|
157
|
+
ErrorMessage.new(sequence: @next_sequence, error:)
|
|
158
158
|
end
|
|
159
159
|
|
|
160
160
|
def terminal_done_message
|
|
161
161
|
close_error = close_upstream
|
|
162
|
-
|
|
162
|
+
if close_error
|
|
163
|
+
ErrorMessage.new(sequence: @next_sequence, error: close_error)
|
|
164
|
+
else
|
|
165
|
+
DoneMessage.new(sequence: @next_sequence)
|
|
166
|
+
end
|
|
163
167
|
end
|
|
164
168
|
|
|
165
169
|
def run_worker
|
|
@@ -176,23 +180,23 @@ module FiberStream
|
|
|
176
180
|
end
|
|
177
181
|
|
|
178
182
|
def map_job(message)
|
|
179
|
-
sequence = message.
|
|
180
|
-
value = message.
|
|
181
|
-
|
|
183
|
+
sequence = message.sequence
|
|
184
|
+
value = message.value
|
|
185
|
+
ValueMessage.new(sequence:, value: @transform.call(value))
|
|
182
186
|
rescue CancellationError
|
|
183
187
|
raise
|
|
184
188
|
rescue StandardError => error
|
|
185
|
-
|
|
189
|
+
ErrorMessage.new(sequence:, error:)
|
|
186
190
|
end
|
|
187
191
|
|
|
188
192
|
def record_result(message)
|
|
189
|
-
if message.
|
|
190
|
-
sequence = message.
|
|
193
|
+
if message.is_a?(ErrorMessage)
|
|
194
|
+
sequence = message.sequence
|
|
191
195
|
@failure_sequence = sequence if @failure_sequence.nil? || sequence < @failure_sequence
|
|
192
196
|
close_admission
|
|
193
197
|
end
|
|
194
198
|
|
|
195
|
-
@pending[message.
|
|
199
|
+
@pending[message.sequence] = message
|
|
196
200
|
end
|
|
197
201
|
|
|
198
202
|
def drain_available_results
|
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FiberStream
|
|
4
|
+
module Pull
|
|
5
|
+
# Unordered scheduler-backed worker boundary for
|
|
6
|
+
# `Flow.parallel_unordered_map`.
|
|
7
|
+
#
|
|
8
|
+
# A single dispatcher pulls upstream and a bounded worker pool maps values.
|
|
9
|
+
# Downstream emits worker results in completion order. Admission is
|
|
10
|
+
# permit-based to keep queued, running, and completed pulled-but-unemitted
|
|
11
|
+
# work bounded by the configured concurrency.
|
|
12
|
+
class ParallelUnorderedMapBoundary
|
|
13
|
+
TERMINAL_RESULT_CAPACITY = 1
|
|
14
|
+
CancellationError = Class.new(StandardError)
|
|
15
|
+
JobMessage = Data.define(:sequence, :value)
|
|
16
|
+
ValueMessage = Data.define(:sequence, :value)
|
|
17
|
+
DoneMessage = Data.define
|
|
18
|
+
ErrorMessage = Data.define(:sequence, :error)
|
|
19
|
+
CloseErrorMessage = Data.define(:error)
|
|
20
|
+
private_constant :JobMessage, :ValueMessage, :DoneMessage, :ErrorMessage, :CloseErrorMessage
|
|
21
|
+
|
|
22
|
+
def initialize(upstream, concurrency, transform)
|
|
23
|
+
@upstream = upstream
|
|
24
|
+
@concurrency = concurrency
|
|
25
|
+
@transform = transform
|
|
26
|
+
@permits = Thread::SizedQueue.new(concurrency)
|
|
27
|
+
@jobs = Thread::SizedQueue.new(concurrency)
|
|
28
|
+
@results = Thread::SizedQueue.new(concurrency + TERMINAL_RESULT_CAPACITY)
|
|
29
|
+
@workers = []
|
|
30
|
+
@dispatcher = nil
|
|
31
|
+
@next_sequence = 0
|
|
32
|
+
@outstanding_jobs = 0
|
|
33
|
+
@terminal_message = nil
|
|
34
|
+
@started = false
|
|
35
|
+
@closed = false
|
|
36
|
+
@done = false
|
|
37
|
+
@admission_closed = false
|
|
38
|
+
@upstream_closing = false
|
|
39
|
+
@upstream_closed = false
|
|
40
|
+
@upstream_close_error = nil
|
|
41
|
+
@upstream_close_done = Thread::SizedQueue.new(1)
|
|
42
|
+
|
|
43
|
+
concurrency.times { @permits << true }
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def next
|
|
47
|
+
return DONE if @closed || @done
|
|
48
|
+
|
|
49
|
+
start
|
|
50
|
+
next_message
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def close
|
|
54
|
+
return if @closed
|
|
55
|
+
|
|
56
|
+
@closed = true
|
|
57
|
+
@done = true
|
|
58
|
+
close_error = close_upstream
|
|
59
|
+
close_internal_queues
|
|
60
|
+
close_error ||= @upstream_close_error
|
|
61
|
+
raise close_error if close_error
|
|
62
|
+
ensure
|
|
63
|
+
cancel_fibers
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
private
|
|
67
|
+
|
|
68
|
+
def start
|
|
69
|
+
return if @started
|
|
70
|
+
|
|
71
|
+
validate_scheduler!
|
|
72
|
+
|
|
73
|
+
@started = true
|
|
74
|
+
@concurrency.times do
|
|
75
|
+
@workers << Fiber.schedule { run_worker }
|
|
76
|
+
end
|
|
77
|
+
@dispatcher = Fiber.schedule { run_dispatcher }
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
def next_message
|
|
81
|
+
loop do
|
|
82
|
+
return emit_terminal(@terminal_message) if terminal_ready?
|
|
83
|
+
|
|
84
|
+
message = @results.pop
|
|
85
|
+
return complete if message.nil?
|
|
86
|
+
|
|
87
|
+
case message
|
|
88
|
+
in ValueMessage[sequence:, value:]
|
|
89
|
+
return emit_value(sequence, value)
|
|
90
|
+
in DoneMessage | CloseErrorMessage
|
|
91
|
+
@terminal_message = message
|
|
92
|
+
in ErrorMessage[sequence:, error:]
|
|
93
|
+
fail_with_error(sequence, error)
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
def emit_value(_sequence, value)
|
|
99
|
+
@outstanding_jobs -= 1
|
|
100
|
+
return_permit unless @admission_closed
|
|
101
|
+
value
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
def terminal_ready?
|
|
105
|
+
@terminal_message && @outstanding_jobs.zero?
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
def emit_terminal(message)
|
|
109
|
+
case message
|
|
110
|
+
in DoneMessage
|
|
111
|
+
complete
|
|
112
|
+
in CloseErrorMessage[error:]
|
|
113
|
+
fail_with_error(@next_sequence, error, close_admission: false)
|
|
114
|
+
end
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
def fail_with_error(_sequence, error, close_admission: true)
|
|
118
|
+
@done = true
|
|
119
|
+
close_admission() if close_admission
|
|
120
|
+
close_result_queue
|
|
121
|
+
cancel_fibers
|
|
122
|
+
raise error
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
def complete
|
|
126
|
+
@done = true
|
|
127
|
+
close_result_queue
|
|
128
|
+
DONE
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
def run_dispatcher
|
|
132
|
+
loop do
|
|
133
|
+
break if @closed || @admission_closed
|
|
134
|
+
break unless take_permit
|
|
135
|
+
|
|
136
|
+
message = pull_job_message
|
|
137
|
+
if message.is_a?(JobMessage)
|
|
138
|
+
break unless deliver_job(message)
|
|
139
|
+
else
|
|
140
|
+
close_admission(close_upstream: false)
|
|
141
|
+
deliver_result(message)
|
|
142
|
+
break
|
|
143
|
+
end
|
|
144
|
+
end
|
|
145
|
+
rescue CancellationError
|
|
146
|
+
nil
|
|
147
|
+
ensure
|
|
148
|
+
close_upstream unless @upstream_closed || @closed
|
|
149
|
+
close_job_queue
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
def pull_job_message
|
|
153
|
+
value = @upstream.next
|
|
154
|
+
return terminal_done_message if Pull.done?(value)
|
|
155
|
+
|
|
156
|
+
sequence = @next_sequence
|
|
157
|
+
@next_sequence += 1
|
|
158
|
+
@outstanding_jobs += 1
|
|
159
|
+
JobMessage.new(sequence:, value:)
|
|
160
|
+
rescue StandardError => error
|
|
161
|
+
close_upstream(record_error: false)
|
|
162
|
+
ErrorMessage.new(sequence: @next_sequence, error:)
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
def terminal_done_message
|
|
166
|
+
close_error = close_upstream
|
|
167
|
+
if close_error
|
|
168
|
+
CloseErrorMessage.new(error: close_error)
|
|
169
|
+
else
|
|
170
|
+
DoneMessage.new
|
|
171
|
+
end
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
def run_worker
|
|
175
|
+
loop do
|
|
176
|
+
break if @closed
|
|
177
|
+
|
|
178
|
+
message = @jobs.pop
|
|
179
|
+
break if message.nil?
|
|
180
|
+
|
|
181
|
+
deliver_result(map_job(message))
|
|
182
|
+
end
|
|
183
|
+
rescue CancellationError
|
|
184
|
+
nil
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
def map_job(message)
|
|
188
|
+
sequence = message.sequence
|
|
189
|
+
value = message.value
|
|
190
|
+
ValueMessage.new(sequence:, value: @transform.call(value))
|
|
191
|
+
rescue CancellationError
|
|
192
|
+
raise
|
|
193
|
+
rescue StandardError => error
|
|
194
|
+
ErrorMessage.new(sequence:, error:)
|
|
195
|
+
end
|
|
196
|
+
|
|
197
|
+
def close_admission(close_upstream: true)
|
|
198
|
+
return if @admission_closed
|
|
199
|
+
|
|
200
|
+
@admission_closed = true
|
|
201
|
+
close_upstream(record_error: false) if close_upstream
|
|
202
|
+
close_permit_queue
|
|
203
|
+
close_job_queue
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
def take_permit
|
|
207
|
+
@permits.pop
|
|
208
|
+
rescue ClosedQueueError
|
|
209
|
+
nil
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
def return_permit
|
|
213
|
+
@permits << true
|
|
214
|
+
rescue ClosedQueueError
|
|
215
|
+
nil
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
def deliver_job(message)
|
|
219
|
+
@jobs << message
|
|
220
|
+
true
|
|
221
|
+
rescue ClosedQueueError
|
|
222
|
+
false
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
def deliver_result(message)
|
|
226
|
+
@results << message
|
|
227
|
+
true
|
|
228
|
+
rescue ClosedQueueError
|
|
229
|
+
false
|
|
230
|
+
end
|
|
231
|
+
|
|
232
|
+
def close_internal_queues
|
|
233
|
+
close_permit_queue
|
|
234
|
+
close_job_queue
|
|
235
|
+
close_result_queue
|
|
236
|
+
end
|
|
237
|
+
|
|
238
|
+
def close_permit_queue
|
|
239
|
+
@permits.close
|
|
240
|
+
end
|
|
241
|
+
|
|
242
|
+
def close_job_queue
|
|
243
|
+
@jobs.close
|
|
244
|
+
end
|
|
245
|
+
|
|
246
|
+
def close_result_queue
|
|
247
|
+
@results.close
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
def close_upstream(record_error: true)
|
|
251
|
+
return wait_for_upstream_close(record_error:) if @upstream_closing
|
|
252
|
+
return nil if @upstream_closed
|
|
253
|
+
|
|
254
|
+
@upstream_closing = true
|
|
255
|
+
@upstream.close
|
|
256
|
+
nil
|
|
257
|
+
rescue StandardError => error
|
|
258
|
+
@upstream_close_error ||= error if record_error
|
|
259
|
+
error
|
|
260
|
+
ensure
|
|
261
|
+
if @upstream_closing
|
|
262
|
+
@upstream_closed = true
|
|
263
|
+
@upstream_closing = false
|
|
264
|
+
signal_upstream_close_done
|
|
265
|
+
end
|
|
266
|
+
end
|
|
267
|
+
|
|
268
|
+
def wait_for_upstream_close(record_error:)
|
|
269
|
+
@upstream_close_done.pop
|
|
270
|
+
return @upstream_close_error if record_error
|
|
271
|
+
|
|
272
|
+
nil
|
|
273
|
+
rescue ClosedQueueError
|
|
274
|
+
record_error ? @upstream_close_error : nil
|
|
275
|
+
end
|
|
276
|
+
|
|
277
|
+
def signal_upstream_close_done
|
|
278
|
+
@upstream_close_done << true
|
|
279
|
+
rescue ClosedQueueError
|
|
280
|
+
nil
|
|
281
|
+
ensure
|
|
282
|
+
@upstream_close_done.close
|
|
283
|
+
end
|
|
284
|
+
|
|
285
|
+
def cancel_fibers
|
|
286
|
+
scheduler = Fiber.scheduler
|
|
287
|
+
return unless scheduler.respond_to?(:fiber_interrupt)
|
|
288
|
+
|
|
289
|
+
(@workers + [@dispatcher]).compact.each do |fiber|
|
|
290
|
+
next unless fiber.alive?
|
|
291
|
+
|
|
292
|
+
scheduler.fiber_interrupt(fiber, CancellationError.new)
|
|
293
|
+
rescue StandardError
|
|
294
|
+
nil
|
|
295
|
+
end
|
|
296
|
+
end
|
|
297
|
+
|
|
298
|
+
def validate_scheduler!
|
|
299
|
+
return if Fiber.scheduler && !Fiber.current.blocking?
|
|
300
|
+
|
|
301
|
+
message =
|
|
302
|
+
if Fiber.scheduler
|
|
303
|
+
"Flow.parallel_unordered_map requires a non-blocking fiber"
|
|
304
|
+
else
|
|
305
|
+
"Flow.parallel_unordered_map requires Fiber.scheduler"
|
|
306
|
+
end
|
|
307
|
+
raise SchedulerRequiredError, message
|
|
308
|
+
end
|
|
309
|
+
end
|
|
310
|
+
end
|
|
311
|
+
end
|