fiber_stream 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +37 -0
- data/README.md +102 -9
- data/examples/README.md +5 -0
- data/examples/ractor_merge_ports_and_map.rb +116 -0
- data/lib/fiber_stream/errors.rb +4 -1
- data/lib/fiber_stream/flow.rb +37 -1
- data/lib/fiber_stream/pull/async_boundary.rb +28 -11
- data/lib/fiber_stream/pull/buffer_boundary.rb +28 -10
- data/lib/fiber_stream/pull/concat.rb +9 -1
- data/lib/fiber_stream/pull/grouped.rb +46 -0
- data/lib/fiber_stream/pull/merge.rb +230 -0
- data/lib/fiber_stream/pull/parallel_map_boundary.rb +28 -24
- data/lib/fiber_stream/pull/ractor_map_boundary.rb +103 -79
- data/lib/fiber_stream/pull/ractor_merge_ports_source.rb +358 -0
- data/lib/fiber_stream/pull/ractor_port_source.rb +14 -14
- data/lib/fiber_stream/pull/split.rb +134 -0
- data/lib/fiber_stream/pull.rb +23 -3
- data/lib/fiber_stream/ractor_port.rb +3 -1
- data/lib/fiber_stream/running_pipeline.rb +18 -8
- data/lib/fiber_stream/source.rb +105 -3
- data/lib/fiber_stream/version.rb +1 -1
- data/sig/fiber_stream.rbs +7 -0
- metadata +7 -2
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FiberStream
|
|
4
|
+
module Pull
|
|
5
|
+
# Fixed-size batching stage.
#
# Pulls consecutive elements from upstream and hands them downstream as
# arrays holding at most `count` elements each. When upstream finishes in
# the middle of a batch, the partial batch is emitted once; every later
# pull yields DONE.
class Grouped
  # upstream - object responding to `next` and `close`.
  # count    - maximum number of elements per emitted array.
  def initialize(upstream, count)
    @upstream = upstream
    @count = count
    @closed = false
    @done = false
  end

  # Returns the next batch, or DONE once the stage is closed or upstream
  # has been exhausted.
  def next
    return DONE if @closed || @done

    batch = collect_batch
    # An exhausted upstream with nothing collected means there is no
    # trailing partial batch to emit.
    (@done && batch.empty?) ? DONE : batch
  end

  # Closes upstream the first time it is called; later calls do nothing.
  def close
    unless @closed
      @closed = true
      @upstream.close
    end
  end

  private

  # Pulls from upstream until the batch is full or upstream reports DONE,
  # recording exhaustion in @done.
  def collect_batch
    batch = []

    while batch.length < @count
      element = @upstream.next
      if Pull.done?(element)
        @done = true
        break
      end

      batch.push(element)
    end

    batch
  end
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FiberStream
|
|
4
|
+
module Pull
|
|
5
|
+
# Scheduler-backed ready-order source merge.
#
# Each input source is materialized by a scheduled producer fiber on first
# downstream demand. Producers publish values, completion, and failures into
# a bounded mailbox; downstream emits values in mailbox arrival order while
# preserving each input's own order.
class Merge
  SIDE_ORDER = [:left, :right].freeze
  # Raised inside producer fibers (via the scheduler's fiber_interrupt) to
  # stop them when the merge is closed or has failed.
  CancellationError = Class.new(StandardError)
  ValueMessage = Data.define(:side, :value)
  DoneMessage = Data.define(:side)
  ErrorMessage = Data.define(:side, :error)
  private_constant :ValueMessage, :DoneMessage, :ErrorMessage

  # left_materializer / right_materializer - callables; each is invoked
  # (with no arguments) by its producer fiber and must return a stream
  # responding to `next` and `close`.
  def initialize(left_materializer, right_materializer)
    @materializers = { left: left_materializer, right: right_materializer }
    @streams = { left: nil, right: nil }
    @stream_closed = { left: false, right: false }
    @side_done = { left: false, right: false }
    @producers = {}
    @mailbox = nil
    @started = false
    @closed = false
    @done = false
  end

  # Returns the next merged value, or DONE once the merge is closed or
  # finished. The first call starts the producers.
  #
  # Raises SchedulerRequiredError when started without a fiber scheduler or
  # from a blocking fiber, and re-raises a failure reported by a producer.
  def next
    return DONE if @closed || @done

    start
    next_message
  end

  # Closes both materialized streams and the mailbox, then interrupts any
  # producer still alive. Only the first call does the closing work.
  #
  # Raises the first error returned while closing the streams, after the
  # mailbox has been closed.
  def close
    return if @closed

    @closed = true
    @done = true
    close_error = close_materialized_streams
    close_mailbox
    raise close_error if close_error
  ensure
    cancel_producers
  end

  private

  # Creates the mailbox (capacity 1) and schedules one producer per side.
  def start
    return if @started

    validate_scheduler!

    @mailbox = MergeMailbox.new(1)
    @started = true
    SIDE_ORDER.each do |side|
      @producers[side] = Fiber.schedule { run_producer(side) }
    end
  end

  # Pops mailbox messages until one yields a downstream result: a value,
  # DONE (both sides finished or mailbox closed), or a raised failure.
  def next_message
    failure = nil

    loop do
      message = @mailbox.pop
      return complete if message.nil?

      case message
      in ValueMessage[value:]
        return value
      in DoneMessage[side:]
        mark_side_done(side)
        return complete if all_done?
      in ErrorMessage[error:]
        # Leave the loop before raising: Kernel#loop rescues StopIteration,
        # so re-raising such an upstream failure inside the block would be
        # swallowed and its result returned as if it were a stream value.
        failure = error
        break
      end
    end

    fail_with(failure)
  rescue MergeMailbox::Closed
    complete
  end

  # Producer fiber body: materializes one side and forwards its messages
  # until the merge closes, delivery fails, or a terminal message is sent.
  def run_producer(side)
    stream = materialize_side(side)

    loop do
      break if @closed

      message = pull_message(side, stream)
      break unless deliver(message)
      break unless message.is_a?(ValueMessage)
    end
  rescue MergeMailbox::Closed, CancellationError
    nil
  rescue StandardError => error
    close_side(side, record_error: false)
    deliver(ErrorMessage.new(side:, error:)) unless @closed
  end

  # Calls the side's materializer and records the stream; the stream is
  # closed right away when the merge was closed in the meantime.
  def materialize_side(side)
    stream = @materializers.fetch(side).call
    @streams[side] = stream
    close_side(side) if @closed
    stream
  end

  # Pulls one element and wraps it as a message. A failing pull closes the
  # side and is reported as an ErrorMessage instead of being raised.
  def pull_message(side, stream)
    value = stream.next
    return terminal_done_message(side) if Pull.done?(value)

    ValueMessage.new(side:, value:)
  rescue StandardError => error
    close_side(side, record_error: false)
    ErrorMessage.new(side:, error:)
  end

  # Closes a finished side; a close failure turns completion into an error.
  def terminal_done_message(side)
    close_error = close_side(side)
    close_error ? ErrorMessage.new(side:, error: close_error) : DoneMessage.new(side:)
  end

  # Pushes a message; returns false instead of raising when the mailbox is
  # already closed.
  def deliver(message)
    @mailbox.push(message)
    true
  rescue MergeMailbox::Closed
    false
  end

  def mark_side_done(side)
    @side_done[side] = true
  end

  def all_done?
    SIDE_ORDER.all? { |side| @side_done.fetch(side) }
  end

  # Marks the merge finished, closes the mailbox, and returns DONE.
  def complete
    @done = true
    close_mailbox
    DONE
  end

  # Marks the merge finished, releases mailbox, streams, and producers,
  # then raises the given error.
  def fail_with(error)
    @done = true
    close_mailbox
    close_materialized_streams
    cancel_producers
    raise error
  end

  # Closes every materialized stream in SIDE_ORDER and returns the first
  # close error (or nil).
  def close_materialized_streams
    first_error = nil

    SIDE_ORDER.each do |side|
      close_error = close_side(side)
      first_error ||= close_error
    end

    first_error
  end

  # Closes one side's stream at most once. Returns the close error when
  # `record_error` is true and closing failed; otherwise nil.
  def close_side(side, record_error: true)
    return nil if @stream_closed.fetch(side)

    stream = @streams[side]
    return nil unless stream

    @stream_closed[side] = true
    @streams[side] = nil
    stream.close
    nil
  rescue StandardError => error
    error if record_error
  end

  def close_mailbox
    @mailbox&.close
  end

  # Best-effort interruption of live producers; skipped when the current
  # scheduler does not respond to fiber_interrupt, and interrupt failures
  # are ignored.
  def cancel_producers
    scheduler = Fiber.scheduler
    return unless scheduler.respond_to?(:fiber_interrupt)

    @producers.each_value do |fiber|
      next unless fiber&.alive?

      scheduler.fiber_interrupt(fiber, CancellationError.new)
    rescue StandardError
      nil
    end
  end

  # Raises SchedulerRequiredError unless a scheduler is set and the current
  # fiber is non-blocking.
  def validate_scheduler!
    return if Fiber.scheduler && !Fiber.current.blocking?

    message =
      if Fiber.scheduler
        "Source.merge requires a non-blocking fiber"
      else
        "Source.merge requires Fiber.scheduler"
      end
    raise SchedulerRequiredError, message
  end

  # Thin wrapper around Thread::SizedQueue that reports a closed queue as
  # MergeMailbox::Closed.
  class MergeMailbox
    Closed = Class.new(StandardError)

    def initialize(capacity)
      @queue = Thread::SizedQueue.new(capacity)
    end

    def push(message)
      @queue << message
    rescue ClosedQueueError
      raise Closed
    end

    def pop
      @queue.pop
    rescue ClosedQueueError
      raise Closed
    end

    def close
      @queue.close
    end
  end
end
|
|
229
|
+
end
|
|
230
|
+
end
|
|
@@ -11,6 +11,11 @@ module FiberStream
|
|
|
11
11
|
class ParallelMapBoundary
|
|
12
12
|
TERMINAL_RESULT_CAPACITY = 1
|
|
13
13
|
CancellationError = Class.new(StandardError)
|
|
14
|
+
JobMessage = Data.define(:sequence, :value)
|
|
15
|
+
ValueMessage = Data.define(:sequence, :value)
|
|
16
|
+
DoneMessage = Data.define(:sequence)
|
|
17
|
+
ErrorMessage = Data.define(:sequence, :error)
|
|
18
|
+
private_constant :JobMessage, :ValueMessage, :DoneMessage, :ErrorMessage
|
|
14
19
|
|
|
15
20
|
def initialize(upstream, concurrency, transform)
|
|
16
21
|
@upstream = upstream
|
|
@@ -85,28 +90,23 @@ module FiberStream
|
|
|
85
90
|
end
|
|
86
91
|
|
|
87
92
|
def emit(message)
|
|
88
|
-
case message
|
|
89
|
-
|
|
90
|
-
emit_value(
|
|
91
|
-
|
|
93
|
+
case message
|
|
94
|
+
in ValueMessage[sequence:, value:]
|
|
95
|
+
emit_value(sequence, value)
|
|
96
|
+
in DoneMessage
|
|
92
97
|
complete
|
|
93
|
-
|
|
94
|
-
fail_with_ordered_error(
|
|
98
|
+
in ErrorMessage[sequence:, error:]
|
|
99
|
+
fail_with_ordered_error(sequence, error)
|
|
95
100
|
end
|
|
96
101
|
end
|
|
97
102
|
|
|
98
|
-
def emit_value(
|
|
99
|
-
sequence = message.fetch(1)
|
|
100
|
-
value = message.fetch(2)
|
|
103
|
+
def emit_value(sequence, value)
|
|
101
104
|
@next_emit_sequence = sequence + 1
|
|
102
105
|
return_permit unless @admission_closed
|
|
103
106
|
value
|
|
104
107
|
end
|
|
105
108
|
|
|
106
|
-
def fail_with_ordered_error(
|
|
107
|
-
sequence = message.fetch(1)
|
|
108
|
-
error = message.fetch(2)
|
|
109
|
-
|
|
109
|
+
def fail_with_ordered_error(sequence, error)
|
|
110
110
|
if @failure_sequence && sequence > @failure_sequence
|
|
111
111
|
@next_emit_sequence = sequence + 1
|
|
112
112
|
return next_message
|
|
@@ -130,7 +130,7 @@ module FiberStream
|
|
|
130
130
|
break unless take_permit
|
|
131
131
|
|
|
132
132
|
message = pull_job_message
|
|
133
|
-
if message.
|
|
133
|
+
if message.is_a?(JobMessage)
|
|
134
134
|
break unless deliver_job(message)
|
|
135
135
|
else
|
|
136
136
|
close_admission(close_upstream: false)
|
|
@@ -151,15 +151,19 @@ module FiberStream
|
|
|
151
151
|
|
|
152
152
|
sequence = @next_sequence
|
|
153
153
|
@next_sequence += 1
|
|
154
|
-
|
|
154
|
+
JobMessage.new(sequence:, value:)
|
|
155
155
|
rescue StandardError => error
|
|
156
156
|
close_upstream(record_error: false)
|
|
157
|
-
|
|
157
|
+
ErrorMessage.new(sequence: @next_sequence, error:)
|
|
158
158
|
end
|
|
159
159
|
|
|
160
160
|
def terminal_done_message
|
|
161
161
|
close_error = close_upstream
|
|
162
|
-
|
|
162
|
+
if close_error
|
|
163
|
+
ErrorMessage.new(sequence: @next_sequence, error: close_error)
|
|
164
|
+
else
|
|
165
|
+
DoneMessage.new(sequence: @next_sequence)
|
|
166
|
+
end
|
|
163
167
|
end
|
|
164
168
|
|
|
165
169
|
def run_worker
|
|
@@ -176,23 +180,23 @@ module FiberStream
|
|
|
176
180
|
end
|
|
177
181
|
|
|
178
182
|
def map_job(message)
|
|
179
|
-
sequence = message.
|
|
180
|
-
value = message.
|
|
181
|
-
|
|
183
|
+
sequence = message.sequence
|
|
184
|
+
value = message.value
|
|
185
|
+
ValueMessage.new(sequence:, value: @transform.call(value))
|
|
182
186
|
rescue CancellationError
|
|
183
187
|
raise
|
|
184
188
|
rescue StandardError => error
|
|
185
|
-
|
|
189
|
+
ErrorMessage.new(sequence:, error:)
|
|
186
190
|
end
|
|
187
191
|
|
|
188
192
|
def record_result(message)
|
|
189
|
-
if message.
|
|
190
|
-
sequence = message.
|
|
193
|
+
if message.is_a?(ErrorMessage)
|
|
194
|
+
sequence = message.sequence
|
|
191
195
|
@failure_sequence = sequence if @failure_sequence.nil? || sequence < @failure_sequence
|
|
192
196
|
close_admission
|
|
193
197
|
end
|
|
194
198
|
|
|
195
|
-
@pending[message.
|
|
199
|
+
@pending[message.sequence] = message
|
|
196
200
|
end
|
|
197
201
|
|
|
198
202
|
def drain_available_results
|