fiber_stream 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FiberStream
4
+ module Pull
5
# Fixed-size grouping stage.
#
# It collects adjacent upstream elements into distinct arrays of up to
# `count` elements. A final partial group is emitted when upstream completes
# normally.
class Grouped
  # @param upstream [#next, #close] stage that is pulled for elements and
  #   closed when this stage is closed
  # @param count [Integer] maximum number of elements per emitted group
  # @raise [ArgumentError] when `count` is not a positive Integer
  def initialize(upstream, count)
    # A non-positive count would make #next return an empty array forever
    # without ever pulling upstream, so it is rejected up front.
    unless count.is_a?(Integer) && count.positive?
      raise ArgumentError, "count must be a positive Integer, got #{count.inspect}"
    end

    @upstream = upstream
    @count = count
    @closed = false
    @done = false
  end

  # Pulls upstream until a full group is collected or upstream completes.
  #
  # @return [Array, Object] the next group (a fresh array on every call), or
  #   DONE once the stage is closed or upstream has completed
  def next
    return DONE if @closed || @done

    group = []

    while group.length < @count
      value = @upstream.next
      if Pull.done?(value)
        # Upstream completed: remember it so later calls return DONE
        # without pulling again, then flush whatever was collected.
        @done = true
        return DONE if group.empty?

        return group
      end

      group << value
    end

    group
  end

  # Closes this stage and its upstream. Only the first call has any effect.
  #
  # @return [void]
  def close
    return if @closed

    @closed = true
    @upstream.close
  end
end
45
+ end
46
+ end
@@ -0,0 +1,230 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FiberStream
4
+ module Pull
5
+ # Scheduler-backed ready-order source merge.
6
+ #
7
+ # Each input source is materialized by a scheduled producer fiber on first
8
+ # downstream demand. Producers publish values, completion, and failures into
9
+ # a bounded mailbox; downstream emits values in mailbox arrival order while
10
+ # preserving each input's own order.
11
+ class Merge
12
+ SIDE_ORDER = [:left, :right].freeze
13
+ CancellationError = Class.new(StandardError)
14
+ ValueMessage = Data.define(:side, :value)
15
+ DoneMessage = Data.define(:side)
16
+ ErrorMessage = Data.define(:side, :error)
17
+ private_constant :ValueMessage, :DoneMessage, :ErrorMessage
18
+
19
+ def initialize(left_materializer, right_materializer)
20
+ @materializers = { left: left_materializer, right: right_materializer }
21
+ @streams = { left: nil, right: nil }
22
+ @stream_closed = { left: false, right: false }
23
+ @side_done = { left: false, right: false }
24
+ @producers = {}
25
+ @mailbox = nil
26
+ @started = false
27
+ @closed = false
28
+ @done = false
29
+ end
30
+
31
+ def next
32
+ return DONE if @closed || @done
33
+
34
+ start
35
+ next_message
36
+ end
37
+
38
+ def close
39
+ return if @closed
40
+
41
+ @closed = true
42
+ @done = true
43
+ close_error = close_materialized_streams
44
+ close_mailbox
45
+ raise close_error if close_error
46
+ ensure
47
+ cancel_producers
48
+ end
49
+
50
+ private
51
+
52
+ def start
53
+ return if @started
54
+
55
+ validate_scheduler!
56
+
57
+ @mailbox = MergeMailbox.new(1)
58
+ @started = true
59
+ SIDE_ORDER.each do |side|
60
+ @producers[side] = Fiber.schedule { run_producer(side) }
61
+ end
62
+ end
63
+
64
+ def next_message
65
+ loop do
66
+ message = @mailbox.pop
67
+ return complete if message.nil?
68
+
69
+ case message
70
+ in ValueMessage[value:]
71
+ return value
72
+ in DoneMessage[side:]
73
+ mark_side_done(side)
74
+ return complete if all_done?
75
+ in ErrorMessage[error:]
76
+ return fail_with(error)
77
+ end
78
+ end
79
+ rescue MergeMailbox::Closed
80
+ complete
81
+ end
82
+
83
+ def run_producer(side)
84
+ stream = materialize_side(side)
85
+
86
+ loop do
87
+ break if @closed
88
+
89
+ message = pull_message(side, stream)
90
+ break unless deliver(message)
91
+ break unless message.is_a?(ValueMessage)
92
+ end
93
+ rescue MergeMailbox::Closed, CancellationError
94
+ nil
95
+ rescue StandardError => error
96
+ close_side(side, record_error: false)
97
+ deliver(ErrorMessage.new(side:, error:)) unless @closed
98
+ end
99
+
100
+ def materialize_side(side)
101
+ stream = @materializers.fetch(side).call
102
+ @streams[side] = stream
103
+ close_side(side) if @closed
104
+ stream
105
+ end
106
+
107
+ def pull_message(side, stream)
108
+ value = stream.next
109
+ return terminal_done_message(side) if Pull.done?(value)
110
+
111
+ ValueMessage.new(side:, value:)
112
+ rescue StandardError => error
113
+ close_side(side, record_error: false)
114
+ ErrorMessage.new(side:, error:)
115
+ end
116
+
117
+ def terminal_done_message(side)
118
+ close_error = close_side(side)
119
+ close_error ? ErrorMessage.new(side:, error: close_error) : DoneMessage.new(side:)
120
+ end
121
+
122
+ def deliver(message)
123
+ @mailbox.push(message)
124
+ true
125
+ rescue MergeMailbox::Closed
126
+ false
127
+ end
128
+
129
+ def mark_side_done(side)
130
+ @side_done[side] = true
131
+ end
132
+
133
+ def all_done?
134
+ SIDE_ORDER.all? { |side| @side_done.fetch(side) }
135
+ end
136
+
137
+ def complete
138
+ @done = true
139
+ close_mailbox
140
+ DONE
141
+ end
142
+
143
+ def fail_with(error)
144
+ @done = true
145
+ close_mailbox
146
+ close_materialized_streams
147
+ cancel_producers
148
+ raise error
149
+ end
150
+
151
+ def close_materialized_streams
152
+ first_error = nil
153
+
154
+ SIDE_ORDER.each do |side|
155
+ close_error = close_side(side)
156
+ first_error ||= close_error
157
+ end
158
+
159
+ first_error
160
+ end
161
+
162
+ def close_side(side, record_error: true)
163
+ return nil if @stream_closed.fetch(side)
164
+
165
+ stream = @streams[side]
166
+ return nil unless stream
167
+
168
+ @stream_closed[side] = true
169
+ @streams[side] = nil
170
+ stream.close
171
+ nil
172
+ rescue StandardError => error
173
+ error if record_error
174
+ end
175
+
176
+ def close_mailbox
177
+ @mailbox&.close
178
+ end
179
+
180
+ def cancel_producers
181
+ scheduler = Fiber.scheduler
182
+ return unless scheduler.respond_to?(:fiber_interrupt)
183
+
184
+ @producers.each_value do |fiber|
185
+ next unless fiber&.alive?
186
+
187
+ scheduler.fiber_interrupt(fiber, CancellationError.new)
188
+ rescue StandardError
189
+ nil
190
+ end
191
+ end
192
+
193
+ def validate_scheduler!
194
+ return if Fiber.scheduler && !Fiber.current.blocking?
195
+
196
+ message =
197
+ if Fiber.scheduler
198
+ "Source.merge requires a non-blocking fiber"
199
+ else
200
+ "Source.merge requires Fiber.scheduler"
201
+ end
202
+ raise SchedulerRequiredError, message
203
+ end
204
+
205
+ class MergeMailbox
206
+ Closed = Class.new(StandardError)
207
+
208
+ def initialize(capacity)
209
+ @queue = Thread::SizedQueue.new(capacity)
210
+ end
211
+
212
+ def push(message)
213
+ @queue << message
214
+ rescue ClosedQueueError
215
+ raise Closed
216
+ end
217
+
218
+ def pop
219
+ @queue.pop
220
+ rescue ClosedQueueError
221
+ raise Closed
222
+ end
223
+
224
+ def close
225
+ @queue.close
226
+ end
227
+ end
228
+ end
229
+ end
230
+ end
@@ -11,6 +11,11 @@ module FiberStream
11
11
  class ParallelMapBoundary
12
12
  TERMINAL_RESULT_CAPACITY = 1
13
13
  CancellationError = Class.new(StandardError)
14
+ JobMessage = Data.define(:sequence, :value)
15
+ ValueMessage = Data.define(:sequence, :value)
16
+ DoneMessage = Data.define(:sequence)
17
+ ErrorMessage = Data.define(:sequence, :error)
18
+ private_constant :JobMessage, :ValueMessage, :DoneMessage, :ErrorMessage
14
19
 
15
20
  def initialize(upstream, concurrency, transform)
16
21
  @upstream = upstream
@@ -85,28 +90,23 @@ module FiberStream
85
90
  end
86
91
 
87
92
  def emit(message)
88
- case message.fetch(0)
89
- when :value
90
- emit_value(message)
91
- when :done
93
+ case message
94
+ in ValueMessage[sequence:, value:]
95
+ emit_value(sequence, value)
96
+ in DoneMessage
92
97
  complete
93
- when :error
94
- fail_with_ordered_error(message)
98
+ in ErrorMessage[sequence:, error:]
99
+ fail_with_ordered_error(sequence, error)
95
100
  end
96
101
  end
97
102
 
98
- def emit_value(message)
99
- sequence = message.fetch(1)
100
- value = message.fetch(2)
103
+ def emit_value(sequence, value)
101
104
  @next_emit_sequence = sequence + 1
102
105
  return_permit unless @admission_closed
103
106
  value
104
107
  end
105
108
 
106
- def fail_with_ordered_error(message)
107
- sequence = message.fetch(1)
108
- error = message.fetch(2)
109
-
109
+ def fail_with_ordered_error(sequence, error)
110
110
  if @failure_sequence && sequence > @failure_sequence
111
111
  @next_emit_sequence = sequence + 1
112
112
  return next_message
@@ -130,7 +130,7 @@ module FiberStream
130
130
  break unless take_permit
131
131
 
132
132
  message = pull_job_message
133
- if message.fetch(0) == :job
133
+ if message.is_a?(JobMessage)
134
134
  break unless deliver_job(message)
135
135
  else
136
136
  close_admission(close_upstream: false)
@@ -151,15 +151,19 @@ module FiberStream
151
151
 
152
152
  sequence = @next_sequence
153
153
  @next_sequence += 1
154
- [:job, sequence, value]
154
+ JobMessage.new(sequence:, value:)
155
155
  rescue StandardError => error
156
156
  close_upstream(record_error: false)
157
- [:error, @next_sequence, error]
157
+ ErrorMessage.new(sequence: @next_sequence, error:)
158
158
  end
159
159
 
160
160
  def terminal_done_message
161
161
  close_error = close_upstream
162
- close_error ? [:error, @next_sequence, close_error] : [:done, @next_sequence]
162
+ if close_error
163
+ ErrorMessage.new(sequence: @next_sequence, error: close_error)
164
+ else
165
+ DoneMessage.new(sequence: @next_sequence)
166
+ end
163
167
  end
164
168
 
165
169
  def run_worker
@@ -176,23 +180,23 @@ module FiberStream
176
180
  end
177
181
 
178
182
  def map_job(message)
179
- sequence = message.fetch(1)
180
- value = message.fetch(2)
181
- [:value, sequence, @transform.call(value)]
183
+ sequence = message.sequence
184
+ value = message.value
185
+ ValueMessage.new(sequence:, value: @transform.call(value))
182
186
  rescue CancellationError
183
187
  raise
184
188
  rescue StandardError => error
185
- [:error, sequence, error]
189
+ ErrorMessage.new(sequence:, error:)
186
190
  end
187
191
 
188
192
  def record_result(message)
189
- if message.fetch(0) == :error
190
- sequence = message.fetch(1)
193
+ if message.is_a?(ErrorMessage)
194
+ sequence = message.sequence
191
195
  @failure_sequence = sequence if @failure_sequence.nil? || sequence < @failure_sequence
192
196
  close_admission
193
197
  end
194
198
 
195
- @pending[message.fetch(1)] = message
199
+ @pending[message.sequence] = message
196
200
  end
197
201
 
198
202
  def drain_available_results