fiber_stream 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,6 +11,11 @@ module FiberStream
11
11
  class ParallelMapBoundary
12
12
  TERMINAL_RESULT_CAPACITY = 1
13
13
  CancellationError = Class.new(StandardError)
14
+ JobMessage = Data.define(:sequence, :value)
15
+ ValueMessage = Data.define(:sequence, :value)
16
+ DoneMessage = Data.define(:sequence)
17
+ ErrorMessage = Data.define(:sequence, :error)
18
+ private_constant :JobMessage, :ValueMessage, :DoneMessage, :ErrorMessage
14
19
 
15
20
  def initialize(upstream, concurrency, transform)
16
21
  @upstream = upstream
@@ -85,28 +90,23 @@ module FiberStream
85
90
  end
86
91
 
87
92
  def emit(message)
88
- case message.fetch(0)
89
- when :value
90
- emit_value(message)
91
- when :done
93
+ case message
94
+ in ValueMessage[sequence:, value:]
95
+ emit_value(sequence, value)
96
+ in DoneMessage
92
97
  complete
93
- when :error
94
- fail_with_ordered_error(message)
98
+ in ErrorMessage[sequence:, error:]
99
+ fail_with_ordered_error(sequence, error)
95
100
  end
96
101
  end
97
102
 
98
- def emit_value(message)
99
- sequence = message.fetch(1)
100
- value = message.fetch(2)
103
+ def emit_value(sequence, value)
101
104
  @next_emit_sequence = sequence + 1
102
105
  return_permit unless @admission_closed
103
106
  value
104
107
  end
105
108
 
106
- def fail_with_ordered_error(message)
107
- sequence = message.fetch(1)
108
- error = message.fetch(2)
109
-
109
+ def fail_with_ordered_error(sequence, error)
110
110
  if @failure_sequence && sequence > @failure_sequence
111
111
  @next_emit_sequence = sequence + 1
112
112
  return next_message
@@ -130,7 +130,7 @@ module FiberStream
130
130
  break unless take_permit
131
131
 
132
132
  message = pull_job_message
133
- if message.fetch(0) == :job
133
+ if message.is_a?(JobMessage)
134
134
  break unless deliver_job(message)
135
135
  else
136
136
  close_admission(close_upstream: false)
@@ -151,15 +151,19 @@ module FiberStream
151
151
 
152
152
  sequence = @next_sequence
153
153
  @next_sequence += 1
154
- [:job, sequence, value]
154
+ JobMessage.new(sequence:, value:)
155
155
  rescue StandardError => error
156
156
  close_upstream(record_error: false)
157
- [:error, @next_sequence, error]
157
+ ErrorMessage.new(sequence: @next_sequence, error:)
158
158
  end
159
159
 
160
160
  def terminal_done_message
161
161
  close_error = close_upstream
162
- close_error ? [:error, @next_sequence, close_error] : [:done, @next_sequence]
162
+ if close_error
163
+ ErrorMessage.new(sequence: @next_sequence, error: close_error)
164
+ else
165
+ DoneMessage.new(sequence: @next_sequence)
166
+ end
163
167
  end
164
168
 
165
169
  def run_worker
@@ -176,23 +180,23 @@ module FiberStream
176
180
  end
177
181
 
178
182
  def map_job(message)
179
- sequence = message.fetch(1)
180
- value = message.fetch(2)
181
- [:value, sequence, @transform.call(value)]
183
+ sequence = message.sequence
184
+ value = message.value
185
+ ValueMessage.new(sequence:, value: @transform.call(value))
182
186
  rescue CancellationError
183
187
  raise
184
188
  rescue StandardError => error
185
- [:error, sequence, error]
189
+ ErrorMessage.new(sequence:, error:)
186
190
  end
187
191
 
188
192
  def record_result(message)
189
- if message.fetch(0) == :error
190
- sequence = message.fetch(1)
193
+ if message.is_a?(ErrorMessage)
194
+ sequence = message.sequence
191
195
  @failure_sequence = sequence if @failure_sequence.nil? || sequence < @failure_sequence
192
196
  close_admission
193
197
  end
194
198
 
195
- @pending[message.fetch(1)] = message
199
+ @pending[message.sequence] = message
196
200
  end
197
201
 
198
202
  def drain_available_results
@@ -0,0 +1,311 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FiberStream
4
+ module Pull
5
+ # Unordered scheduler-backed worker boundary for
6
+ # `Flow.parallel_unordered_map`.
7
+ #
8
+ # A single dispatcher pulls upstream and a bounded worker pool maps values.
9
+ # Downstream emits worker results in completion order. Admission is
10
+ # permit-based to keep queued, running, and completed pulled-but-unemitted
11
+ # work bounded by the configured concurrency.
12
+ class ParallelUnorderedMapBoundary
13
+ TERMINAL_RESULT_CAPACITY = 1
14
+ CancellationError = Class.new(StandardError)
15
+ JobMessage = Data.define(:sequence, :value)
16
+ ValueMessage = Data.define(:sequence, :value)
17
+ DoneMessage = Data.define
18
+ ErrorMessage = Data.define(:sequence, :error)
19
+ CloseErrorMessage = Data.define(:error)
20
+ private_constant :JobMessage, :ValueMessage, :DoneMessage, :ErrorMessage, :CloseErrorMessage
21
+
22
+ def initialize(upstream, concurrency, transform)
23
+ @upstream = upstream
24
+ @concurrency = concurrency
25
+ @transform = transform
26
+ @permits = Thread::SizedQueue.new(concurrency)
27
+ @jobs = Thread::SizedQueue.new(concurrency)
28
+ @results = Thread::SizedQueue.new(concurrency + TERMINAL_RESULT_CAPACITY)
29
+ @workers = []
30
+ @dispatcher = nil
31
+ @next_sequence = 0
32
+ @outstanding_jobs = 0
33
+ @terminal_message = nil
34
+ @started = false
35
+ @closed = false
36
+ @done = false
37
+ @admission_closed = false
38
+ @upstream_closing = false
39
+ @upstream_closed = false
40
+ @upstream_close_error = nil
41
+ @upstream_close_done = Thread::SizedQueue.new(1)
42
+
43
+ concurrency.times { @permits << true }
44
+ end
45
+
46
+ def next
47
+ return DONE if @closed || @done
48
+
49
+ start
50
+ next_message
51
+ end
52
+
53
+ def close
54
+ return if @closed
55
+
56
+ @closed = true
57
+ @done = true
58
+ close_error = close_upstream
59
+ close_internal_queues
60
+ close_error ||= @upstream_close_error
61
+ raise close_error if close_error
62
+ ensure
63
+ cancel_fibers
64
+ end
65
+
66
+ private
67
+
68
+ def start
69
+ return if @started
70
+
71
+ validate_scheduler!
72
+
73
+ @started = true
74
+ @concurrency.times do
75
+ @workers << Fiber.schedule { run_worker }
76
+ end
77
+ @dispatcher = Fiber.schedule { run_dispatcher }
78
+ end
79
+
80
+ def next_message
81
+ loop do
82
+ return emit_terminal(@terminal_message) if terminal_ready?
83
+
84
+ message = @results.pop
85
+ return complete if message.nil?
86
+
87
+ case message
88
+ in ValueMessage[sequence:, value:]
89
+ return emit_value(sequence, value)
90
+ in DoneMessage | CloseErrorMessage
91
+ @terminal_message = message
92
+ in ErrorMessage[sequence:, error:]
93
+ fail_with_error(sequence, error)
94
+ end
95
+ end
96
+ end
97
+
98
+ def emit_value(_sequence, value)
99
+ @outstanding_jobs -= 1
100
+ return_permit unless @admission_closed
101
+ value
102
+ end
103
+
104
+ def terminal_ready?
105
+ @terminal_message && @outstanding_jobs.zero?
106
+ end
107
+
108
+ def emit_terminal(message)
109
+ case message
110
+ in DoneMessage
111
+ complete
112
+ in CloseErrorMessage[error:]
113
+ fail_with_error(@next_sequence, error, close_admission: false)
114
+ end
115
+ end
116
+
117
+ def fail_with_error(_sequence, error, close_admission: true)
118
+ @done = true
119
+ close_admission() if close_admission
120
+ close_result_queue
121
+ cancel_fibers
122
+ raise error
123
+ end
124
+
125
+ def complete
126
+ @done = true
127
+ close_result_queue
128
+ DONE
129
+ end
130
+
131
+ def run_dispatcher
132
+ loop do
133
+ break if @closed || @admission_closed
134
+ break unless take_permit
135
+
136
+ message = pull_job_message
137
+ if message.is_a?(JobMessage)
138
+ break unless deliver_job(message)
139
+ else
140
+ close_admission(close_upstream: false)
141
+ deliver_result(message)
142
+ break
143
+ end
144
+ end
145
+ rescue CancellationError
146
+ nil
147
+ ensure
148
+ close_upstream unless @upstream_closed || @closed
149
+ close_job_queue
150
+ end
151
+
152
+ def pull_job_message
153
+ value = @upstream.next
154
+ return terminal_done_message if Pull.done?(value)
155
+
156
+ sequence = @next_sequence
157
+ @next_sequence += 1
158
+ @outstanding_jobs += 1
159
+ JobMessage.new(sequence:, value:)
160
+ rescue StandardError => error
161
+ close_upstream(record_error: false)
162
+ ErrorMessage.new(sequence: @next_sequence, error:)
163
+ end
164
+
165
+ def terminal_done_message
166
+ close_error = close_upstream
167
+ if close_error
168
+ CloseErrorMessage.new(error: close_error)
169
+ else
170
+ DoneMessage.new
171
+ end
172
+ end
173
+
174
+ def run_worker
175
+ loop do
176
+ break if @closed
177
+
178
+ message = @jobs.pop
179
+ break if message.nil?
180
+
181
+ deliver_result(map_job(message))
182
+ end
183
+ rescue CancellationError
184
+ nil
185
+ end
186
+
187
+ def map_job(message)
188
+ sequence = message.sequence
189
+ value = message.value
190
+ ValueMessage.new(sequence:, value: @transform.call(value))
191
+ rescue CancellationError
192
+ raise
193
+ rescue StandardError => error
194
+ ErrorMessage.new(sequence:, error:)
195
+ end
196
+
197
+ def close_admission(close_upstream: true)
198
+ return if @admission_closed
199
+
200
+ @admission_closed = true
201
+ close_upstream(record_error: false) if close_upstream
202
+ close_permit_queue
203
+ close_job_queue
204
+ end
205
+
206
+ def take_permit
207
+ @permits.pop
208
+ rescue ClosedQueueError
209
+ nil
210
+ end
211
+
212
+ def return_permit
213
+ @permits << true
214
+ rescue ClosedQueueError
215
+ nil
216
+ end
217
+
218
+ def deliver_job(message)
219
+ @jobs << message
220
+ true
221
+ rescue ClosedQueueError
222
+ false
223
+ end
224
+
225
+ def deliver_result(message)
226
+ @results << message
227
+ true
228
+ rescue ClosedQueueError
229
+ false
230
+ end
231
+
232
+ def close_internal_queues
233
+ close_permit_queue
234
+ close_job_queue
235
+ close_result_queue
236
+ end
237
+
238
+ def close_permit_queue
239
+ @permits.close
240
+ end
241
+
242
+ def close_job_queue
243
+ @jobs.close
244
+ end
245
+
246
+ def close_result_queue
247
+ @results.close
248
+ end
249
+
250
+ def close_upstream(record_error: true)
251
+ return wait_for_upstream_close(record_error:) if @upstream_closing
252
+ return nil if @upstream_closed
253
+
254
+ @upstream_closing = true
255
+ @upstream.close
256
+ nil
257
+ rescue StandardError => error
258
+ @upstream_close_error ||= error if record_error
259
+ error
260
+ ensure
261
+ if @upstream_closing
262
+ @upstream_closed = true
263
+ @upstream_closing = false
264
+ signal_upstream_close_done
265
+ end
266
+ end
267
+
268
+ def wait_for_upstream_close(record_error:)
269
+ @upstream_close_done.pop
270
+ return @upstream_close_error if record_error
271
+
272
+ nil
273
+ rescue ClosedQueueError
274
+ record_error ? @upstream_close_error : nil
275
+ end
276
+
277
+ def signal_upstream_close_done
278
+ @upstream_close_done << true
279
+ rescue ClosedQueueError
280
+ nil
281
+ ensure
282
+ @upstream_close_done.close
283
+ end
284
+
285
+ def cancel_fibers
286
+ scheduler = Fiber.scheduler
287
+ return unless scheduler.respond_to?(:fiber_interrupt)
288
+
289
+ (@workers + [@dispatcher]).compact.each do |fiber|
290
+ next unless fiber.alive?
291
+
292
+ scheduler.fiber_interrupt(fiber, CancellationError.new)
293
+ rescue StandardError
294
+ nil
295
+ end
296
+ end
297
+
298
+ def validate_scheduler!
299
+ return if Fiber.scheduler && !Fiber.current.blocking?
300
+
301
+ message =
302
+ if Fiber.scheduler
303
+ "Flow.parallel_unordered_map requires a non-blocking fiber"
304
+ else
305
+ "Flow.parallel_unordered_map requires Fiber.scheduler"
306
+ end
307
+ raise SchedulerRequiredError, message
308
+ end
309
+ end
310
+ end
311
+ end