fiber_stream 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,116 @@
1
+ # frozen_string_literal: true
2
+
3
+ $LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
4
+
5
+ require "digest"
6
+ require "fiber_stream"
7
+
8
# Work list for the example: one `[producer_name, jobs]` pair per producer
# Ractor. Each job hash names a synthetic file, carries its payload, and says
# how many hashing rounds the seed and verification passes run.
PRODUCER_JOBS = [
  [
    "producer-a",
    [
      {
        name: "alpha.bin",
        payload: "A" * 180_000,
        seed_rounds: 80,
        verify_rounds: 60
      },
      {
        name: "bravo.bin",
        payload: "B" * 140_000,
        seed_rounds: 70,
        verify_rounds: 55
      }
    ]
  ],
  [
    "producer-b",
    [
      {
        name: "charlie.bin",
        payload: "C" * 220_000,
        seed_rounds: 85,
        verify_rounds: 65
      },
      {
        name: "delta.bin",
        payload: "D" * 120_000,
        seed_rounds: 75,
        verify_rounds: 50
      }
    ]
  ]
].freeze
24
+
25
# Shareable verification step handed to `ractor_map`.
#
# Starting from the record's `:seed_sha256`, it re-hashes the running digest
# `:verify_rounds` times (each round mixes in the round index) and returns a
# copy of the record with the result stored under `:final_sha256`.
VERIFY_RECORD =
  Ractor.shareable_proc do |record|
    seed = record.fetch(:seed_sha256)

    final = record.fetch(:verify_rounds).times.reduce(seed) do |current, round|
      Digest::SHA256.hexdigest("#{current}:verify:#{round}")
    end

    record.merge(final_sha256: final)
  end
35
+
36
# Starts one producer Ractor and returns `[producer_ractor, ack_port]`.
#
# The Ractor creates its own port, hands it back over a temporary handshake
# port, and then waits on it. For each `Ack` it takes the next job, hashes the
# payload `seed_rounds` times, and sends an `Element` wrapping the record hash
# to `data_port` with `move: true`. When the jobs run out it sends `Complete`
# and finishes with `[:completed, name, count]`; a `Cancel` finishes it with
# `[:cancelled, name, count, reason]`.
def spawn_digest_producer(data_port, producer_name, jobs)
  handshake_port = Ractor::Port.new

  worker = Ractor.new(data_port, handshake_port, producer_name, jobs) do |outbox, setup, name, producer_jobs|
    demand_port = Ractor::Port.new
    setup.send(demand_port)

    pending_jobs = producer_jobs.to_enum
    delivered = 0

    loop do
      case demand_port.receive
      in FiberStream::RactorPort::Ack
        begin
          job = pending_jobs.next
          payload = job.fetch(:payload)

          # Fold the payload through the configured number of SHA-256 rounds.
          seed = job.fetch(:seed_rounds).times.reduce(payload) do |current, round|
            Digest::SHA256.hexdigest("#{current}:#{name}:#{round}")
          end

          delivered += 1
          outbox.send(
            FiberStream::RactorPort::Element.new(
              {
                producer: name,
                name: job.fetch(:name),
                bytes: payload.bytesize,
                seed_sha256: seed,
                verify_rounds: job.fetch(:verify_rounds)
              }
            ),
            move: true
          )
        rescue StopIteration
          # No jobs left: tell the consumer and report the final status.
          outbox.send(FiberStream::RactorPort::Complete.new)
          break [:completed, name, delivered]
        end
      in FiberStream::RactorPort::Cancel[reason]
        break [:cancelled, name, delivered, reason]
      end
    end
  end

  [worker, handshake_port.receive]
end
82
+
83
# Spawn one producer Ractor per job list, remembering both the Ractor (for its
# final status) and the data/ack port pair the merge source needs.
producers = []
port_pairs =
  PRODUCER_JOBS.map do |producer_name, jobs|
    data_port = Ractor::Port.new
    ractor, ack_port = spawn_digest_producer(data_port, producer_name, jobs)
    producers << ractor
    { port: data_port, ack_port: }
  end

# Merge the producer ports, verify each record in Ractor workers, and collect
# the results into an array.
merged = FiberStream::Source.ractor_merge_ports(port_pairs)
verified = merged.ractor_map(workers: 2, input_transfer: :move, output_transfer: :move, &VERIFY_RECORD)
records = verified.run_with(FiberStream::Sink.to_a)

puts "Merged producer Ractors, then verified in ractor_map workers"
records.each do |record|
  fields = %i[producer name bytes final_sha256].to_h { |key| [key, record.fetch(key)] }
  puts format("- %-10<producer>s %-11<name>s %7<bytes>d bytes %<final_sha256>s", fields)
end

puts
puts "Producer statuses:"
producers.each { |producer| puts "- #{producer.value.inspect}" }

puts
puts "Source.ractor_merge_ports emits producers in ready order."
puts "ractor_map preserves that merged input order while running verification in Ractor workers."
@@ -9,7 +9,10 @@ module FiberStream
9
9
  #
10
10
  # Producer failures, invalid protocol messages, and source-side Ractor port
11
11
  # failures use this stable error shape so callers do not need to depend on
12
- # Ruby's Ractor transport exceptions.
12
+ # Ruby's Ractor transport exceptions. For producer failures,
13
+ # `cause_class_name` and `cause_message` come from the producer's
14
+ # `RactorPort::Failure` envelope and are included in this error's public
15
+ # message.
13
16
  class RactorPortSourceError < RuntimeError
14
17
  attr_reader :kind, :cause_class_name, :cause_message, :original_cause
15
18
 
@@ -75,6 +75,56 @@ module FiberStream
75
75
  new { |upstream| Pull.take(upstream, count) }
76
76
  end
77
77
 
78
+ # Creates a fixed-prefix dropping flow.
79
+ #
80
+ # The flow discards the first `count` upstream elements, then passes later
81
+ # elements through unchanged. `drop(0)` behaves as pass-through. Negative
82
+ # counts raise `ArgumentError`; non-Integer counts raise `TypeError`.
83
def self.drop(count)
  # Type is checked before range so a non-Integer never reaches the comparison.
  unless count.is_a?(Integer)
    raise TypeError, "count must be an Integer"
  end
  raise ArgumentError, "count must be non-negative" if count < 0

  new do |source|
    Pull.drop(source, count)
  end
end
89
+
90
+ # Creates a fixed-size grouping flow.
91
+ #
92
+ # The flow emits arrays containing up to `count` adjacent upstream elements.
93
+ # Full groups contain exactly `count` elements; normal upstream completion
94
+ # emits one final partial group when one exists. `count` must be a positive
95
+ # Integer.
96
def self.grouped(count)
  # Type is checked before range so a non-Integer never reaches the comparison.
  unless count.is_a?(Integer)
    raise TypeError, "count must be an Integer"
  end
  raise ArgumentError, "count must be positive" if count <= 0

  new do |source|
    Pull.grouped(source, count)
  end
end
102
+
103
+ # Creates a predicate-based limiting flow.
104
+ #
105
+ # The flow emits leading elements while the block result is truthy. The
106
+ # first false or nil result completes the stream without emitting that
107
+ # element and closes upstream during the same downstream pull. Exceptions
108
+ # raised by the block fail the stream and are re-raised from
109
+ # `Source#run_with`.
110
def self.take_while(&block)
  raise ArgumentError, "missing block" if block.nil?

  # The captured block is passed to the pull stage as the predicate.
  new do |source|
    Pull.take_while(source, block)
  end
end
115
+
116
+ # Creates a predicate-based prefix-dropping flow.
117
+ #
118
+ # The flow drops leading elements while the block result is truthy. The
119
+ # first false or nil result, and all later elements, pass through unchanged.
120
+ # After that boundary the block is not called again. Exceptions raised by
121
+ # the block fail the stream and are re-raised from `Source#run_with`.
122
def self.drop_while(&block)
  raise ArgumentError, "missing block" if block.nil?

  # The captured block is passed to the pull stage as the predicate.
  new do |source|
    Pull.drop_while(source, block)
  end
end
127
+
78
128
  # Creates a scheduler-backed asynchronous boundary.
79
129
  #
80
130
  # The boundary starts its producer on the first downstream demand and
@@ -105,7 +155,9 @@ module FiberStream
105
155
  #
106
156
  # The flow accepts String chunks and emits lines split on "\n". By default
107
157
  # it chomps the trailing newline and one preceding "\r". `max_length` is an
108
- # optional per-line bytesize limit.
158
+ # optional per-line bytesize limit. With `max_length: nil`, one
159
+ # unterminated line can buffer without bound. Set a positive `max_length`
160
+ # for untrusted, network-facing, or otherwise unbounded streams.
109
161
  def self.lines(chomp: true, max_length: nil)
110
162
  raise TypeError, "chomp must be true or false" unless [true, false].include?(chomp)
111
163
  unless max_length.nil? || max_length.is_a?(Integer)
@@ -116,6 +168,27 @@ module FiberStream
116
168
  new { |upstream| Pull.lines(upstream, chomp, max_length) }
117
169
  end
118
170
 
171
+ # Creates a delimiter-splitting flow.
172
+ #
173
+ # The flow accepts String chunks and emits frames split on the non-empty
174
+ # String `separator`. Separator matching is byte-oriented. By default
175
+ # emitted frames exclude the separator; `keep_separator: true` preserves it
176
+ # on separator-terminated frames. `max_length` is an optional per-frame body
177
+ # bytesize limit. With `max_length: nil`, one unterminated frame can buffer
178
+ # without bound. Set a positive `max_length` for untrusted, network-facing,
179
+ # or otherwise unbounded streams.
180
def self.split(separator, keep_separator: false, max_length: nil)
  # Separator checks: type first, then emptiness.
  raise TypeError, "separator must be String" unless separator.is_a?(String)
  raise ArgumentError, "separator must not be empty" if separator.empty?

  unless [true, false].include?(keep_separator)
    raise TypeError, "keep_separator must be true or false"
  end

  # `nil` means "no limit"; anything else must be a positive Integer.
  if !max_length.nil? && !max_length.is_a?(Integer)
    raise TypeError, "max_length must be nil or an Integer"
  end
  raise ArgumentError, "max_length must be positive" if max_length && max_length <= 0

  new do |source|
    Pull.split(source, separator, keep_separator, max_length)
  end
end
191
+
119
192
  def self.validate_ractor_transfer_policy!(name, value)
120
193
  return if [:copy, :move].include?(value)
121
194
 
@@ -9,12 +9,18 @@ module FiberStream
9
9
  # time back to the downstream caller, so it adds an async boundary without
10
10
  # adding prefetch.
11
11
  class AsyncBoundary
12
+ ValueMessage = Data.define(:value)
13
+ DoneMessage = Data.define
14
+ ErrorMessage = Data.define(:error)
15
+ private_constant :ValueMessage, :DoneMessage, :ErrorMessage
16
+
12
17
  def initialize(upstream)
13
18
  @upstream = upstream
14
19
  @producer = nil
15
20
  @started = false
16
21
  @closed = false
17
22
  @done = false
23
+ @upstream_closed = false
18
24
  end
19
25
 
20
26
  def next
@@ -23,14 +29,14 @@ module FiberStream
23
29
  start
24
30
  message = @producer.resume
25
31
 
26
- case message.fetch(0)
27
- when :value
28
- message.fetch(1)
29
- when :done
32
+ case message
33
+ in ValueMessage[value:]
34
+ value
35
+ in DoneMessage
30
36
  complete
31
- when :error
37
+ in ErrorMessage[error:]
32
38
  @done = true
33
- raise message.fetch(1)
39
+ raise error
34
40
  end
35
41
  end
36
42
 
@@ -39,7 +45,7 @@ module FiberStream
39
45
 
40
46
  @closed = true
41
47
  @done = true
42
- @upstream.close
48
+ close_upstream
43
49
  ensure
44
50
  cancel_producer
45
51
  end
@@ -60,16 +66,16 @@ module FiberStream
60
66
 
61
67
  value = @upstream.next
62
68
  if Pull.done?(value)
63
- Fiber.yield([:done])
69
+ Fiber.yield(DoneMessage.new)
64
70
  break
65
71
  end
66
72
 
67
- Fiber.yield([:value, value])
73
+ Fiber.yield(ValueMessage.new(value:))
68
74
  end
69
75
  rescue StandardError => exception
70
- Fiber.yield([:error, exception]) unless @closed
76
+ Fiber.yield(ErrorMessage.new(error: exception)) unless @closed
71
77
  ensure
72
- @upstream.close
78
+ close_upstream
73
79
  end
74
80
 
75
81
  def complete
@@ -77,7 +83,18 @@ module FiberStream
77
83
  DONE
78
84
  end
79
85
 
86
# Closes the upstream at most once. The flag is set before the call, so a
# second invocation is a no-op even if the first `close` raised.
def close_upstream
  unless @upstream_closed
    @upstream_closed = true
    @upstream.close
  end
end
92
+
80
93
  def cancel_producer
94
+ return unless @producer&.alive?
95
+
96
+ @producer.kill
97
+ rescue StandardError
81
98
  nil
82
99
  end
83
100
  end
@@ -9,10 +9,17 @@ module FiberStream
9
9
  # queue capacity plus in-flight producer/consumer work. Close is responsible
10
10
  # for closing upstream and waking any producer blocked on a full queue.
11
11
  class BufferBoundary
12
+ CancellationError = Class.new(StandardError)
13
+ ValueMessage = Data.define(:value)
14
+ DoneMessage = Data.define
15
+ ErrorMessage = Data.define(:error)
16
+ private_constant :CancellationError, :ValueMessage, :DoneMessage, :ErrorMessage
17
+
12
18
  def initialize(upstream, count)
13
19
  @upstream = upstream
14
20
  @queue = Thread::SizedQueue.new(count)
15
21
  @producer = nil
22
+ @scheduler = nil
16
23
  @started = false
17
24
  @closed = false
18
25
  @done = false
@@ -27,14 +34,14 @@ module FiberStream
27
34
  message = @queue.pop
28
35
  return complete if message.nil?
29
36
 
30
- case message.fetch(0)
31
- when :value
32
- message.fetch(1)
33
- when :done
37
+ case message
38
+ in ValueMessage[value:]
39
+ value
40
+ in DoneMessage
34
41
  complete
35
- when :error
42
+ in ErrorMessage[error:]
36
43
  @done = true
37
- raise message.fetch(1)
44
+ raise error
38
45
  end
39
46
  end
40
47
 
@@ -58,7 +65,9 @@ module FiberStream
58
65
  raise SchedulerRequiredError, "Flow.buffer requires Fiber.scheduler" unless Fiber.scheduler
59
66
 
60
67
  @started = true
68
+ @scheduler = Fiber.scheduler
61
69
  @producer = Fiber.schedule { run_producer }
70
+ cancel_producer if @closed
62
71
  end
63
72
 
64
73
  def run_producer
@@ -67,8 +76,10 @@ module FiberStream
67
76
 
68
77
  message = pull_message
69
78
  break unless deliver(message)
70
- break unless message.fetch(0) == :value
79
+ break unless message.is_a?(ValueMessage)
71
80
  end
81
+ rescue CancellationError
82
+ nil
72
83
  ensure
73
84
  @upstream_close_error ||= close_upstream unless @upstream_closed
74
85
  end
@@ -77,15 +88,17 @@ module FiberStream
77
88
  value = @upstream.next
78
89
  return terminal_done_message if Pull.done?(value)
79
90
 
80
- [:value, value]
91
+ ValueMessage.new(value:)
92
+ rescue CancellationError
93
+ raise
81
94
  rescue StandardError => error
82
95
  close_upstream(record_error: false)
83
- [:error, error]
96
+ ErrorMessage.new(error:)
84
97
  end
85
98
 
86
99
  def terminal_done_message
87
100
  close_error = close_upstream
88
- close_error ? [:error, close_error] : [:done]
101
+ close_error ? ErrorMessage.new(error: close_error) : DoneMessage.new
89
102
  end
90
103
 
91
104
  def deliver(message)
@@ -116,6 +129,11 @@ module FiberStream
116
129
  end
117
130
 
118
131
  def cancel_producer
132
+ return unless @producer&.alive?
133
+ return unless @scheduler.respond_to?(:fiber_interrupt)
134
+
135
+ @scheduler.fiber_interrupt(@producer, CancellationError.new)
136
+ rescue NotImplementedError, StandardError
119
137
  nil
120
138
  end
121
139
  end
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
module FiberStream
  module Pull
    # Pull stream that emits every value from a first ("left") stream and then
    # every value from a second ("right") stream.
    #
    # Both streams come from zero-argument callables. The left callable runs on
    # the first `next`; the right callable runs only after the left stream has
    # reported completion and been closed.
    class Concat
      # @param left_materializer [#call] returns the first pull stream
      # @param right_materializer [#call] returns the second pull stream
      def initialize(left_materializer, right_materializer)
        @left_materializer = left_materializer
        @right_materializer = right_materializer
        @left = nil
        @right = nil
        @phase = :left
        @closed = false
        @done = false
      end

      # Returns the next element, or DONE once both streams are exhausted or
      # this stream has been closed.
      def next
        return DONE if @closed || @done

        case @phase
        when :left
          next_left
        when :right
          next_right
        else
          DONE
        end
      end

      # Closes whichever streams are currently materialized. Repeated calls are
      # no-ops. If closing raises, the first error is re-raised after both
      # streams have been attempted.
      def close
        return if @closed

        @closed = true
        close_materialized_streams
      end

      private

      def next_left
        materialize_left
        value = @left.next
        return value unless Pull.done?(value)

        close_left
        @phase = :right
        next_right
      end

      def next_right
        # Materialize here rather than in `next_left`: if the materializer
        # raises, the phase is already :right, and a later `next` must retry it
        # instead of calling `next` on nil.
        materialize_right
        value = @right.next
        return value unless Pull.done?(value)

        close_right
        @done = true
        DONE
      end

      def materialize_left
        return if @left

        stream = @left_materializer.call
        @left = stream
      end

      def materialize_right
        return if @right

        stream = @right_materializer.call
        @right = stream
      end

      def close_left
        stream = @left
        return unless stream

        stream.close
        @left = nil
      end

      def close_right
        stream = @right
        return unless stream

        stream.close
        @right = nil
      end

      # Closes right before left, remembers the first failure, and only raises
      # it once both references have been cleared.
      def close_materialized_streams
        first_error = nil

        [@right, @left].each do |stream|
          next unless stream

          begin
            stream.close
          rescue StandardError => error
            first_error ||= error
          end
        end

        @right = nil
        @left = nil

        raise first_error if first_error
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
module FiberStream
  module Pull
    # Stage that skips a fixed number of leading upstream elements.
    #
    # The skipping happens lazily on the first downstream pull; afterwards each
    # pull forwards exactly one upstream element.
    class Drop
      def initialize(upstream, count)
        @upstream = upstream
        @remaining = count
        @closed = false
        @done = false
      end

      # Returns the next retained element, or DONE when upstream is exhausted
      # (including while still skipping) or this stage has been closed.
      def next
        return DONE if @closed || @done

        while @remaining > 0
          return finish if Pull.done?(@upstream.next)

          @remaining -= 1
        end

        item = @upstream.next
        Pull.done?(item) ? finish : item
      end

      # Closes upstream once; later calls do nothing.
      def close
        return if @closed

        @closed = true
        @upstream.close
      end

      private

      # Records completion and returns the done marker.
      def finish
        @done = true
        DONE
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
module FiberStream
  module Pull
    # Predicate-based prefix dropping stage.
    #
    # Leading elements are discarded while the predicate returns a truthy
    # value. The first element with a falsey result is emitted, and from then on
    # every element passes through without calling the predicate again.
    # Exceptions from the predicate or from upstream propagate to the caller.
    class DropWhile
      # @param upstream [#next, #close] pull stream to read from
      # @param predicate [#call] receives each leading element
      def initialize(upstream, predicate)
        @upstream = upstream
        @predicate = predicate
        @dropping = true
        @closed = false
        @done = false
      end

      # Returns the next retained element, or DONE when upstream is exhausted
      # or this stage has been closed.
      def next
        return DONE if @closed || @done

        return pull_pass_through unless @dropping

        pull_until_retained
      end

      # Closes upstream once; later calls do nothing.
      def close
        return if @closed

        @closed = true
        @upstream.close
      end

      private

      # Skips the leading run. Kernel#loop is deliberately avoided here because
      # it rescues StopIteration, which would swallow that exception from the
      # predicate or upstream and hand its `result` downstream as an element.
      def pull_until_retained
        value = @upstream.next
        value = @upstream.next while !Pull.done?(value) && @predicate.call(value)
        return finish if Pull.done?(value)

        @dropping = false
        value
      end

      def pull_pass_through
        value = @upstream.next
        return finish if Pull.done?(value)

        value
      end

      # Records completion and returns the done marker.
      def finish
        @done = true
        DONE
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
module FiberStream
  module Pull
    # Stage that batches adjacent upstream elements into arrays.
    #
    # Every pull builds a new array of up to `count` elements. When upstream
    # completes part-way through a batch, the partial batch is emitted and the
    # following pull reports DONE.
    class Grouped
      def initialize(upstream, count)
        @upstream = upstream
        @count = count
        @closed = false
        @done = false
      end

      # Returns the next batch, or DONE when upstream has nothing left or this
      # stage has been closed.
      def next
        return DONE if @closed || @done

        batch = []

        until batch.length >= @count
          item = @upstream.next
          if Pull.done?(item)
            @done = true
            break
          end

          batch << item
        end

        # Upstream ended exactly on a batch boundary: nothing left to emit.
        return DONE if @done && batch.empty?

        batch
      end

      # Closes upstream once; later calls do nothing.
      def close
        return if @closed

        @closed = true
        @upstream.close
      end
    end
  end
end