fiber_stream 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +37 -0
- data/README.md +102 -9
- data/examples/README.md +5 -0
- data/examples/ractor_merge_ports_and_map.rb +116 -0
- data/lib/fiber_stream/errors.rb +4 -1
- data/lib/fiber_stream/flow.rb +37 -1
- data/lib/fiber_stream/pull/async_boundary.rb +28 -11
- data/lib/fiber_stream/pull/buffer_boundary.rb +28 -10
- data/lib/fiber_stream/pull/concat.rb +9 -1
- data/lib/fiber_stream/pull/grouped.rb +46 -0
- data/lib/fiber_stream/pull/merge.rb +230 -0
- data/lib/fiber_stream/pull/parallel_map_boundary.rb +28 -24
- data/lib/fiber_stream/pull/ractor_map_boundary.rb +103 -79
- data/lib/fiber_stream/pull/ractor_merge_ports_source.rb +358 -0
- data/lib/fiber_stream/pull/ractor_port_source.rb +14 -14
- data/lib/fiber_stream/pull/split.rb +134 -0
- data/lib/fiber_stream/pull.rb +23 -3
- data/lib/fiber_stream/ractor_port.rb +3 -1
- data/lib/fiber_stream/running_pipeline.rb +18 -8
- data/lib/fiber_stream/source.rb +105 -3
- data/lib/fiber_stream/version.rb +1 -1
- data/sig/fiber_stream.rbs +7 -0
- metadata +7 -2
|
@@ -9,7 +9,10 @@ module FiberStream
|
|
|
9
9
|
# a coordinator thread so scheduler-managed fibers do not call Ractor wait
|
|
10
10
|
# APIs directly.
|
|
11
11
|
class RactorPortSource
|
|
12
|
-
|
|
12
|
+
ProtocolMessage = Data.define(:message)
|
|
13
|
+
ErrorMessage = Data.define(:error)
|
|
14
|
+
ClosedMessage = Data.define
|
|
15
|
+
private_constant :ProtocolMessage, :ErrorMessage, :ClosedMessage
|
|
13
16
|
|
|
14
17
|
def initialize(port, ack_port, ack_transfer, cancel)
|
|
15
18
|
@port = port
|
|
@@ -81,15 +84,13 @@ module FiberStream
|
|
|
81
84
|
end
|
|
82
85
|
|
|
83
86
|
def handle_result(result)
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
handle_protocol_message(result.fetch(1))
|
|
89
|
-
when :error
|
|
87
|
+
case result
|
|
88
|
+
in ProtocolMessage[message:]
|
|
89
|
+
handle_protocol_message(message)
|
|
90
|
+
in ErrorMessage[error:]
|
|
90
91
|
mark_done
|
|
91
|
-
raise_error(
|
|
92
|
-
|
|
92
|
+
raise_error(error)
|
|
93
|
+
in ClosedMessage
|
|
93
94
|
DONE
|
|
94
95
|
end
|
|
95
96
|
end
|
|
@@ -151,19 +152,19 @@ module FiberStream
|
|
|
151
152
|
|
|
152
153
|
ack_error = send_ack
|
|
153
154
|
if ack_error
|
|
154
|
-
deliver_result(
|
|
155
|
+
deliver_result(ErrorMessage.new(error: ack_error))
|
|
155
156
|
break
|
|
156
157
|
end
|
|
157
158
|
|
|
158
159
|
selected, message = select_message
|
|
159
160
|
break if selected == @shutdown_port || closed?
|
|
160
161
|
|
|
161
|
-
deliver_result(
|
|
162
|
+
deliver_result(ProtocolMessage.new(message:))
|
|
162
163
|
end
|
|
163
164
|
rescue StandardError => error
|
|
164
|
-
deliver_result(
|
|
165
|
+
deliver_result(ErrorMessage.new(error: build_error(:receive, error)))
|
|
165
166
|
ensure
|
|
166
|
-
deliver_result(
|
|
167
|
+
deliver_result(ClosedMessage.new) if closed?
|
|
167
168
|
end
|
|
168
169
|
|
|
169
170
|
def select_message
|
|
@@ -218,7 +219,6 @@ module FiberStream
|
|
|
218
219
|
def wait_for_coordinator
|
|
219
220
|
return unless @coordinator
|
|
220
221
|
|
|
221
|
-
sleep WAIT_INTERVAL while @coordinator.alive?
|
|
222
222
|
@coordinator.join
|
|
223
223
|
end
|
|
224
224
|
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FiberStream
|
|
4
|
+
module Pull
|
|
5
|
+
# Delimiter-framing stage for `Flow.split`.
|
|
6
|
+
#
|
|
7
|
+
# The stage keeps an internal byte buffer because frames and separators can
|
|
8
|
+
# cross chunk boundaries. Length checks are per frame body, not against the
|
|
9
|
+
# aggregate buffer, so already complete valid frames can be emitted before a
|
|
10
|
+
# later over-limit frame fails.
|
|
11
|
+
class Split
|
|
12
|
+
def initialize(upstream, separator, keep_separator, max_length)
|
|
13
|
+
@upstream = upstream
|
|
14
|
+
@separator = separator.b.freeze
|
|
15
|
+
@keep_separator = keep_separator
|
|
16
|
+
@max_length = max_length
|
|
17
|
+
@buffer = +"".b
|
|
18
|
+
@closed = false
|
|
19
|
+
@upstream_done = false
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
def next
|
|
23
|
+
return DONE if @closed
|
|
24
|
+
|
|
25
|
+
loop do
|
|
26
|
+
frame = next_buffered_frame
|
|
27
|
+
return frame if frame
|
|
28
|
+
|
|
29
|
+
validate_pending_frame_length!
|
|
30
|
+
return complete_from_buffer if @upstream_done
|
|
31
|
+
|
|
32
|
+
append_next_chunk
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def close
|
|
37
|
+
return if @closed
|
|
38
|
+
|
|
39
|
+
@closed = true
|
|
40
|
+
@buffer.clear
|
|
41
|
+
@upstream.close
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
private
|
|
45
|
+
|
|
46
|
+
def next_buffered_frame
|
|
47
|
+
separator_index = @buffer.index(@separator)
|
|
48
|
+
return nil unless separator_index
|
|
49
|
+
|
|
50
|
+
frame = @buffer.slice!(0, separator_index)
|
|
51
|
+
@buffer.slice!(0, @separator.bytesize)
|
|
52
|
+
validate_frame_length!(frame)
|
|
53
|
+
format_frame(frame)
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
def complete_from_buffer
|
|
57
|
+
return DONE if @buffer.empty?
|
|
58
|
+
|
|
59
|
+
frame = @buffer
|
|
60
|
+
@buffer = +"".b
|
|
61
|
+
validate_frame_length!(frame)
|
|
62
|
+
frame
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def append_next_chunk
|
|
66
|
+
chunk = @upstream.next
|
|
67
|
+
if Pull.done?(chunk)
|
|
68
|
+
@upstream_done = true
|
|
69
|
+
return
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
unless chunk.is_a?(String)
|
|
73
|
+
raise TypeError, "Flow.split elements must be String"
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
@buffer << chunk.b
|
|
77
|
+
validate_pending_frame_length!
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
def validate_pending_frame_length!
|
|
81
|
+
return unless @max_length
|
|
82
|
+
return if pending_frame_body_bytesize <= @max_length
|
|
83
|
+
|
|
84
|
+
fail_frame_too_long
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
def validate_frame_length!(frame)
|
|
88
|
+
return unless @max_length
|
|
89
|
+
return if frame.bytesize <= @max_length
|
|
90
|
+
|
|
91
|
+
fail_frame_too_long
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
def pending_frame_body_bytesize
|
|
95
|
+
separator_index = @buffer.index(@separator)
|
|
96
|
+
return separator_index if separator_index
|
|
97
|
+
|
|
98
|
+
@buffer.bytesize - partial_separator_suffix_bytesize
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
def partial_separator_suffix_bytesize
|
|
102
|
+
max_suffix_bytesize = [@separator.bytesize - 1, @buffer.bytesize].min
|
|
103
|
+
return 0 if max_suffix_bytesize.zero?
|
|
104
|
+
|
|
105
|
+
max_suffix_bytesize.downto(1) do |bytesize|
|
|
106
|
+
suffix = @buffer.byteslice(@buffer.bytesize - bytesize, bytesize)
|
|
107
|
+
return bytesize if @separator.start_with?(suffix)
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
0
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
def fail_frame_too_long
|
|
114
|
+
@closed = true
|
|
115
|
+
close_upstream
|
|
116
|
+
error = FrameTooLongError.new("frame exceeded max_length #{@max_length}")
|
|
117
|
+
raise error
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
def close_upstream
|
|
121
|
+
@upstream.close
|
|
122
|
+
nil
|
|
123
|
+
rescue StandardError => error
|
|
124
|
+
error
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
def format_frame(frame)
|
|
128
|
+
return frame unless @keep_separator
|
|
129
|
+
|
|
130
|
+
frame + @separator
|
|
131
|
+
end
|
|
132
|
+
end
|
|
133
|
+
end
|
|
134
|
+
end
|
data/lib/fiber_stream/pull.rb
CHANGED
|
@@ -28,6 +28,10 @@ module FiberStream
|
|
|
28
28
|
RactorPortSource.new(port, ack_port, ack_transfer, cancel)
|
|
29
29
|
end
|
|
30
30
|
|
|
31
|
+
def self.ractor_merge_ports(port_pairs, ack_transfer, cancel)
|
|
32
|
+
RactorMergePortsSource.new(port_pairs, ack_transfer, cancel)
|
|
33
|
+
end
|
|
34
|
+
|
|
31
35
|
def self.concat(left_materializer, right_materializer)
|
|
32
36
|
Concat.new(left_materializer, right_materializer)
|
|
33
37
|
end
|
|
@@ -36,6 +40,10 @@ module FiberStream
|
|
|
36
40
|
Zip.new(left_materializer, right_materializer)
|
|
37
41
|
end
|
|
38
42
|
|
|
43
|
+
def self.merge(left_materializer, right_materializer)
|
|
44
|
+
Merge.new(left_materializer, right_materializer)
|
|
45
|
+
end
|
|
46
|
+
|
|
39
47
|
def self.map(upstream, transform)
|
|
40
48
|
Map.new(upstream, transform)
|
|
41
49
|
end
|
|
@@ -60,6 +68,10 @@ module FiberStream
|
|
|
60
68
|
Drop.new(upstream, count)
|
|
61
69
|
end
|
|
62
70
|
|
|
71
|
+
def self.grouped(upstream, count)
|
|
72
|
+
Grouped.new(upstream, count)
|
|
73
|
+
end
|
|
74
|
+
|
|
63
75
|
def self.take_while(upstream, predicate)
|
|
64
76
|
TakeWhile.new(upstream, predicate)
|
|
65
77
|
end
|
|
@@ -80,6 +92,10 @@ module FiberStream
|
|
|
80
92
|
Lines.new(upstream, chomp, max_length)
|
|
81
93
|
end
|
|
82
94
|
|
|
95
|
+
def self.split(upstream, separator, keep_separator, max_length)
|
|
96
|
+
Split.new(upstream, separator, keep_separator, max_length)
|
|
97
|
+
end
|
|
98
|
+
|
|
83
99
|
private_constant :DONE
|
|
84
100
|
end
|
|
85
101
|
end
|
|
@@ -87,15 +103,19 @@ end
|
|
|
87
103
|
require_relative "pull/each"
|
|
88
104
|
require_relative "pull/io_source"
|
|
89
105
|
require_relative "pull/ractor_port_source"
|
|
106
|
+
require_relative "pull/ractor_merge_ports_source"
|
|
90
107
|
require_relative "pull/concat"
|
|
91
108
|
require_relative "pull/zip"
|
|
109
|
+
require_relative "pull/merge"
|
|
92
110
|
require_relative "pull/map"
|
|
93
111
|
require_relative "pull/select"
|
|
94
112
|
require_relative "pull/take"
|
|
95
113
|
require_relative "pull/drop"
|
|
114
|
+
require_relative "pull/grouped"
|
|
96
115
|
require_relative "pull/take_while"
|
|
97
116
|
require_relative "pull/drop_while"
|
|
98
117
|
require_relative "pull/lines"
|
|
118
|
+
require_relative "pull/split"
|
|
99
119
|
require_relative "pull/async_boundary"
|
|
100
120
|
require_relative "pull/buffer_boundary"
|
|
101
121
|
require_relative "pull/parallel_map_boundary"
|
|
@@ -103,8 +123,8 @@ require_relative "pull/ractor_map_boundary"
|
|
|
103
123
|
|
|
104
124
|
module FiberStream
|
|
105
125
|
module Pull
|
|
106
|
-
private_constant :Each, :IOSource, :RactorPortSource, :Concat, :Zip, :Map, :Select, :Take, :Drop,
|
|
107
|
-
:TakeWhile, :DropWhile, :Lines, :AsyncBoundary, :BufferBoundary,
|
|
108
|
-
:ParallelMapBoundary, :RactorMapBoundary
|
|
126
|
+
private_constant :Each, :IOSource, :RactorPortSource, :RactorMergePortsSource, :Concat, :Zip, :Merge, :Map,
|
|
127
|
+
:Select, :Take, :Drop, :Grouped, :TakeWhile, :DropWhile, :Lines, :Split, :AsyncBoundary,
|
|
128
|
+
:BufferBoundary, :ParallelMapBoundary, :RactorMapBoundary
|
|
109
129
|
end
|
|
110
130
|
end
|
|
@@ -6,7 +6,9 @@ module FiberStream
|
|
|
6
6
|
# Producers send `Element`, `Complete`, and `Failure` messages to the data
|
|
7
7
|
# port. FiberStream sends `Ack` and `Cancel` messages to the producer-owned
|
|
8
8
|
# acknowledgment port. The envelopes keep stream values distinct from control
|
|
9
|
-
# messages and support Ruby pattern matching.
|
|
9
|
+
# messages and support Ruby pattern matching. `Failure` cause metadata is
|
|
10
|
+
# producer-provided and is surfaced through `RactorPortSourceError`; producers
|
|
11
|
+
# should sanitize it before crossing trust boundaries.
|
|
10
12
|
module RactorPort
|
|
11
13
|
Element = ::Data.define(:value)
|
|
12
14
|
Complete = ::Data.define
|
|
@@ -2,6 +2,11 @@
|
|
|
2
2
|
|
|
3
3
|
module FiberStream
|
|
4
4
|
class RunningPipeline
|
|
5
|
+
ValueMessage = Data.define(:value)
|
|
6
|
+
ErrorMessage = Data.define(:error)
|
|
7
|
+
CancelledMessage = Data.define(:error)
|
|
8
|
+
private_constant :ValueMessage, :ErrorMessage, :CancelledMessage
|
|
9
|
+
|
|
5
10
|
def initialize(scheduler, &run)
|
|
6
11
|
@scheduler = scheduler
|
|
7
12
|
@completion = nil
|
|
@@ -82,16 +87,19 @@ module FiberStream
|
|
|
82
87
|
private
|
|
83
88
|
|
|
84
89
|
def run_background(run)
|
|
85
|
-
complete(
|
|
90
|
+
complete(ValueMessage.new(value: run.call))
|
|
91
|
+
rescue SystemExit, SignalException => error
|
|
92
|
+
complete(ErrorMessage.new(error:))
|
|
93
|
+
raise
|
|
86
94
|
rescue Exception => error # rubocop:disable Lint/RescueException
|
|
87
95
|
complete(classify_error(error))
|
|
88
96
|
end
|
|
89
97
|
|
|
90
98
|
def classify_error(error)
|
|
91
99
|
if cancellation_error?(error)
|
|
92
|
-
|
|
100
|
+
CancelledMessage.new(error:)
|
|
93
101
|
else
|
|
94
|
-
|
|
102
|
+
ErrorMessage.new(error:)
|
|
95
103
|
end
|
|
96
104
|
end
|
|
97
105
|
|
|
@@ -114,11 +122,13 @@ module FiberStream
|
|
|
114
122
|
end
|
|
115
123
|
|
|
116
124
|
def deliver(message)
|
|
117
|
-
case message
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
raise
|
|
125
|
+
case message
|
|
126
|
+
in ValueMessage[value:]
|
|
127
|
+
value
|
|
128
|
+
in ErrorMessage[error:]
|
|
129
|
+
raise error
|
|
130
|
+
in CancelledMessage[error:]
|
|
131
|
+
raise error
|
|
122
132
|
end
|
|
123
133
|
end
|
|
124
134
|
|
data/lib/fiber_stream/source.rb
CHANGED
|
@@ -2,6 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
module FiberStream
|
|
4
4
|
class Source
|
|
5
|
+
RactorMergePortPair = Data.define(:port, :ack_port)
|
|
6
|
+
private_constant :RactorMergePortPair
|
|
7
|
+
|
|
5
8
|
# Creates a source definition from an Enumerable.
|
|
6
9
|
#
|
|
7
10
|
# The enumerable is not consumed until values are pulled by `run_with`. Each
|
|
@@ -17,7 +20,9 @@ module FiberStream
|
|
|
17
20
|
# The IO object is not read until values are pulled by `run_with`. Each
|
|
18
21
|
# materialization reads from the same IO object's current position; this
|
|
19
22
|
# source does not snapshot, reopen, or guarantee replayability. The IO is
|
|
20
|
-
# closed only when `close: true` is passed.
|
|
23
|
+
# closed only when `close: true` is passed. `chunk_size` is the maximum byte
|
|
24
|
+
# count passed to `readpartial` for one downstream pull; very large values
|
|
25
|
+
# may cause the IO implementation to attempt large allocations.
|
|
21
26
|
def self.io(io, chunk_size: 16 * 1024, close: false)
|
|
22
27
|
raise TypeError, "io must respond to readpartial" unless io.respond_to?(:readpartial)
|
|
23
28
|
raise TypeError, "chunk_size must be an Integer" unless chunk_size.is_a?(Integer)
|
|
@@ -34,7 +39,8 @@ module FiberStream
|
|
|
34
39
|
# producer-owned port that receives `RactorPort::Ack` and
|
|
35
40
|
# `RactorPort::Cancel` control messages. The producer must wait for an ack
|
|
36
41
|
# before sending each `RactorPort::Element`, `RactorPort::Complete`, or
|
|
37
|
-
# `RactorPort::Failure` message.
|
|
42
|
+
# `RactorPort::Failure` message. Failure metadata is producer-provided and
|
|
43
|
+
# should be sanitized before crossing trust boundaries.
|
|
38
44
|
def self.ractor_port(port, ack_port:, ack_transfer: :copy, cancel: true)
|
|
39
45
|
raise TypeError, "port must respond to receive" unless port.respond_to?(:receive)
|
|
40
46
|
unless ack_port.respond_to?(:send) && ack_port.method(:send).owner != Kernel
|
|
@@ -47,6 +53,24 @@ module FiberStream
|
|
|
47
53
|
new(-> { Pull.ractor_port(port, ack_port, ack_transfer, cancel) })
|
|
48
54
|
end
|
|
49
55
|
|
|
56
|
+
# Creates a backpressure-aware source definition from multiple Ractor port
|
|
57
|
+
# pairs.
|
|
58
|
+
#
|
|
59
|
+
# Each pair must be a Hash with `:port` and `:ack_port`. The source sends
|
|
60
|
+
# at most one outstanding `RactorPort::Ack` to each active producer and
|
|
61
|
+
# emits producer values in coordinator-observed ready order. Producer work
|
|
62
|
+
# is isolated in Ractors, so demanding this source does not require a
|
|
63
|
+
# `Fiber.scheduler`. Failure metadata is producer-provided and should be
|
|
64
|
+
# sanitized before crossing trust boundaries.
|
|
65
|
+
def self.ractor_merge_ports(ports, ack_transfer: :copy, cancel: true)
|
|
66
|
+
pairs = normalize_ractor_merge_port_pairs(ports)
|
|
67
|
+
|
|
68
|
+
Flow.__send__(:validate_ractor_transfer_policy!, :ack_transfer, ack_transfer)
|
|
69
|
+
raise TypeError, "cancel must be true or false" unless [true, false].include?(cancel)
|
|
70
|
+
|
|
71
|
+
new(-> { Pull.ractor_merge_ports(pairs, ack_transfer, cancel) })
|
|
72
|
+
end
|
|
73
|
+
|
|
50
74
|
def initialize(source_factory, flows = [])
|
|
51
75
|
@source_factory = source_factory
|
|
52
76
|
@flows = flows
|
|
@@ -93,6 +117,23 @@ module FiberStream
|
|
|
93
117
|
)
|
|
94
118
|
end
|
|
95
119
|
|
|
120
|
+
# Returns a new source definition that emits values from this source and
|
|
121
|
+
# `source` in scheduler-observed ready order.
|
|
122
|
+
#
|
|
123
|
+
# Construction is lazy. The merged source starts one scheduled producer
|
|
124
|
+
# fiber per input source only when downstream demand reaches the merge. Each
|
|
125
|
+
# input's own element order is preserved, but cross-input ordering is not
|
|
126
|
+
# deterministic and requires an installed `Fiber.scheduler` from a
|
|
127
|
+
# non-blocking fiber when demanded.
|
|
128
|
+
def merge(source)
|
|
129
|
+
raise TypeError, "expected FiberStream::Source" unless source.is_a?(Source)
|
|
130
|
+
|
|
131
|
+
self.class.__send__(
|
|
132
|
+
:new,
|
|
133
|
+
-> { Pull.merge(materializer, source.__send__(:materializer)) }
|
|
134
|
+
)
|
|
135
|
+
end
|
|
136
|
+
|
|
96
137
|
# Returns a new source definition that maps each element with `block`.
|
|
97
138
|
#
|
|
98
139
|
# This is a convenience wrapper around `via(FiberStream::Flow.map { ... })`
|
|
@@ -154,6 +195,15 @@ module FiberStream
|
|
|
154
195
|
via(Flow.drop(count))
|
|
155
196
|
end
|
|
156
197
|
|
|
198
|
+
# Returns a new source definition that groups adjacent elements into arrays.
|
|
199
|
+
#
|
|
200
|
+
# This is a convenience wrapper around
|
|
201
|
+
# `via(FiberStream::Flow.grouped(count))` and preserves the same validation,
|
|
202
|
+
# ordering, final partial group, and pull-driven backpressure behavior.
|
|
203
|
+
def grouped(count)
|
|
204
|
+
via(Flow.grouped(count))
|
|
205
|
+
end
|
|
206
|
+
|
|
157
207
|
# Returns a new source definition that emits leading elements while `block`
|
|
158
208
|
# is truthy.
|
|
159
209
|
#
|
|
@@ -194,11 +244,25 @@ module FiberStream
|
|
|
194
244
|
# Returns a new source definition that splits String chunks into lines.
|
|
195
245
|
#
|
|
196
246
|
# This is a convenience wrapper around
|
|
197
|
-
# `via(FiberStream::Flow.lines(chomp:, max_length:))`.
|
|
247
|
+
# `via(FiberStream::Flow.lines(chomp:, max_length:))`. With
|
|
248
|
+
# `max_length: nil`, one unterminated line can buffer without bound. Set a
|
|
249
|
+
# positive `max_length` for untrusted, network-facing, or otherwise
|
|
250
|
+
# unbounded streams.
|
|
198
251
|
def lines(chomp: true, max_length: nil)
|
|
199
252
|
via(Flow.lines(chomp: chomp, max_length: max_length))
|
|
200
253
|
end
|
|
201
254
|
|
|
255
|
+
# Returns a new source definition that splits String chunks into frames.
|
|
256
|
+
#
|
|
257
|
+
# This is a convenience wrapper around
|
|
258
|
+
# `via(FiberStream::Flow.split(separator, keep_separator:, max_length:))`.
|
|
259
|
+
# With `max_length: nil`, one unterminated frame can buffer without bound.
|
|
260
|
+
# Set a positive `max_length` for untrusted, network-facing, or otherwise
|
|
261
|
+
# unbounded streams.
|
|
262
|
+
def split(separator, keep_separator: false, max_length: nil)
|
|
263
|
+
via(Flow.split(separator, keep_separator: keep_separator, max_length: max_length))
|
|
264
|
+
end
|
|
265
|
+
|
|
202
266
|
# Returns a runnable pipeline from this source to `sink`.
|
|
203
267
|
#
|
|
204
268
|
# Construction is lazy. The source and sink are not materialized until
|
|
@@ -237,6 +301,44 @@ module FiberStream
|
|
|
237
301
|
|
|
238
302
|
private_class_method :new
|
|
239
303
|
|
|
304
|
+
def self.normalize_ractor_merge_port_pairs(ports)
|
|
305
|
+
raise TypeError, "ports must respond to each" unless ports.respond_to?(:each)
|
|
306
|
+
|
|
307
|
+
data_port_ids = {}
|
|
308
|
+
ack_port_ids = {}
|
|
309
|
+
pairs =
|
|
310
|
+
ports.each.map do |pair|
|
|
311
|
+
normalize_ractor_merge_port_pair(pair, data_port_ids, ack_port_ids)
|
|
312
|
+
end
|
|
313
|
+
|
|
314
|
+
raise ArgumentError, "ractor_merge_ports requires at least two port pairs" if pairs.size < 2
|
|
315
|
+
|
|
316
|
+
pairs.freeze
|
|
317
|
+
end
|
|
318
|
+
|
|
319
|
+
def self.normalize_ractor_merge_port_pair(pair, data_port_ids, ack_port_ids)
|
|
320
|
+
raise TypeError, "port pair must be a Hash" unless pair.is_a?(Hash)
|
|
321
|
+
raise TypeError, "port pair must include :port and :ack_port" unless pair.key?(:port) && pair.key?(:ack_port)
|
|
322
|
+
|
|
323
|
+
port = pair.fetch(:port)
|
|
324
|
+
ack_port = pair.fetch(:ack_port)
|
|
325
|
+
raise TypeError, "port must respond to receive" unless port.respond_to?(:receive)
|
|
326
|
+
unless ack_port.respond_to?(:send) && ack_port.method(:send).owner != Kernel
|
|
327
|
+
raise TypeError, "ack_port must provide Ractor-style send"
|
|
328
|
+
end
|
|
329
|
+
|
|
330
|
+
port_id = port.object_id
|
|
331
|
+
ack_port_id = ack_port.object_id
|
|
332
|
+
raise ArgumentError, "data ports must be distinct" if data_port_ids.key?(port_id)
|
|
333
|
+
raise ArgumentError, "ack ports must be distinct" if ack_port_ids.key?(ack_port_id)
|
|
334
|
+
|
|
335
|
+
data_port_ids[port_id] = true
|
|
336
|
+
ack_port_ids[ack_port_id] = true
|
|
337
|
+
RactorMergePortPair.new(port:, ack_port:)
|
|
338
|
+
end
|
|
339
|
+
|
|
340
|
+
private_class_method :normalize_ractor_merge_port_pairs, :normalize_ractor_merge_port_pair
|
|
341
|
+
|
|
240
342
|
private
|
|
241
343
|
|
|
242
344
|
def materializer
|
data/lib/fiber_stream/version.rb
CHANGED
data/sig/fiber_stream.rbs
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
module FiberStream
|
|
2
2
|
type ractor_transfer_policy = :copy | :move
|
|
3
|
+
type ractor_port_pair = { port: untyped, ack_port: untyped }
|
|
3
4
|
type ractor_map_error_kind = :input_transfer | :output_transfer | :worker | :worker_termination | :isolation
|
|
4
5
|
type ractor_port_source_error_kind = :invalid_message | :producer_failure | :receive | :ack_transfer | :cancel_transfer
|
|
5
6
|
type ractor_port_cancel_reason = :closed
|
|
@@ -51,20 +52,24 @@ module FiberStream
|
|
|
51
52
|
def self.each: [Elem] (Enumerable[Elem] enumerable) -> Source[Elem]
|
|
52
53
|
def self.io: (untyped io, ?chunk_size: Integer, ?close: bool) -> Source[String]
|
|
53
54
|
def self.ractor_port: [Elem] (untyped port, ack_port: untyped, ?ack_transfer: ractor_transfer_policy, ?cancel: bool) -> Source[Elem]
|
|
55
|
+
def self.ractor_merge_ports: [Elem] (Enumerable[ractor_port_pair] ports, ?ack_transfer: ractor_transfer_policy, ?cancel: bool) -> Source[Elem]
|
|
54
56
|
def via: [Out] (Flow[Elem, Out] flow) -> Source[Out]
|
|
55
57
|
def concat: [Other] (Source[Other] source) -> Source[Elem | Other]
|
|
56
58
|
def zip: [Other] (Source[Other] source) -> Source[[Elem, Other]]
|
|
59
|
+
def merge: [Other] (Source[Other] source) -> Source[Elem | Other]
|
|
57
60
|
def map: [Out] () { (Elem) -> Out } -> Source[Out]
|
|
58
61
|
def parallel_map: [Out] (concurrency: Integer) { (Elem) -> Out } -> Source[Out]
|
|
59
62
|
def ractor_map: [Out] (workers: Integer, ?input_transfer: ractor_transfer_policy, ?output_transfer: ractor_transfer_policy) { (Elem) -> Out } -> Source[Out]
|
|
60
63
|
def select: () { (Elem) -> boolish } -> Source[Elem]
|
|
61
64
|
def take: (Integer count) -> Source[Elem]
|
|
62
65
|
def drop: (Integer count) -> Source[Elem]
|
|
66
|
+
def grouped: (Integer count) -> Source[Array[Elem]]
|
|
63
67
|
def take_while: () { (Elem) -> boolish } -> Source[Elem]
|
|
64
68
|
def drop_while: () { (Elem) -> boolish } -> Source[Elem]
|
|
65
69
|
def async: () -> Source[Elem]
|
|
66
70
|
def buffer: (Integer count) -> Source[Elem]
|
|
67
71
|
def lines: (?chomp: bool, ?max_length: Integer?) -> Source[String]
|
|
72
|
+
def split: (String separator, ?keep_separator: bool, ?max_length: Integer?) -> Source[String]
|
|
68
73
|
def to: [Mat] (Sink[Elem, Mat] sink) -> Pipeline[Mat]
|
|
69
74
|
def run_with: [Mat] (Sink[Elem, Mat] sink) -> Mat
|
|
70
75
|
end
|
|
@@ -76,11 +81,13 @@ module FiberStream
|
|
|
76
81
|
def self.select: [Elem] () { (Elem) -> boolish } -> Flow[Elem, Elem]
|
|
77
82
|
def self.take: [Elem] (Integer count) -> Flow[Elem, Elem]
|
|
78
83
|
def self.drop: [Elem] (Integer count) -> Flow[Elem, Elem]
|
|
84
|
+
def self.grouped: [Elem] (Integer count) -> Flow[Elem, Array[Elem]]
|
|
79
85
|
def self.take_while: [Elem] () { (Elem) -> boolish } -> Flow[Elem, Elem]
|
|
80
86
|
def self.drop_while: [Elem] () { (Elem) -> boolish } -> Flow[Elem, Elem]
|
|
81
87
|
def self.async: [Elem] () -> Flow[Elem, Elem]
|
|
82
88
|
def self.buffer: [Elem] (Integer count) -> Flow[Elem, Elem]
|
|
83
89
|
def self.lines: (?chomp: bool, ?max_length: Integer?) -> Flow[String, String]
|
|
90
|
+
def self.split: (String separator, ?keep_separator: bool, ?max_length: Integer?) -> Flow[String, String]
|
|
84
91
|
def via: [Next] (Flow[Out, Next] flow) -> Flow[In, Next]
|
|
85
92
|
def to: [Mat] (Sink[Out, Mat] sink) -> Sink[In, Mat]
|
|
86
93
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: fiber_stream
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.0
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Dai Akatsuka
|
|
@@ -117,6 +117,7 @@ files:
|
|
|
117
117
|
- examples/file_copy.rb
|
|
118
118
|
- examples/line_processing.rb
|
|
119
119
|
- examples/ractor_map_hashing.rb
|
|
120
|
+
- examples/ractor_merge_ports_and_map.rb
|
|
120
121
|
- examples/ractor_port_source.rb
|
|
121
122
|
- lib/fiber_stream.rb
|
|
122
123
|
- lib/fiber_stream/errors.rb
|
|
@@ -129,13 +130,17 @@ files:
|
|
|
129
130
|
- lib/fiber_stream/pull/drop.rb
|
|
130
131
|
- lib/fiber_stream/pull/drop_while.rb
|
|
131
132
|
- lib/fiber_stream/pull/each.rb
|
|
133
|
+
- lib/fiber_stream/pull/grouped.rb
|
|
132
134
|
- lib/fiber_stream/pull/io_source.rb
|
|
133
135
|
- lib/fiber_stream/pull/lines.rb
|
|
134
136
|
- lib/fiber_stream/pull/map.rb
|
|
137
|
+
- lib/fiber_stream/pull/merge.rb
|
|
135
138
|
- lib/fiber_stream/pull/parallel_map_boundary.rb
|
|
136
139
|
- lib/fiber_stream/pull/ractor_map_boundary.rb
|
|
140
|
+
- lib/fiber_stream/pull/ractor_merge_ports_source.rb
|
|
137
141
|
- lib/fiber_stream/pull/ractor_port_source.rb
|
|
138
142
|
- lib/fiber_stream/pull/select.rb
|
|
143
|
+
- lib/fiber_stream/pull/split.rb
|
|
139
144
|
- lib/fiber_stream/pull/take.rb
|
|
140
145
|
- lib/fiber_stream/pull/take_while.rb
|
|
141
146
|
- lib/fiber_stream/pull/zip.rb
|
|
@@ -151,7 +156,7 @@ licenses:
|
|
|
151
156
|
metadata:
|
|
152
157
|
allowed_push_host: https://rubygems.org
|
|
153
158
|
homepage_uri: https://github.com/dakatsuka/fiber_stream
|
|
154
|
-
source_code_uri: https://github.com/dakatsuka/fiber_stream/tree/v0.2.0
|
|
159
|
+
source_code_uri: https://github.com/dakatsuka/fiber_stream/tree/v0.3.0
|
|
155
160
|
changelog_uri: https://github.com/dakatsuka/fiber_stream/blob/main/CHANGELOG.md
|
|
156
161
|
rubygems_mfa_required: 'true'
|
|
157
162
|
rdoc_options: []
|