fiber_stream 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +60 -0
- data/README.md +167 -43
- data/examples/README.md +11 -0
- data/examples/ractor_merge_ports_and_map.rb +116 -0
- data/examples/ractor_producer_sources.rb +43 -0
- data/lib/fiber_stream/errors.rb +4 -1
- data/lib/fiber_stream/flow.rb +75 -16
- data/lib/fiber_stream/internal/ractor_transfer_policy.rb +17 -0
- data/lib/fiber_stream/pipeline.rb +5 -1
- data/lib/fiber_stream/pull/async_boundary.rb +28 -11
- data/lib/fiber_stream/pull/buffer_boundary.rb +28 -10
- data/lib/fiber_stream/pull/concat.rb +9 -1
- data/lib/fiber_stream/pull/grouped.rb +46 -0
- data/lib/fiber_stream/pull/merge.rb +230 -0
- data/lib/fiber_stream/pull/parallel_map_boundary.rb +28 -24
- data/lib/fiber_stream/pull/parallel_unordered_map_boundary.rb +311 -0
- data/lib/fiber_stream/pull/ractor_map_boundary.rb +112 -89
- data/lib/fiber_stream/pull/ractor_merge_ports_source.rb +373 -0
- data/lib/fiber_stream/pull/ractor_port_source.rb +53 -20
- data/lib/fiber_stream/pull/ractor_producer_source.rb +349 -0
- data/lib/fiber_stream/pull/scan.rb +38 -0
- data/lib/fiber_stream/pull/split.rb +134 -0
- data/lib/fiber_stream/pull.rb +74 -5
- data/lib/fiber_stream/ractor_port.rb +3 -1
- data/lib/fiber_stream/ractor_producer.rb +167 -0
- data/lib/fiber_stream/running_pipeline.rb +22 -8
- data/lib/fiber_stream/sink.rb +9 -19
- data/lib/fiber_stream/source.rb +177 -19
- data/lib/fiber_stream/version.rb +1 -1
- data/lib/fiber_stream.rb +2 -0
- data/sig/fiber_stream.rbs +25 -1
- metadata +14 -3
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FiberStream
|
|
4
|
+
# Producer-side context for `Source.ractor_producer`.
|
|
5
|
+
#
|
|
6
|
+
# Producer blocks call `emit`, `complete`, or `fail` to send one protocol
|
|
7
|
+
# message after receiving one downstream acknowledgment. A `false` return
|
|
8
|
+
# means cooperative cancellation was observed before the requested message
|
|
9
|
+
# could be sent.
|
|
10
|
+
class RactorProducer
  # Stores the data port, the acknowledgment port, and the default transfer
  # policy. A new producer is neither terminal nor cancelled.
  def initialize(data_port, ack_port, transfer)
    @data_port = data_port
    @ack_port = ack_port
    @transfer = transfer
    @terminal = false
    @cancelled = false
    @send_failed = false
  end

  # Sends one element after receiving one acknowledgment.
  #
  # Returns `true` when the element was sent. Returns `false` when the
  # producer is already terminal or cancelled, when a cancel message arrives
  # instead of the acknowledgment, or when sending raised. An invalid
  # `transfer` override raises ArgumentError before any control message is
  # received.
  def emit(value, transfer: nil)
    return false if finished?

    policy = validate_transfer_override!(transfer)
    return false unless wait_for_ack

    send_emitted_message(RactorPort::Element.new(value), policy)
  end

  # Sends the completion message after receiving one acknowledgment and marks
  # the producer terminal. Returns `false` under the same conditions as `emit`.
  def complete
    return false if finished?
    return false unless wait_for_ack

    send_terminal_message(RactorPort::Complete.new)
  end

  # Sends a failure message after receiving one acknowledgment and marks the
  # producer terminal.
  #
  # The failure is described either by `error` (its class name and message)
  # or by the two String keywords. The description is built before waiting
  # for the acknowledgment, so invalid arguments raise ArgumentError without
  # consuming a control message.
  def fail(error = nil, cause_class_name: nil, cause_message: nil)
    return false if finished?

    failure = failure_message(error, cause_class_name, cause_message)
    return false unless wait_for_ack

    send_terminal_message(failure)
  end

  # True once a cancel control message has been received.
  def cancelled?
    @cancelled
  end

  def terminal? # :nodoc:
    @terminal
  end

  def send_failed? # :nodoc:
    @send_failed
  end

  private

  # No further protocol message may be sent once terminal or cancelled.
  def finished?
    terminal? || cancelled?
  end

  # Sends an element; a raised send is reported as a failure message instead.
  def send_emitted_message(message, transfer)
    send_data_message(message, transfer)
    true
  rescue Exception => send_error # rubocop:disable Lint/RescueException
    report_same_ack_failure(send_error)
    false
  end

  # Sends a terminal message with the default transfer policy and records the
  # terminal state only after the send succeeded.
  def send_terminal_message(message)
    send_data_message(message, @transfer)
    @terminal = true
  rescue Exception => send_error # rubocop:disable Lint/RescueException
    report_same_ack_failure(send_error)
    false
  end

  # Resolves the per-message transfer policy: `nil` selects the default,
  # `:copy` and `:move` are returned as given, anything else is rejected.
  def validate_transfer_override!(transfer)
    return @transfer if transfer.nil?

    case transfer
    when :copy, :move then transfer
    else raise ArgumentError, "transfer must be :copy or :move"
    end
  end

  # Blocks on the acknowledgment port. Returns `true` for an ack, `false`
  # (after recording cancellation) for a cancel, and raises TypeError for any
  # other control message.
  def wait_for_ack
    control = @ack_port.receive

    case control
    when RactorPort::Ack
      true
    when RactorPort::Cancel
      @cancelled = true
      false
    else
      raise TypeError, "invalid ractor producer control message"
    end
  end

  # Sends `message` on the data port, passing `move: true` only for `:move`.
  def send_data_message(message, transfer)
    return @data_port.send(message, move: true) if transfer == :move

    @data_port.send(message)
  end

  # Builds the failure message from `error` when one is given, otherwise from
  # the String metadata pair.
  def failure_message(error, cause_class_name, cause_message)
    return RactorPort::Failure.new(safe_class_name(error), safe_message(error)) if error

    metadata_given = cause_class_name.is_a?(String) && cause_message.is_a?(String)
    raise ArgumentError, "fail requires an error or String failure metadata" unless metadata_given

    RactorPort::Failure.new(cause_class_name, cause_message)
  end

  # Class name of `error`, or "Exception" when the name is missing, empty,
  # not a String, or cannot be read without raising.
  def safe_class_name(error)
    candidate = error.class.name
    return "Exception" unless candidate.is_a?(String)
    return "Exception" if candidate.empty?

    candidate
  rescue Exception # rubocop:disable Lint/RescueException
    "Exception"
  end

  # Message of `error`, or "" when it is not a String or cannot be read
  # without raising.
  def safe_message(error)
    text = error.message
    return text if text.is_a?(String)

    ""
  rescue Exception # rubocop:disable Lint/RescueException
    ""
  end

  # Replaces a message whose send raised with a copied failure message
  # describing that error. The producer becomes terminal either way; if the
  # replacement send raises too, `send_failed?` is set as well.
  def report_same_ack_failure(error)
    fallback = RactorPort::Failure.new(safe_class_name(error), safe_message(error))
    send_data_message(fallback, :copy)
    @terminal = true
  rescue Exception # rubocop:disable Lint/RescueException
    @send_failed = true
    @terminal = true
  end
end
|
|
137
|
+
|
|
138
|
+
# Builder passed to `Source.ractor_merge_producers`.
|
|
139
|
+
#
|
|
140
|
+
# Each `producer` call records one lazily started owned producer definition.
|
|
141
|
+
# Registration validates producer block isolation and transfer policy but
|
|
142
|
+
# does not create Ractor ports or start producer code.
|
|
143
|
+
class RactorProducerGroup
  # One registered producer: its arguments, resolved transfer policy, and block.
  Definition = Data.define(:args, :transfer, :block)
  private_constant :Definition

  # `default_transfer` is used for producers registered without `transfer:`.
  def initialize(default_transfer)
    @default_transfer = default_transfer
    @definitions = []
  end

  # Records one producer definition and returns the group so calls can chain.
  #
  # Raises ArgumentError without a block or with a `transfer` other than
  # `nil`, `:copy`, or `:move`, and TypeError when the block is not
  # Ractor-shareable.
  def producer(*args, transfer: nil, &block)
    raise ArgumentError, "missing block" if block.nil?

    transfer_valid = transfer.nil? || [:copy, :move].include?(transfer)
    raise ArgumentError, "transfer must be :copy or :move" unless transfer_valid
    raise TypeError, "block must be shareable" unless Ractor.shareable?(block)

    resolved_transfer = transfer || @default_transfer
    @definitions.push(Definition.new(args: args, transfer: resolved_transfer, block: block))
    self
  end

  # Frozen snapshot of the definitions registered so far; later `producer`
  # calls do not change a snapshot that was already returned.
  def definitions
    snapshot = @definitions.dup
    snapshot.freeze
  end
end
|
|
167
|
+
end
|
|
@@ -2,6 +2,15 @@
|
|
|
2
2
|
|
|
3
3
|
module FiberStream
|
|
4
4
|
class RunningPipeline
|
|
5
|
+
ValueMessage = Data.define(:value)
|
|
6
|
+
ErrorMessage = Data.define(:error)
|
|
7
|
+
CancelledMessage = Data.define(:error)
|
|
8
|
+
private_constant :ValueMessage, :ErrorMessage, :CancelledMessage
|
|
9
|
+
|
|
10
|
+
def self.start(scheduler, &run) # :nodoc:
|
|
11
|
+
new(scheduler, &run)
|
|
12
|
+
end
|
|
13
|
+
|
|
5
14
|
def initialize(scheduler, &run)
|
|
6
15
|
@scheduler = scheduler
|
|
7
16
|
@completion = nil
|
|
@@ -82,16 +91,19 @@ module FiberStream
|
|
|
82
91
|
private
|
|
83
92
|
|
|
84
93
|
# Calls `run` and reports the outcome through `complete`: the return value as
# a ValueMessage, or a raised exception as the message chosen below.
def run_background(run)
  outcome = ValueMessage.new(value: run.call)
  complete(outcome)
rescue SystemExit, SignalException => fatal
  # Report the exit or signal through `complete`, then re-raise it.
  complete(ErrorMessage.new(error: fatal))
  raise
rescue Exception => failure # rubocop:disable Lint/RescueException
  # Every other exception is reported with the message from `classify_error`
  # and is not re-raised here.
  complete(classify_error(failure))
end
|
|
89
101
|
|
|
90
102
|
# Wraps `error` in a CancelledMessage when `cancellation_error?` accepts it,
# otherwise in an ErrorMessage.
def classify_error(error)
  return CancelledMessage.new(error: error) if cancellation_error?(error)

  ErrorMessage.new(error: error)
end
|
|
97
109
|
|
|
@@ -114,11 +126,13 @@ module FiberStream
|
|
|
114
126
|
end
|
|
115
127
|
|
|
116
128
|
# Unwraps a completion message: returns the value of a ValueMessage and
# raises the stored error of an ErrorMessage or CancelledMessage. Any other
# object raises NoMatchingPatternError because no `else` branch is given.
def deliver(message)
  case message
  in ValueMessage then message.value
  in ErrorMessage | CancelledMessage then raise message.error
  end
end
|
|
124
138
|
|
data/lib/fiber_stream/sink.rb
CHANGED
|
@@ -10,10 +10,7 @@ module FiberStream
|
|
|
10
10
|
new do |stream|
|
|
11
11
|
values = []
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
value = stream.next
|
|
15
|
-
break if Pull.done?(value)
|
|
16
|
-
|
|
13
|
+
Pull.each_value(stream) do |value|
|
|
17
14
|
values << value
|
|
18
15
|
end
|
|
19
16
|
|
|
@@ -45,10 +42,7 @@ module FiberStream
|
|
|
45
42
|
new do |stream|
|
|
46
43
|
accumulator = initial
|
|
47
44
|
|
|
48
|
-
|
|
49
|
-
value = stream.next
|
|
50
|
-
break if Pull.done?(value)
|
|
51
|
-
|
|
45
|
+
Pull.each_value(stream) do |value|
|
|
52
46
|
accumulator = block.call(accumulator, value)
|
|
53
47
|
end
|
|
54
48
|
|
|
@@ -68,10 +62,7 @@ module FiberStream
|
|
|
68
62
|
new do |stream|
|
|
69
63
|
count = 0
|
|
70
64
|
|
|
71
|
-
|
|
72
|
-
value = stream.next
|
|
73
|
-
break if Pull.done?(value)
|
|
74
|
-
|
|
65
|
+
Pull.each_value(stream) do |value|
|
|
75
66
|
block.call(value)
|
|
76
67
|
count += 1
|
|
77
68
|
end
|
|
@@ -99,15 +90,17 @@ module FiberStream
|
|
|
99
90
|
end
|
|
100
91
|
end
|
|
101
92
|
|
|
93
|
+
def self.build(&run) # :nodoc:
|
|
94
|
+
new(&run)
|
|
95
|
+
end
|
|
96
|
+
|
|
102
97
|
def initialize(&run)
|
|
103
98
|
@run = run
|
|
104
99
|
end
|
|
105
100
|
|
|
106
101
|
private_class_method :new
|
|
107
102
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
def run(stream)
|
|
103
|
+
def run_stream(stream) # :nodoc:
|
|
111
104
|
@run.call(stream)
|
|
112
105
|
end
|
|
113
106
|
|
|
@@ -121,10 +114,7 @@ module FiberStream
|
|
|
121
114
|
end
|
|
122
115
|
|
|
123
116
|
def run(stream)
|
|
124
|
-
|
|
125
|
-
value = stream.next
|
|
126
|
-
break if Pull.done?(value)
|
|
127
|
-
|
|
117
|
+
Pull.each_value(stream) do |value|
|
|
128
118
|
write(value)
|
|
129
119
|
end
|
|
130
120
|
|
data/lib/fiber_stream/source.rb
CHANGED
|
@@ -2,6 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
module FiberStream
|
|
4
4
|
class Source
|
|
5
|
+
RactorMergePortPair = Data.define(:port, :ack_port)
|
|
6
|
+
private_constant :RactorMergePortPair
|
|
7
|
+
|
|
5
8
|
# Creates a source definition from an Enumerable.
|
|
6
9
|
#
|
|
7
10
|
# The enumerable is not consumed until values are pulled by `run_with`. Each
|
|
@@ -17,7 +20,9 @@ module FiberStream
|
|
|
17
20
|
# The IO object is not read until values are pulled by `run_with`. Each
|
|
18
21
|
# materialization reads from the same IO object's current position; this
|
|
19
22
|
# source does not snapshot, reopen, or guarantee replayability. The IO is
|
|
20
|
-
# closed only when `close: true` is passed.
|
|
23
|
+
# closed only when `close: true` is passed. `chunk_size` is the maximum byte
|
|
24
|
+
# count passed to `readpartial` for one downstream pull; very large values
|
|
25
|
+
# may cause the IO implementation to attempt large allocations.
|
|
21
26
|
def self.io(io, chunk_size: 16 * 1024, close: false)
|
|
22
27
|
raise TypeError, "io must respond to readpartial" unless io.respond_to?(:readpartial)
|
|
23
28
|
raise TypeError, "chunk_size must be an Integer" unless chunk_size.is_a?(Integer)
|
|
@@ -34,19 +39,82 @@ module FiberStream
|
|
|
34
39
|
# producer-owned port that receives `RactorPort::Ack` and
|
|
35
40
|
# `RactorPort::Cancel` control messages. The producer must wait for an ack
|
|
36
41
|
# before sending each `RactorPort::Element`, `RactorPort::Complete`, or
|
|
37
|
-
# `RactorPort::Failure` message.
|
|
42
|
+
# `RactorPort::Failure` message. Failure metadata is producer-provided and
|
|
43
|
+
# should be sanitized before crossing trust boundaries.
|
|
38
44
|
def self.ractor_port(port, ack_port:, ack_transfer: :copy, cancel: true)
|
|
39
45
|
raise TypeError, "port must respond to receive" unless port.respond_to?(:receive)
|
|
40
46
|
unless ack_port.respond_to?(:send) && ack_port.method(:send).owner != Kernel
|
|
41
47
|
raise TypeError, "ack_port must provide Ractor-style send"
|
|
42
48
|
end
|
|
43
49
|
|
|
44
|
-
|
|
50
|
+
Internal::RactorTransferPolicy.validate!(:ack_transfer, ack_transfer)
|
|
45
51
|
raise TypeError, "cancel must be true or false" unless [true, false].include?(cancel)
|
|
46
52
|
|
|
47
53
|
new(-> { Pull.ractor_port(port, ack_port, ack_transfer, cancel) })
|
|
48
54
|
end
|
|
49
55
|
|
|
56
|
+
# Creates a backpressure-aware source definition from multiple Ractor port
|
|
57
|
+
# pairs.
|
|
58
|
+
#
|
|
59
|
+
# Each pair must be a Hash with `:port` and `:ack_port`. The source sends
|
|
60
|
+
# at most one outstanding `RactorPort::Ack` to each active producer and
|
|
61
|
+
# emits producer values in coordinator-observed ready order. Producer work
|
|
62
|
+
# is isolated in Ractors, so demanding this source does not require a
|
|
63
|
+
# `Fiber.scheduler`. Failure metadata is producer-provided and should be
|
|
64
|
+
# sanitized before crossing trust boundaries.
|
|
65
|
+
def self.ractor_merge_ports(ports, ack_transfer: :copy, cancel: true)
  # Port pairs are normalized first, so their errors take precedence over
  # the keyword checks below.
  port_pairs = normalize_ractor_merge_port_pairs(ports)
  Internal::RactorTransferPolicy.validate!(:ack_transfer, ack_transfer)

  cancel_is_boolean = [true, false].include?(cancel)
  raise TypeError, "cancel must be true or false" unless cancel_is_boolean

  # Nothing is handed to Pull until the factory is called.
  factory = lambda { Pull.ractor_merge_ports(port_pairs, ack_transfer, cancel) }
  new(factory)
end
|
|
73
|
+
|
|
74
|
+
# Creates a source backed by one FiberStream-owned producer ractor.
|
|
75
|
+
#
|
|
76
|
+
# The producer ractor is started lazily on first downstream demand. The
|
|
77
|
+
# shareable block receives a `RactorProducer` context and the provided
|
|
78
|
+
# arguments. Calls to the context preserve one-outstanding-ack
|
|
79
|
+
# backpressure, and cleanup always requests cooperative cancellation.
|
|
80
|
+
def self.ractor_producer(*args, transfer: :copy, ack_transfer: :copy, &block)
  raise ArgumentError, "missing block" if block.nil?

  Internal::RactorTransferPolicy.validate!(:transfer, transfer)
  Internal::RactorTransferPolicy.validate!(:ack_transfer, ack_transfer)
  raise TypeError, "block must be shareable" unless Ractor.shareable?(block)

  # A single producer is registered through the same group builder that
  # `ractor_merge_producers` passes to its block.
  definitions =
    RactorProducerGroup.new(transfer).tap { |group| group.producer(*args, &block) }.definitions

  # Pull is only reached when the factory is called.
  factory = lambda { Pull.ractor_producer(definitions, ack_transfer) }
  new(factory)
end
|
|
93
|
+
|
|
94
|
+
# Creates a source backed by multiple FiberStream-owned producer ractors.
|
|
95
|
+
#
|
|
96
|
+
# The registration block runs at construction to collect producer
|
|
97
|
+
# definitions, but producer ractors and ports are started lazily on first
|
|
98
|
+
# downstream demand. Outputs are merged with the same ready-order semantics
|
|
99
|
+
# as `Source.ractor_merge_ports`.
|
|
100
|
+
def self.ractor_merge_producers(transfer: :copy, ack_transfer: :copy, &block)
  raise ArgumentError, "missing block" if block.nil?

  Internal::RactorTransferPolicy.validate!(:transfer, transfer)
  Internal::RactorTransferPolicy.validate!(:ack_transfer, ack_transfer)

  # The registration block runs here, at construction time.
  definitions = RactorProducerGroup.new(transfer).tap { |group| block.call(group) }.definitions
  if definitions.size < 2
    raise ArgumentError, "ractor_merge_producers requires at least two producers"
  end

  # Pull is only reached when the factory is called.
  factory = lambda { Pull.ractor_merge_producers(definitions, ack_transfer) }
  new(factory)
end
|
|
113
|
+
|
|
114
|
+
def self.build(source_factory, flows = []) # :nodoc:
|
|
115
|
+
new(source_factory, flows)
|
|
116
|
+
end
|
|
117
|
+
|
|
50
118
|
def initialize(source_factory, flows = [])
|
|
51
119
|
@source_factory = source_factory
|
|
52
120
|
@flows = flows
|
|
@@ -59,7 +127,7 @@ module FiberStream
|
|
|
59
127
|
def via(flow)
|
|
60
128
|
raise TypeError, "expected FiberStream::Flow" unless flow.is_a?(Flow)
|
|
61
129
|
|
|
62
|
-
self.class.
|
|
130
|
+
self.class.build(@source_factory, @flows + [flow])
|
|
63
131
|
end
|
|
64
132
|
|
|
65
133
|
# Returns a new source definition that emits this source, then `source`.
|
|
@@ -71,10 +139,7 @@ module FiberStream
|
|
|
71
139
|
def concat(source)
|
|
72
140
|
raise TypeError, "expected FiberStream::Source" unless source.is_a?(Source)
|
|
73
141
|
|
|
74
|
-
self.class.
|
|
75
|
-
:new,
|
|
76
|
-
-> { Pull.concat(materializer, source.__send__(:materializer)) }
|
|
77
|
-
)
|
|
142
|
+
self.class.build(-> { Pull.concat(to_pull_materializer, source.to_pull_materializer) })
|
|
78
143
|
end
|
|
79
144
|
|
|
80
145
|
# Returns a new source definition that emits pairs from this source and
|
|
@@ -87,10 +152,21 @@ module FiberStream
|
|
|
87
152
|
def zip(source)
|
|
88
153
|
raise TypeError, "expected FiberStream::Source" unless source.is_a?(Source)
|
|
89
154
|
|
|
90
|
-
self.class.
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
155
|
+
self.class.build(-> { Pull.zip(to_pull_materializer, source.to_pull_materializer) })
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
# Returns a new source definition that emits values from this source and
|
|
159
|
+
# `source` in scheduler-observed ready order.
|
|
160
|
+
#
|
|
161
|
+
# Construction is lazy. The merged source starts one scheduled producer
|
|
162
|
+
# fiber per input source only when downstream demand reaches the merge. Each
|
|
163
|
+
# input's own element order is preserved, but cross-input ordering is not
|
|
164
|
+
# deterministic and requires an installed `Fiber.scheduler` from a
|
|
165
|
+
# non-blocking fiber when demanded.
|
|
166
|
+
def merge(source)
  unless source.is_a?(Source)
    raise TypeError, "expected FiberStream::Source"
  end

  # Both materializers are looked up inside the lambda, so nothing is
  # requested from either source until the factory is called.
  merged_factory = lambda do
    Pull.merge(to_pull_materializer, source.to_pull_materializer)
  end
  self.class.build(merged_factory)
end
|
|
95
171
|
|
|
96
172
|
# Returns a new source definition that maps each element with `block`.
|
|
@@ -112,6 +188,18 @@ module FiberStream
|
|
|
112
188
|
via(Flow.parallel_map(concurrency: concurrency, &block))
|
|
113
189
|
end
|
|
114
190
|
|
|
191
|
+
# Returns a new source definition that maps elements concurrently and emits
|
|
192
|
+
# mapped values in completion order.
|
|
193
|
+
#
|
|
194
|
+
# This is a convenience wrapper around
|
|
195
|
+
# `via(FiberStream::Flow.parallel_unordered_map(concurrency:) { ... })`.
|
|
196
|
+
# The operation preserves the same scheduler requirement, validation,
|
|
197
|
+
# bounded upstream run-ahead, and cancellation behavior while making no
|
|
198
|
+
# input-order guarantee.
|
|
199
|
+
def parallel_unordered_map(concurrency:, &block)
|
|
200
|
+
via(Flow.parallel_unordered_map(concurrency: concurrency, &block))
|
|
201
|
+
end
|
|
202
|
+
|
|
115
203
|
# Returns a new source definition that maps elements in Ractor workers.
|
|
116
204
|
#
|
|
117
205
|
# This is a convenience wrapper around
|
|
@@ -154,6 +242,24 @@ module FiberStream
|
|
|
154
242
|
via(Flow.drop(count))
|
|
155
243
|
end
|
|
156
244
|
|
|
245
|
+
# Returns a new source definition that groups adjacent elements into arrays.
|
|
246
|
+
#
|
|
247
|
+
# This is a convenience wrapper around
|
|
248
|
+
# `via(FiberStream::Flow.grouped(count))` and preserves the same validation,
|
|
249
|
+
# ordering, final partial group, and pull-driven backpressure behavior.
|
|
250
|
+
def grouped(count)
|
|
251
|
+
via(Flow.grouped(count))
|
|
252
|
+
end
|
|
253
|
+
|
|
254
|
+
# Returns a new source definition that emits running accumulators.
|
|
255
|
+
#
|
|
256
|
+
# This is a convenience wrapper around
|
|
257
|
+
# `via(FiberStream::Flow.scan(initial) { ... })` and preserves the same
|
|
258
|
+
# reducer order, lazy construction, and pull-driven backpressure behavior.
|
|
259
|
+
def scan(initial, &block)
|
|
260
|
+
via(Flow.scan(initial, &block))
|
|
261
|
+
end
|
|
262
|
+
|
|
157
263
|
# Returns a new source definition that emits leading elements while `block`
|
|
158
264
|
# is truthy.
|
|
159
265
|
#
|
|
@@ -194,11 +300,25 @@ module FiberStream
|
|
|
194
300
|
# Returns a new source definition that splits String chunks into lines.
|
|
195
301
|
#
|
|
196
302
|
# This is a convenience wrapper around
|
|
197
|
-
# `via(FiberStream::Flow.lines(chomp:, max_length:))`.
|
|
303
|
+
# `via(FiberStream::Flow.lines(chomp:, max_length:))`. With
|
|
304
|
+
# `max_length: nil`, one unterminated line can buffer without bound. Set a
|
|
305
|
+
# positive `max_length` for untrusted, network-facing, or otherwise
|
|
306
|
+
# unbounded streams.
|
|
198
307
|
def lines(chomp: true, max_length: nil)
|
|
199
308
|
via(Flow.lines(chomp: chomp, max_length: max_length))
|
|
200
309
|
end
|
|
201
310
|
|
|
311
|
+
# Returns a new source definition that splits String chunks into frames.
|
|
312
|
+
#
|
|
313
|
+
# This is a convenience wrapper around
|
|
314
|
+
# `via(FiberStream::Flow.split(separator, keep_separator:, max_length:))`.
|
|
315
|
+
# With `max_length: nil`, one unterminated frame can buffer without bound.
|
|
316
|
+
# Set a positive `max_length` for untrusted, network-facing, or otherwise
|
|
317
|
+
# unbounded streams.
|
|
318
|
+
def split(separator, keep_separator: false, max_length: nil)
|
|
319
|
+
via(Flow.split(separator, keep_separator: keep_separator, max_length: max_length))
|
|
320
|
+
end
|
|
321
|
+
|
|
202
322
|
# Returns a runnable pipeline from this source to `sink`.
|
|
203
323
|
#
|
|
204
324
|
# Construction is lazy. The source and sink are not materialized until
|
|
@@ -206,7 +326,7 @@ module FiberStream
|
|
|
206
326
|
def to(sink)
|
|
207
327
|
raise TypeError, "expected FiberStream::Sink" unless sink.is_a?(Sink)
|
|
208
328
|
|
|
209
|
-
Pipeline.
|
|
329
|
+
Pipeline.build(self, sink)
|
|
210
330
|
end
|
|
211
331
|
|
|
212
332
|
# Materializes and runs this source with `sink`.
|
|
@@ -222,7 +342,7 @@ module FiberStream
|
|
|
222
342
|
begin
|
|
223
343
|
stream = materialize
|
|
224
344
|
|
|
225
|
-
sink.
|
|
345
|
+
sink.run_stream(stream)
|
|
226
346
|
rescue StandardError => error
|
|
227
347
|
primary_error = error
|
|
228
348
|
raise
|
|
@@ -237,19 +357,57 @@ module FiberStream
|
|
|
237
357
|
|
|
238
358
|
private_class_method :new
|
|
239
359
|
|
|
240
|
-
|
|
360
|
+
def to_pull_materializer # :nodoc:
|
|
361
|
+
method(:materialize)
|
|
362
|
+
end
|
|
363
|
+
|
|
364
|
+
# Validates every entry of `ports` and returns the normalized pairs as a
# frozen Array. Raises TypeError when `ports` has no `each`, and ArgumentError
# when fewer than two pairs are given.
def self.normalize_ractor_merge_port_pairs(ports)
  raise TypeError, "ports must respond to each" unless ports.respond_to?(:each)

  # Shared across all entries so a port reused by a later pair is detected.
  seen_data_ports = {}
  seen_ack_ports = {}
  normalized = ports.each.map do |entry|
    normalize_ractor_merge_port_pair(entry, seen_data_ports, seen_ack_ports)
  end

  if normalized.size < 2
    raise ArgumentError, "ractor_merge_ports requires at least two port pairs"
  end

  normalized.freeze
end
|
|
245
378
|
|
|
379
|
+
# Validates one `{ port:, ack_port: }` Hash and returns it as a
# RactorMergePortPair. `data_port_ids` and `ack_port_ids` hold the object ids
# of ports accepted so far; they are updated only after every check passed.
def self.normalize_ractor_merge_port_pair(pair, data_port_ids, ack_port_ids)
  raise TypeError, "port pair must be a Hash" unless pair.is_a?(Hash)

  has_both_keys = pair.key?(:port) && pair.key?(:ack_port)
  raise TypeError, "port pair must include :port and :ack_port" unless has_both_keys

  port = pair.fetch(:port)
  ack_port = pair.fetch(:ack_port)
  raise TypeError, "port must respond to receive" unless port.respond_to?(:receive)

  # Every object inherits Kernel#send, so an ack port only qualifies when it
  # defines its own `send`.
  provides_send = ack_port.respond_to?(:send) && ack_port.method(:send).owner != Kernel
  raise TypeError, "ack_port must provide Ractor-style send" unless provides_send

  data_key = port.object_id
  ack_key = ack_port.object_id
  raise ArgumentError, "data ports must be distinct" if data_port_ids.key?(data_key)
  raise ArgumentError, "ack ports must be distinct" if ack_port_ids.key?(ack_key)

  data_port_ids[data_key] = true
  ack_port_ids[ack_key] = true
  RactorMergePortPair.new(port: port, ack_port: ack_port)
end
|
|
399
|
+
|
|
400
|
+
private_class_method :normalize_ractor_merge_port_pairs, :normalize_ractor_merge_port_pair
|
|
401
|
+
|
|
402
|
+
private
|
|
403
|
+
|
|
246
404
|
def materialize
|
|
247
405
|
stream = nil
|
|
248
406
|
|
|
249
407
|
begin
|
|
250
408
|
stream = @source_factory.call
|
|
251
409
|
@flows.each do |flow|
|
|
252
|
-
stream = flow.
|
|
410
|
+
stream = flow.attach_to(stream)
|
|
253
411
|
end
|
|
254
412
|
stream
|
|
255
413
|
rescue StandardError
|
data/lib/fiber_stream/version.rb
CHANGED
data/lib/fiber_stream.rb
CHANGED
|
@@ -3,7 +3,9 @@
|
|
|
3
3
|
require_relative "fiber_stream/pull"
|
|
4
4
|
require_relative "fiber_stream/version"
|
|
5
5
|
require_relative "fiber_stream/errors"
|
|
6
|
+
require_relative "fiber_stream/internal/ractor_transfer_policy"
|
|
6
7
|
require_relative "fiber_stream/ractor_port"
|
|
8
|
+
require_relative "fiber_stream/ractor_producer"
|
|
7
9
|
require_relative "fiber_stream/flow"
|
|
8
10
|
require_relative "fiber_stream/sink"
|
|
9
11
|
require_relative "fiber_stream/running_pipeline"
|
data/sig/fiber_stream.rbs
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
module FiberStream
|
|
2
2
|
type ractor_transfer_policy = :copy | :move
|
|
3
|
+
type ractor_port_pair = { port: untyped, ack_port: untyped }
|
|
3
4
|
type ractor_map_error_kind = :input_transfer | :output_transfer | :worker | :worker_termination | :isolation
|
|
4
|
-
type ractor_port_source_error_kind = :invalid_message | :producer_failure | :receive | :ack_transfer | :cancel_transfer
|
|
5
|
+
type ractor_port_source_error_kind = :invalid_message | :producer_failure | :receive | :ack_transfer | :cancel_transfer | :producer_setup
|
|
5
6
|
type ractor_port_cancel_reason = :closed
|
|
6
7
|
|
|
7
8
|
class SchedulerRequiredError < RuntimeError
|
|
@@ -47,24 +48,43 @@ module FiberStream
|
|
|
47
48
|
end
|
|
48
49
|
end
|
|
49
50
|
|
|
51
|
+
class RactorProducer
|
|
52
|
+
def emit: [Elem] (Elem value, ?transfer: ractor_transfer_policy?) -> bool
|
|
53
|
+
def complete: () -> bool
|
|
54
|
+
def fail: (?untyped error, ?cause_class_name: String?, ?cause_message: String?) -> bool
|
|
55
|
+
def cancelled?: () -> bool
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
class RactorProducerGroup
|
|
59
|
+
def producer: (*untyped args, ?transfer: ractor_transfer_policy?) { (RactorProducer producer, *untyped args) -> void } -> self
|
|
60
|
+
end
|
|
61
|
+
|
|
50
62
|
class Source[Elem]
|
|
51
63
|
def self.each: [Elem] (Enumerable[Elem] enumerable) -> Source[Elem]
|
|
52
64
|
def self.io: (untyped io, ?chunk_size: Integer, ?close: bool) -> Source[String]
|
|
53
65
|
def self.ractor_port: [Elem] (untyped port, ack_port: untyped, ?ack_transfer: ractor_transfer_policy, ?cancel: bool) -> Source[Elem]
|
|
66
|
+
def self.ractor_merge_ports: [Elem] (Enumerable[ractor_port_pair] ports, ?ack_transfer: ractor_transfer_policy, ?cancel: bool) -> Source[Elem]
|
|
67
|
+
def self.ractor_producer: [Elem] (*untyped args, ?transfer: ractor_transfer_policy, ?ack_transfer: ractor_transfer_policy) { (RactorProducer producer, *untyped args) -> void } -> Source[Elem]
|
|
68
|
+
def self.ractor_merge_producers: [Elem] (?transfer: ractor_transfer_policy, ?ack_transfer: ractor_transfer_policy) { (RactorProducerGroup group) -> void } -> Source[Elem]
|
|
54
69
|
def via: [Out] (Flow[Elem, Out] flow) -> Source[Out]
|
|
55
70
|
def concat: [Other] (Source[Other] source) -> Source[Elem | Other]
|
|
56
71
|
def zip: [Other] (Source[Other] source) -> Source[[Elem, Other]]
|
|
72
|
+
def merge: [Other] (Source[Other] source) -> Source[Elem | Other]
|
|
57
73
|
def map: [Out] () { (Elem) -> Out } -> Source[Out]
|
|
58
74
|
def parallel_map: [Out] (concurrency: Integer) { (Elem) -> Out } -> Source[Out]
|
|
75
|
+
def parallel_unordered_map: [Out] (concurrency: Integer) { (Elem) -> Out } -> Source[Out]
|
|
59
76
|
def ractor_map: [Out] (workers: Integer, ?input_transfer: ractor_transfer_policy, ?output_transfer: ractor_transfer_policy) { (Elem) -> Out } -> Source[Out]
|
|
60
77
|
def select: () { (Elem) -> boolish } -> Source[Elem]
|
|
61
78
|
def take: (Integer count) -> Source[Elem]
|
|
62
79
|
def drop: (Integer count) -> Source[Elem]
|
|
80
|
+
def grouped: (Integer count) -> Source[Array[Elem]]
|
|
81
|
+
def scan: [Acc] (Acc initial) { (Acc, Elem) -> Acc } -> Source[Acc]
|
|
63
82
|
def take_while: () { (Elem) -> boolish } -> Source[Elem]
|
|
64
83
|
def drop_while: () { (Elem) -> boolish } -> Source[Elem]
|
|
65
84
|
def async: () -> Source[Elem]
|
|
66
85
|
def buffer: (Integer count) -> Source[Elem]
|
|
67
86
|
def lines: (?chomp: bool, ?max_length: Integer?) -> Source[String]
|
|
87
|
+
def split: (String separator, ?keep_separator: bool, ?max_length: Integer?) -> Source[String]
|
|
68
88
|
def to: [Mat] (Sink[Elem, Mat] sink) -> Pipeline[Mat]
|
|
69
89
|
def run_with: [Mat] (Sink[Elem, Mat] sink) -> Mat
|
|
70
90
|
end
|
|
@@ -72,15 +92,19 @@ module FiberStream
|
|
|
72
92
|
class Flow[In, Out]
|
|
73
93
|
def self.map: [In, Out] () { (In) -> Out } -> Flow[In, Out]
|
|
74
94
|
def self.parallel_map: [In, Out] (concurrency: Integer) { (In) -> Out } -> Flow[In, Out]
|
|
95
|
+
def self.parallel_unordered_map: [In, Out] (concurrency: Integer) { (In) -> Out } -> Flow[In, Out]
|
|
75
96
|
def self.ractor_map: [In, Out] (workers: Integer, ?input_transfer: ractor_transfer_policy, ?output_transfer: ractor_transfer_policy) { (In) -> Out } -> Flow[In, Out]
|
|
76
97
|
def self.select: [Elem] () { (Elem) -> boolish } -> Flow[Elem, Elem]
|
|
77
98
|
def self.take: [Elem] (Integer count) -> Flow[Elem, Elem]
|
|
78
99
|
def self.drop: [Elem] (Integer count) -> Flow[Elem, Elem]
|
|
100
|
+
def self.grouped: [Elem] (Integer count) -> Flow[Elem, Array[Elem]]
|
|
101
|
+
def self.scan: [Elem, Acc] (Acc initial) { (Acc, Elem) -> Acc } -> Flow[Elem, Acc]
|
|
79
102
|
def self.take_while: [Elem] () { (Elem) -> boolish } -> Flow[Elem, Elem]
|
|
80
103
|
def self.drop_while: [Elem] () { (Elem) -> boolish } -> Flow[Elem, Elem]
|
|
81
104
|
def self.async: [Elem] () -> Flow[Elem, Elem]
|
|
82
105
|
def self.buffer: [Elem] (Integer count) -> Flow[Elem, Elem]
|
|
83
106
|
def self.lines: (?chomp: bool, ?max_length: Integer?) -> Flow[String, String]
|
|
107
|
+
def self.split: (String separator, ?keep_separator: bool, ?max_length: Integer?) -> Flow[String, String]
|
|
84
108
|
def via: [Next] (Flow[Out, Next] flow) -> Flow[In, Next]
|
|
85
109
|
def to: [Mat] (Sink[Out, Mat] sink) -> Sink[In, Mat]
|
|
86
110
|
end
|