fiber_stream 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +25 -0
  3. data/LICENSE +19 -0
  4. data/README.md +361 -0
  5. data/examples/README.md +51 -0
  6. data/examples/async_http_requests.rb +132 -0
  7. data/examples/background_execution.rb +31 -0
  8. data/examples/backpressure_buffer.rb +66 -0
  9. data/examples/basic_pipeline.rb +28 -0
  10. data/examples/composable_pipeline.rb +43 -0
  11. data/examples/file_copy.rb +33 -0
  12. data/examples/line_processing.rb +20 -0
  13. data/examples/ractor_map_hashing.rb +43 -0
  14. data/examples/ractor_port_source.rb +45 -0
  15. data/lib/fiber_stream/errors.rb +44 -0
  16. data/lib/fiber_stream/flow.rb +190 -0
  17. data/lib/fiber_stream/pipeline.rb +49 -0
  18. data/lib/fiber_stream/pull/async_boundary.rb +85 -0
  19. data/lib/fiber_stream/pull/buffer_boundary.rb +123 -0
  20. data/lib/fiber_stream/pull/each.rb +31 -0
  21. data/lib/fiber_stream/pull/io_source.rb +89 -0
  22. data/lib/fiber_stream/pull/lines.rb +121 -0
  23. data/lib/fiber_stream/pull/map.rb +37 -0
  24. data/lib/fiber_stream/pull/parallel_map_boundary.rb +299 -0
  25. data/lib/fiber_stream/pull/ractor_map_boundary.rb +500 -0
  26. data/lib/fiber_stream/pull/ractor_port_source.rb +242 -0
  27. data/lib/fiber_stream/pull/select.rb +40 -0
  28. data/lib/fiber_stream/pull/take.rb +47 -0
  29. data/lib/fiber_stream/pull.rb +85 -0
  30. data/lib/fiber_stream/ractor_port.rb +17 -0
  31. data/lib/fiber_stream/running_pipeline.rb +156 -0
  32. data/lib/fiber_stream/sink.rb +176 -0
  33. data/lib/fiber_stream/source.rb +184 -0
  34. data/lib/fiber_stream/version.rb +5 -0
  35. data/lib/fiber_stream.rb +15 -0
  36. data/sig/fiber_stream.rbs +97 -0
  37. metadata +154 -0
@@ -0,0 +1,28 @@
# frozen_string_literal: true

# Basic pipeline example: keep only the orders whose total is at least 10_000
# and print one formatted summary line for each of them.

$LOAD_PATH.unshift(File.expand_path("../lib", __dir__))

require "fiber_stream"

orders = [
  { id: 101, customer: "Aki", total: 4_800 },
  { id: 102, customer: "Mina", total: 12_400 },
  { id: 103, customer: "Ren", total: 9_900 },
  { id: 104, customer: "Sora", total: 18_200 }
]

# Stage blocks are named up front so the pipeline below reads as a sentence.
high_value = proc { |order| order.fetch(:total) >= 10_000 }

summarize = proc do |order|
  format(
    "#%<id>d %-4<customer>s JPY %<total>d",
    id: order.fetch(:id),
    customer: order.fetch(:customer),
    total: order.fetch(:total)
  )
end

summary_source = FiberStream::Source.each(orders).select(&high_value).map(&summarize)
high_value_summaries = summary_source.run_with(FiberStream::Sink.to_a)

puts "High-value orders"
high_value_summaries.each do |summary|
  puts "- #{summary}"
end
@@ -0,0 +1,43 @@
# frozen_string_literal: true

# Composable pipeline example: build small reusable flows, combine them with
# `via` / `to`, and run the resulting pipeline over CSV-like lines.

$LOAD_PATH.unshift(File.expand_path("../lib", __dir__))

require "fiber_stream"

input_lines = [
  " id,status,total ",
  " 1001,active,12000 ",
  "",
  " 1002,cancelled,3000 ",
  " 1003,active,25000 "
]

# Trim surrounding whitespace, then drop lines that end up empty.
strip_whitespace = FiberStream::Flow.map(&:strip)
drop_blank_lines = FiberStream::Flow.select { |line| !line.empty? }
normalize_lines = strip_whitespace.via(drop_blank_lines)

# Turn one "id,status,total" line into a Hash with an Integer total.
parse_order =
  FiberStream::Flow.map do |line|
    fields = line.split(",", 3)

    { id: fields[0], status: fields[1], total: Integer(fields[2]) }
  end

only_active = FiberStream::Flow.select { |order| order.fetch(:status) == "active" }
active_order_sink = parse_order.via(only_active).to(FiberStream::Sink.to_a)

# The first input line is the column header, so it is skipped.
data_lines = input_lines[1..]
pipeline = FiberStream::Source.each(data_lines).via(normalize_lines).to(active_order_sink)

puts "Active orders"
pipeline.run.each do |order|
  id, total = order.fetch_values(:id, :total)
  puts "- ##{id} JPY #{total}"
end
@@ -0,0 +1,33 @@
# frozen_string_literal: true

# File copy example: stream a file through `Source.io`, upcase every chunk,
# and write the result with `Sink.io` inside an Async task.

$LOAD_PATH.unshift(File.expand_path("../lib", __dir__))

require "async"
require "fiber_stream"
require "tmpdir"

source_text = <<~TEXT
  FiberStream can read from Ruby core IO objects.
  Source.io emits String chunks, and Sink.io writes them to another IO.
  This keeps stream processing explicit while preserving pull-based demand.
TEXT

Dir.mktmpdir("fiber_stream-example-") do |workdir|
  input_path = File.join(workdir, "input.txt")
  output_path = File.join(workdir, "output.txt")

  File.write(input_path, source_text)

  copy_task =
    Async do
      reader = File.open(input_path, "rb")
      writer = File.open(output_path, "wb")

      # Both endpoints are created with `close: true`, so the handles are not
      # closed explicitly here.
      upcased_chunks = FiberStream::Source.io(reader, chunk_size: 24, close: true).map(&:upcase)
      upcased_chunks.run_with(FiberStream::Sink.io(writer, close: true, flush: true))
    end

  # The task's result is the sink's materialized value, reported as a chunk count.
  chunks_written = copy_task.wait

  puts "Wrote #{chunks_written} chunks to #{output_path}"
  puts File.read(output_path)
end
@@ -0,0 +1,20 @@
# frozen_string_literal: true

# Line processing example: re-split arbitrarily chunked log text into lines
# and keep only the WARN and ERROR entries.

$LOAD_PATH.unshift(File.expand_path("../lib", __dir__))

require "fiber_stream"

# Chunk boundaries deliberately do not line up with line boundaries.
log_chunks = [
  "INFO boot\nWARN slow query",
  "\nERROR failed job\n",
  "INFO recovered"
]

needs_attention = proc { |line| line.start_with?("WARN", "ERROR") }

log_lines = FiberStream::Source.each(log_chunks).lines
warnings_and_errors = log_lines.select(&needs_attention).run_with(FiberStream::Sink.to_a)

puts "Warnings and errors"
warnings_and_errors.each do |line|
  puts "- #{line}"
end
@@ -0,0 +1,43 @@
# frozen_string_literal: true

# Ractor map example: hash several in-memory payloads in worker ractors and
# print the digests.

$LOAD_PATH.unshift(File.expand_path("../lib", __dir__))

require "digest"
require "fiber_stream"

# [file name, fill character, payload size in bytes]
payload_specs = [
  ["alpha.bin", "A", 200_000],
  ["bravo.bin", "B", 120_000],
  ["charlie.bin", "C", 260_000],
  ["delta.bin", "D", 80_000]
]

# String#* returns a new, unfrozen String for every record.
records = payload_specs.map do |name, fill, size|
  { name: name, payload: fill * size }
end

# Mapper executed by `ractor_map`; it captures no outer locals.
HASH_RECORD =
  Ractor.shareable_proc do |record|
    payload = record.fetch(:payload)
    sha256 = Digest::SHA256.hexdigest(payload)

    { name: record.fetch(:name), bytes: payload.bytesize, sha256: sha256 }
  end

hashing_source =
  FiberStream::Source.each(records).ractor_map(workers: 2, input_transfer: :move, &HASH_RECORD)
digests = hashing_source.run_with(FiberStream::Sink.to_a)

puts "SHA-256 digests"
digests.each do |digest|
  name, bytes, sha256 = digest.fetch_values(:name, :bytes, :sha256)
  puts format("- %-11<name>s %7<bytes>d bytes %<sha256>s", name: name, bytes: bytes, sha256: sha256)
end

puts
puts "Results are emitted in input order."
puts "input_transfer: :move is safe here because the input records are not reused."
@@ -0,0 +1,45 @@
# frozen_string_literal: true

# Ractor port source example: a producer ractor emits one element per Ack it
# receives, and the main ractor consumes them through `Source.ractor_port`.

$LOAD_PATH.unshift(File.expand_path("../lib", __dir__))

require "fiber_stream"

data_port = Ractor::Port.new
setup_port = Ractor::Port.new

producer =
  Ractor.new(data_port, setup_port) do |outbox, setup|
    # The producer creates the ack port itself and hands it back through
    # `setup` before waiting for demand.
    acks = Ractor::Port.new
    setup.send(acks)

    remaining = (1..5).to_a
    sent = 0
    status = nil

    until status
      case acks.receive
      in FiberStream::RactorPort::Ack
        if remaining.empty?
          outbox.send(FiberStream::RactorPort::Complete.new)
          status = [:completed, sent]
        else
          value = remaining.shift
          sent += 1
          outbox.send(FiberStream::RactorPort::Element.new(value))
        end
      in FiberStream::RactorPort::Cancel[reason]
        status = [:cancelled, sent, reason]
      end
    end

    # The block's final value becomes the producer's result.
    status
  end

ack_port = setup_port.receive

squares = FiberStream::Source.ractor_port(data_port, ack_port: ack_port).map { |number| number * number }
result = squares.run_with(FiberStream::Sink.to_a)

puts "Squares from a producer Ractor:"
puts result.join(", ")
puts
puts "Producer status: #{producer.value.inspect}"
@@ -0,0 +1,44 @@
# frozen_string_literal: true

module FiberStream
  # Raised when an operation needs `Fiber.scheduler` (or a non-blocking fiber)
  # and none is available.
  class SchedulerRequiredError < RuntimeError
  end

  # Frame-length failure; by its name, presumably raised when a frame or line
  # exceeds a configured limit.
  class FrameTooLongError < RuntimeError
  end

  # Pipeline-cancellation failure; by its name, presumably raised when a
  # running pipeline is cancelled.
  class PipelineCancelledError < RuntimeError
  end

  # Stable error shape for `Source.ractor_port` failures.
  #
  # Producer failures, invalid protocol messages, and source-side Ractor port
  # failures are reported through this class so callers do not have to depend
  # on Ruby's Ractor transport exceptions.
  class RactorPortSourceError < RuntimeError
    attr_reader :kind
    attr_reader :cause_class_name
    attr_reader :cause_message
    attr_reader :original_cause

    # `cause` is optional and is exposed as `original_cause`.
    def initialize(kind:, cause_class_name:, cause_message:, cause: nil)
      super("ractor_port #{kind} failure: #{cause_class_name}: #{cause_message}")

      @kind = kind
      @cause_class_name = cause_class_name
      @cause_message = cause_message
      @original_cause = cause
    end
  end

  # Stable error shape for Ractor-backed mapping failures.
  #
  # Worker exceptions and Ractor transfer failures may not be directly
  # transferable back to the main ractor, so this error carries the ordered
  # input sequence, the failure kind, and the original exception's class name
  # and message as plain metadata.
  class RactorMapError < RuntimeError
    attr_reader :sequence
    attr_reader :kind
    attr_reader :cause_class_name
    attr_reader :cause_message
    attr_reader :original_cause

    # `cause` is optional and is exposed as `original_cause`.
    def initialize(sequence:, kind:, cause_class_name:, cause_message:, cause: nil)
      super("ractor_map #{kind} failure at sequence #{sequence}: #{cause_class_name}: #{cause_message}")

      @sequence = sequence
      @kind = kind
      @cause_class_name = cause_class_name
      @cause_message = cause_message
      @original_cause = cause
    end
  end
end
@@ -0,0 +1,190 @@
# frozen_string_literal: true

module FiberStream
  # A reusable, lazily attached stream transformation.
  #
  # Each flow wraps an attach block that receives an upstream pull stream and
  # returns the transformed stream. `new` is private: flows are created through
  # the class-level factories below and combined with `#via` and `#to`.
  class Flow
    # Transfer policies accepted by `ractor_map`.
    RACTOR_TRANSFER_POLICIES = %i[copy move].freeze
    private_constant :RACTOR_TRANSFER_POLICIES

    # Creates a mapping flow.
    #
    # The block is called once for each element pulled through this flow.
    # Exceptions raised by the block fail the stream and are re-raised from
    # `Source#run_with`.
    #
    # Raises `ArgumentError` when no block is given.
    def self.map(&block)
      raise ArgumentError, "missing block" unless block

      new { |upstream| Pull.map(upstream, block) }
    end

    # Creates an ordered scheduler-backed parallel mapping flow.
    #
    # The stage starts internal scheduled fibers on first downstream demand and
    # requires an installed `Fiber.scheduler` in a non-blocking fiber at that
    # point. At most `concurrency` mapping blocks run at the same time, and at
    # most `concurrency` upstream elements are pulled but not yet emitted downstream.
    # Results are emitted in input order. Closing the boundary closes upstream
    # and requests internal worker cancellation. FiberStream does not depend on
    # Async at runtime.
    #
    # Raises `ArgumentError` for a missing block or a non-positive
    # `concurrency`, and `TypeError` for a non-Integer `concurrency`.
    def self.parallel_map(concurrency:, &block)
      raise ArgumentError, "missing block" unless block

      require_positive_integer!(:concurrency, concurrency)

      new { |upstream| Pull.parallel_map(upstream, concurrency, block) }
    end

    # Creates an ordered Ractor-backed mapping flow.
    #
    # The mapper runs inside worker ractors and must be shareable, typically
    # created with `Ractor.shareable_proc`. Results are emitted in input order,
    # and at most `workers` upstream elements are pulled but not yet emitted.
    # `input_transfer` and `output_transfer` must be `:copy` or `:move` and are
    # passed to Ractor message sends for element and result transfer.
    #
    # Raises `ArgumentError` for a missing block, a non-positive `workers`, or
    # an unknown transfer policy, and `TypeError` for a non-Integer `workers`
    # or a block that is not shareable.
    def self.ractor_map(workers:, input_transfer: :copy, output_transfer: :copy, &block)
      raise ArgumentError, "missing block" unless block

      require_positive_integer!(:workers, workers)
      validate_ractor_transfer_policy!(:input_transfer, input_transfer)
      validate_ractor_transfer_policy!(:output_transfer, output_transfer)
      raise TypeError, "block must be shareable" unless Ractor.shareable?(block)

      new { |upstream| Pull.ractor_map(upstream, workers, input_transfer, output_transfer, block) }
    end

    # Creates a filtering flow.
    #
    # The block is called for upstream elements until it returns a truthy value
    # or upstream completes. Matching elements pass through unchanged.
    # Exceptions raised by the block fail the stream and are re-raised from
    # `Source#run_with`.
    #
    # Raises `ArgumentError` when no block is given.
    def self.select(&block)
      raise ArgumentError, "missing block" unless block

      new { |upstream| Pull.select(upstream, block) }
    end

    # Creates a limiting flow.
    #
    # The flow emits at most `count` elements. `take(0)` completes without
    # pulling upstream and closes upstream on the first downstream demand. After
    # the limit is reached, upstream is closed during the pull that forwards
    # the final element. Negative counts raise `ArgumentError`; non-Integer
    # counts raise `TypeError`.
    def self.take(count)
      raise TypeError, "count must be an Integer" unless count.is_a?(Integer)
      raise ArgumentError, "count must be non-negative" if count.negative?

      new { |upstream| Pull.take(upstream, count) }
    end

    # Creates a scheduler-backed asynchronous boundary.
    #
    # The boundary starts its producer on the first downstream demand and
    # requires an installed `Fiber.scheduler` at that point. Upstream stages run
    # in a non-blocking producer fiber, downstream stages remain in the caller's
    # current fiber, and each downstream pull resumes at most one upstream pull.
    # Closing the boundary closes upstream and requests producer cancellation.
    # FiberStream does not depend on Async at runtime.
    def self.async
      new { |upstream| Pull.async(upstream) }
    end

    # Creates a bounded asynchronous buffer.
    #
    # The buffer starts its producer on the first downstream demand and requires
    # an installed `Fiber.scheduler` at that point. It preserves element order,
    # stores at most `count` messages, and closes upstream while requesting
    # producer cancellation when closed. `count` must be a positive Integer.
    # FiberStream does not depend on Async at runtime.
    def self.buffer(count)
      require_positive_integer!(:count, count)

      new { |upstream| Pull.buffer(upstream, count) }
    end

    # Creates a line-splitting flow.
    #
    # The flow accepts String chunks and emits lines split on "\n". By default
    # it chomps the trailing newline and one preceding "\r". `max_length` is an
    # optional per-line bytesize limit.
    #
    # Raises `TypeError` when `chomp` is not a boolean or `max_length` is
    # neither nil nor an Integer, and `ArgumentError` when `max_length` is not
    # positive.
    def self.lines(chomp: true, max_length: nil)
      raise TypeError, "chomp must be true or false" unless [true, false].include?(chomp)
      raise TypeError, "max_length must be nil or an Integer" unless max_length.nil? || max_length.is_a?(Integer)
      # Past the type check `max_length` is nil or an Integer; nil means "no limit".
      raise ArgumentError, "max_length must be positive" if max_length && !max_length.positive?

      new { |upstream| Pull.lines(upstream, chomp, max_length) }
    end

    # Shared check for the options that must be a positive Integer. The type
    # is checked before the sign, so a non-Integer always gets `TypeError`.
    def self.require_positive_integer!(name, value)
      raise TypeError, "#{name} must be an Integer" unless value.is_a?(Integer)
      raise ArgumentError, "#{name} must be positive" unless value.positive?
    end

    private_class_method :require_positive_integer!

    # Rejects any transfer policy other than `:copy` or `:move`.
    def self.validate_ractor_transfer_policy!(name, value)
      return if RACTOR_TRANSFER_POLICIES.include?(value)

      raise ArgumentError, "#{name} must be :copy or :move"
    end

    private_class_method :validate_ractor_transfer_policy!

    # Returns a reusable flow that applies this flow and then `flow`.
    #
    # Construction is lazy. No upstream stream is attached and no elements are
    # pulled until the composed flow is materialized by a source or sink.
    # If attaching `flow` fails, the stream already attached by this flow is
    # closed (best effort) and the attach failure is re-raised.
    #
    # Raises `TypeError` when `flow` is not a `Flow`.
    def via(flow)
      raise TypeError, "expected FiberStream::Flow" unless flow.is_a?(Flow)

      self.class.__send__(:new) do |upstream|
        attached_stream = attach(upstream)

        begin
          flow.__send__(:attach, attached_stream)
        rescue StandardError
          close_quietly(attached_stream)
          raise
        end
      end
    end

    # Returns a sink that runs this flow before `sink`.
    #
    # The composed sink accepts this flow's input elements and returns the
    # wrapped sink's materialized value. It closes the attached flow chain after
    # normal completion, failure, or early sink completion.
    #
    # Raises `TypeError` when `sink` is not a `Sink`.
    def to(sink)
      raise TypeError, "expected FiberStream::Sink" unless sink.is_a?(Sink)

      Sink.__send__(:new) do |stream|
        attached_stream = nil
        primary_error = nil

        begin
          attached_stream = attach(stream)
          sink.__send__(:run, attached_stream)
        rescue StandardError => error
          primary_error = error
          raise
        ensure
          begin
            attached_stream&.close
          rescue StandardError => close_error
            # A close failure must not mask the failure that is already
            # propagating; it is surfaced only when the run itself succeeded.
            raise close_error unless primary_error
          end
        end
      end
    end

    def initialize(&attach)
      @attach = attach
    end

    private_class_method :new

    private

    # Materializes this flow on top of `upstream`.
    def attach(upstream)
      @attach.call(upstream)
    end

    # Best-effort close used while another error is already propagating.
    def close_quietly(stream)
      stream.close
    rescue StandardError
      nil
    end
  end
end
@@ -0,0 +1,49 @@
# frozen_string_literal: true

module FiberStream
  # A source paired with a sink, ready to be run. `new` is private.
  class Pipeline
    private_class_method :new

    def initialize(source, sink)
      @source = source
      @sink = sink
    end

    # Runs this pipeline in the current fiber.
    #
    # Equivalent to `source.run_with(sink)` for the source and sink
    # definitions captured by `Source#to`. Each call creates a new
    # materialization, subject to the replayability and resource ownership
    # semantics of the captured endpoints.
    def run
      @source.run_with(@sink)
    end

    # Runs this pipeline in a scheduler-backed background fiber.
    #
    # Starts one new materialization and returns a `RunningPipeline` handle
    # that can wait for the materialized value, observe completion, and
    # request cancellation. Raises `SchedulerRequiredError` unless a
    # `Fiber.scheduler` is installed and the current fiber is non-blocking.
    # FiberStream does not depend on Async at runtime.
    def run_async
      validate_scheduler!

      scheduler = Fiber.scheduler
      RunningPipeline.__send__(:new, scheduler) { run }
    end

    private

    # Guards `run_async`: a missing scheduler is reported first, then a
    # blocking current fiber.
    def validate_scheduler!
      unless Fiber.scheduler
        raise SchedulerRequiredError, "Pipeline#run_async requires Fiber.scheduler"
      end

      return unless Fiber.current.blocking?

      raise SchedulerRequiredError, "Pipeline#run_async requires a non-blocking fiber"
    end
  end
end
@@ -0,0 +1,85 @@
# frozen_string_literal: true

module FiberStream
  module Pull
    # One-element asynchronous boundary for `Flow.async`.
    #
    # The producer fiber is created lazily on first downstream demand. It
    # advances upstream in a non-blocking fiber and yields one message at a
    # time back to the downstream caller, so it adds an async boundary without
    # adding prefetch.
    #
    # Producer messages are tagged arrays: `[:value, element]`, `[:done]`, or
    # `[:error, exception]`.
    class AsyncBoundary
      def initialize(upstream)
        @upstream = upstream
        @producer = nil
        @started = false
        @closed = false
        @done = false
      end

      # Pulls the next element through the boundary.
      #
      # Returns `DONE` once the boundary is closed or upstream has completed,
      # and re-raises an upstream `StandardError` in the caller's fiber. After
      # completion or failure every later call returns `DONE`. Raises
      # `SchedulerRequiredError` on the first pull when no `Fiber.scheduler`
      # is installed.
      def next
        return DONE if @closed || @done

        start
        message = @producer.resume

        # The producer returns instead of yielding when the boundary is closed
        # while its upstream pull is still in flight. `resume` then hands back
        # the fiber's nil result, which is treated as completion rather than
        # being dereferenced as a message.
        return complete if message.nil?

        case message.fetch(0)
        when :value
          message.fetch(1)
        when :done
          complete
        when :error
          @done = true
          raise message.fetch(1)
        end
      end

      # Closes the boundary and its upstream. Repeated calls are no-ops.
      def close
        return if @closed

        @closed = true
        @done = true
        @upstream.close
      ensure
        cancel_producer
      end

      private

      # Creates the producer fiber on first demand; it only runs when `next`
      # resumes it.
      def start
        return if @started
        raise SchedulerRequiredError, "Flow.async requires Fiber.scheduler" unless Fiber.scheduler

        @started = true
        @producer = Fiber.new(blocking: false) { run_producer }
      end

      # Producer loop: one upstream pull per resume, reported as a tagged
      # message. Failures are not reported once the boundary is closed.
      def run_producer
        loop do
          break if @closed

          value = @upstream.next
          if Pull.done?(value)
            Fiber.yield([:done])
            break
          end

          Fiber.yield([:value, value])
        end
      rescue StandardError => exception
        Fiber.yield([:error, exception]) unless @closed
      ensure
        @upstream.close
      end

      # Marks the boundary finished and returns the `DONE` marker.
      def complete
        @done = true
        DONE
      end

      # Cancellation hook invoked by `close`; currently a no-op.
      def cancel_producer
        nil
      end
    end
  end
end