rapidflow 0.1.0 → 0.2.0
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -1
- data/README.md +78 -63
- data/lib/rapidflow/batch.rb +9 -25
- data/lib/rapidflow/batch_builder.rb +16 -0
- data/lib/rapidflow/counter.rb +1 -1
- data/lib/rapidflow/errors.rb +7 -0
- data/lib/rapidflow/pipeline.rb +1 -1
- data/lib/rapidflow/stage.rb +9 -1
- data/lib/rapidflow/version.rb +2 -2
- data/lib/rapidflow/work_item.rb +1 -1
- data/lib/rapidflow.rb +3 -1
- data/scripts/benchmark/benchmark_api_request_process_and_storing.rb +11 -11
- data/scripts/benchmark/benchmark_images.rb +6 -6
- data/scripts/benchmark/simulated_data_processing.rb +6 -6
- data/sig/rapidflow.rbs +1 -1
- data/test/rapidflow/batch/config_error_test.rb +43 -0
- data/test/rapidflow/batch/error_handling_test.rb +211 -0
- data/test/rapidflow/batch_test.rb +71 -222
- data/test/rapidflow/counter_test.rb +1 -1
- data/test/rapidflow/pipeline_test.rb +67 -0
- data/test/rapidflow/stage_test.rb +110 -0
- data/test/rapidflow/work_item_test.rb +1 -1
- metadata +7 -2
- data/.github/workflows/main.yml +0 -35
|
@@ -109,9 +109,9 @@ def process_data_synchronously(urls)
|
|
|
109
109
|
results
|
|
110
110
|
end
|
|
111
111
|
|
|
112
|
-
# Solution 2:
|
|
112
|
+
# Solution 2: RapidFlow concurrent processing
|
|
113
113
|
def process_data_with_rapidflow(urls, workers: 4)
|
|
114
|
-
belt =
|
|
114
|
+
belt = RapidFlow::Batch.build do
|
|
115
115
|
stage ->(url) { DataProcessor.fetch_html(url) }, workers: workers # Station 1: Fetch HTML
|
|
116
116
|
stage ->(html) { DataProcessor.parse_data(html) }, workers: workers # Station 2: Parse data
|
|
117
117
|
stage ->(data) { DataProcessor.fetch_other_data(data) }, workers: workers # Station 3: Fetch other data
|
|
@@ -126,7 +126,7 @@ end
|
|
|
126
126
|
# Run benchmark
|
|
127
127
|
def run_benchmark(url_count: 50, workers: 4)
|
|
128
128
|
puts "=" * 80
|
|
129
|
-
puts "
|
|
129
|
+
puts "RapidFlow Data Processing Benchmark"
|
|
130
130
|
puts "=" * 80
|
|
131
131
|
puts
|
|
132
132
|
puts "Configuration:"
|
|
@@ -163,7 +163,7 @@ def run_benchmark(url_count: 50, workers: 4)
|
|
|
163
163
|
puts "Results: #{sync_success} successful, #{sync_failed} failed"
|
|
164
164
|
puts
|
|
165
165
|
|
|
166
|
-
# Benchmark
|
|
166
|
+
# Benchmark RapidFlow
|
|
167
167
|
puts "-" * 80
|
|
168
168
|
puts "2. RAPIDFLOW CONCURRENT PROCESSING"
|
|
169
169
|
puts "-" * 80
|
|
@@ -172,7 +172,7 @@ def run_benchmark(url_count: 50, workers: 4)
|
|
|
172
172
|
rapidflow_results = nil
|
|
173
173
|
|
|
174
174
|
Benchmark.bm(30) do |x|
|
|
175
|
-
rapidflow_time = x.report("
|
|
175
|
+
rapidflow_time = x.report("RapidFlow (#{workers} workers):") do
|
|
176
176
|
rapidflow_results = process_data_with_rapidflow(urls, workers: workers)
|
|
177
177
|
end
|
|
178
178
|
end
|
|
@@ -197,7 +197,7 @@ def run_benchmark(url_count: 50, workers: 4)
|
|
|
197
197
|
puts "=" * 80
|
|
198
198
|
puts
|
|
199
199
|
puts "Synchronous time: #{sync_real_time.round(2)}s"
|
|
200
|
-
puts "
|
|
200
|
+
puts "RapidFlow time: #{rapidflow_real_time.round(2)}s"
|
|
201
201
|
puts
|
|
202
202
|
puts "Speedup: #{speedup.round(2)}x faster"
|
|
203
203
|
puts "Time saved: #{time_saved.round(2)}s"
|
data/sig/rapidflow.rbs
CHANGED
data/test/rapidflow/batch/config_error_test.rb
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
require "test_helper"
|
|
3
|
+
|
|
4
|
+
module RapidFlow
|
|
5
|
+
class BatchConfigErrorTest < Minitest::Test
|
|
6
|
+
def test_no_stages_with_build
|
|
7
|
+
error = assert_raises(RapidFlow::ConfigError) do
|
|
8
|
+
Batch.build do
|
|
9
|
+
# no stages
|
|
10
|
+
end
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
assert_equal "Unable to start the batch without any stages", error.message
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def test_no_stages_batch_start
|
|
17
|
+
error = assert_raises(RapidFlow::ConfigError) do
|
|
18
|
+
batch = Batch.new
|
|
19
|
+
batch.start
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
assert_equal "Unable to start the batch without any stages", error.message
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def test_invalid_worker_count
|
|
26
|
+
[
|
|
27
|
+
-3,
|
|
28
|
+
0,
|
|
29
|
+
1.5,
|
|
30
|
+
'foo',
|
|
31
|
+
:bar
|
|
32
|
+
].each do |invalid_worker_count|
|
|
33
|
+
error = assert_raises(RapidFlow::ConfigError, "Expected to raise exception for '#{invalid_worker_count}'") do
|
|
34
|
+
Batch.new({ fn: ->(data) { data.upcase }, workers: invalid_worker_count })
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
assert_equal "Worker count should be a positive number for stage", error.message
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
|
|
data/test/rapidflow/batch/error_handling_test.rb
ADDED
@@ -0,0 +1,211 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
require "test_helper"
|
|
3
|
+
|
|
4
|
+
module RapidFlow
|
|
5
|
+
class BatchErrorHandlingTest < Minitest::Test
|
|
6
|
+
def test_error_handling_captures_exceptions
|
|
7
|
+
batch = Batch.build do
|
|
8
|
+
stage ->(data) {
|
|
9
|
+
raise "Error in stage 1" if data == "bad"
|
|
10
|
+
data
|
|
11
|
+
}
|
|
12
|
+
stage ->(data) { data.upcase }
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
batch.push("good")
|
|
16
|
+
batch.push("bad")
|
|
17
|
+
|
|
18
|
+
results = batch.results
|
|
19
|
+
|
|
20
|
+
assert_equal 2, results.length
|
|
21
|
+
|
|
22
|
+
# Good result should complete both stages
|
|
23
|
+
assert_equal "GOOD", results[0][0]
|
|
24
|
+
assert_nil results[0][1]
|
|
25
|
+
|
|
26
|
+
# Bad result should have error from stage 1 and not be processed by stage 2
|
|
27
|
+
assert_equal "bad", results[1][0] # Original data preserved
|
|
28
|
+
assert_instance_of RuntimeError, results[1][1]
|
|
29
|
+
assert_equal "Error in stage 1", results[1][1].message
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def test_error_in_middle_stage
|
|
33
|
+
batch = Batch.build do
|
|
34
|
+
stage ->(data) { data.upcase }
|
|
35
|
+
stage ->(data) {
|
|
36
|
+
raise "Error in stage 2" if data == "BAD"
|
|
37
|
+
data
|
|
38
|
+
}
|
|
39
|
+
stage ->(data) { data + "!" }
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
batch.push("good")
|
|
43
|
+
batch.push("bad")
|
|
44
|
+
batch.push("also_good")
|
|
45
|
+
|
|
46
|
+
results = batch.results
|
|
47
|
+
|
|
48
|
+
assert_equal 3, results.length
|
|
49
|
+
assert_equal ["GOOD!", nil], results[0]
|
|
50
|
+
assert_equal ["BAD", results[1][1]], [results[1][0], results[1][1]]
|
|
51
|
+
assert_equal "Error in stage 2", results[1][1].message
|
|
52
|
+
assert_equal ["ALSO_GOOD!", nil], results[2]
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def test_error_in_last_stage
|
|
56
|
+
batch = Batch.build do
|
|
57
|
+
stage ->(data) { data.upcase }
|
|
58
|
+
stage ->(data) {
|
|
59
|
+
raise "Error in final stage" if data == "BAD"
|
|
60
|
+
data
|
|
61
|
+
}
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
batch.push("good")
|
|
65
|
+
batch.push("bad")
|
|
66
|
+
|
|
67
|
+
results = batch.results
|
|
68
|
+
|
|
69
|
+
assert_equal 2, results.length
|
|
70
|
+
assert_equal ["GOOD", nil], results[0]
|
|
71
|
+
assert_equal ["BAD", results[1][1]], [results[1][0], results[1][1]]
|
|
72
|
+
assert_equal "Error in final stage", results[1][1].message
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
def test_multiple_errors_in_sequence
|
|
76
|
+
batch = Batch.build do
|
|
77
|
+
stage ->(data) {
|
|
78
|
+
raise "Error at #{data}" if data.start_with?("bad")
|
|
79
|
+
data
|
|
80
|
+
}
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
batch.push("good1")
|
|
84
|
+
batch.push("bad1")
|
|
85
|
+
batch.push("bad2")
|
|
86
|
+
batch.push("good2")
|
|
87
|
+
|
|
88
|
+
results = batch.results
|
|
89
|
+
|
|
90
|
+
assert_equal 4, results.length
|
|
91
|
+
assert_equal ["good1", nil], results[0]
|
|
92
|
+
assert_instance_of RuntimeError, results[1][1]
|
|
93
|
+
assert_instance_of RuntimeError, results[2][1]
|
|
94
|
+
assert_equal ["good2", nil], results[3]
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
def test_exception_types_preserved
|
|
98
|
+
batch = Batch.build do
|
|
99
|
+
stage ->(data) {
|
|
100
|
+
case data
|
|
101
|
+
when "argument_error"
|
|
102
|
+
raise ArgumentError, "Bad argument"
|
|
103
|
+
when "runtime_error"
|
|
104
|
+
raise "Runtime problem"
|
|
105
|
+
when "custom_error"
|
|
106
|
+
raise StandardError, "Custom error"
|
|
107
|
+
else
|
|
108
|
+
data
|
|
109
|
+
end
|
|
110
|
+
}
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
batch.push("good")
|
|
114
|
+
batch.push("argument_error")
|
|
115
|
+
batch.push("runtime_error")
|
|
116
|
+
batch.push("custom_error")
|
|
117
|
+
|
|
118
|
+
results = batch.results
|
|
119
|
+
|
|
120
|
+
assert_equal 4, results.length
|
|
121
|
+
assert_equal ["good", nil], results[0]
|
|
122
|
+
assert_instance_of ArgumentError, results[1][1]
|
|
123
|
+
assert_instance_of RuntimeError, results[2][1]
|
|
124
|
+
assert_instance_of StandardError, results[3][1]
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
def test_all_items_fail
|
|
128
|
+
batch = Batch.build do
|
|
129
|
+
stage ->(data) { raise "Always fails" }
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
5.times { |i| batch.push(i) }
|
|
133
|
+
|
|
134
|
+
results = batch.results
|
|
135
|
+
|
|
136
|
+
assert_equal 5, results.length
|
|
137
|
+
results.each do |result, error|
|
|
138
|
+
assert_instance_of RuntimeError, error
|
|
139
|
+
assert_equal "Always fails", error.message
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
def test_no_method_error_in_first_lamda_function
|
|
144
|
+
batch = Batch.build do
|
|
145
|
+
# invalid stage - calling invalid method
|
|
146
|
+
stage ->(data) { data.foobar }
|
|
147
|
+
|
|
148
|
+
# valid stage
|
|
149
|
+
stage ->(data) { data.upcase }
|
|
150
|
+
|
|
151
|
+
# valid stage
|
|
152
|
+
stage ->(data) { data + '!' }
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
batch.push("hello")
|
|
156
|
+
|
|
157
|
+
results = batch.results
|
|
158
|
+
|
|
159
|
+
assert_equal 1, results.length
|
|
160
|
+
assert_equal "hello", results.first[0] # preserved the original input as the error happened in the first stage
|
|
161
|
+
assert_instance_of NoMethodError, results.first[1]
|
|
162
|
+
|
|
163
|
+
expected_error_message = case RUBY_VERSION
|
|
164
|
+
when /^3.4/
|
|
165
|
+
"undefined method 'foobar' for an instance of String"
|
|
166
|
+
when /^3.3/
|
|
167
|
+
"undefined method `foobar' for an instance of String"
|
|
168
|
+
when /^3.2/
|
|
169
|
+
"undefined method `foobar' for \"hello\":String"
|
|
170
|
+
else
|
|
171
|
+
raise "Unexpected ruby version: #{RUBY_VERSION}"
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
assert_equal expected_error_message, results.first[1].message
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
def test_no_method_error_in_mid_lamda_function
|
|
178
|
+
batch = Batch.build do
|
|
179
|
+
# valid stage
|
|
180
|
+
stage ->(data) { data.upcase }
|
|
181
|
+
|
|
182
|
+
# invalid stage - calling invalid method
|
|
183
|
+
stage ->(data) { data.foobar }
|
|
184
|
+
|
|
185
|
+
# valid stage
|
|
186
|
+
stage ->(data) { data + '!' }
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
batch.push("hello")
|
|
190
|
+
|
|
191
|
+
results = batch.results
|
|
192
|
+
|
|
193
|
+
assert_equal 1, results.length
|
|
194
|
+
assert_equal "HELLO", results.first[0]
|
|
195
|
+
assert_instance_of NoMethodError, results.first[1]
|
|
196
|
+
|
|
197
|
+
expected_error_message = case RUBY_VERSION
|
|
198
|
+
when /^3.4/
|
|
199
|
+
"undefined method 'foobar' for an instance of String"
|
|
200
|
+
when /^3.3/
|
|
201
|
+
"undefined method `foobar' for an instance of String"
|
|
202
|
+
when /^3.2/
|
|
203
|
+
"undefined method `foobar' for \"HELLO\":String"
|
|
204
|
+
else
|
|
205
|
+
raise "Unexpected ruby version: #{RUBY_VERSION}"
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
assert_equal expected_error_message, results.first[1].message
|
|
209
|
+
end
|
|
210
|
+
end
|
|
211
|
+
end
|