simple_flow 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.envrc +1 -0
- data/.github/workflows/deploy-github-pages.yml +52 -0
- data/.rubocop.yml +57 -0
- data/CHANGELOG.md +4 -0
- data/COMMITS.md +196 -0
- data/LICENSE +21 -0
- data/README.md +481 -0
- data/Rakefile +15 -0
- data/benchmarks/parallel_vs_sequential.rb +98 -0
- data/benchmarks/pipeline_overhead.rb +130 -0
- data/docs/api/middleware.md +468 -0
- data/docs/api/parallel-step.md +363 -0
- data/docs/api/pipeline.md +382 -0
- data/docs/api/result.md +375 -0
- data/docs/concurrent/best-practices.md +687 -0
- data/docs/concurrent/introduction.md +246 -0
- data/docs/concurrent/parallel-steps.md +418 -0
- data/docs/concurrent/performance.md +481 -0
- data/docs/core-concepts/flow-control.md +452 -0
- data/docs/core-concepts/middleware.md +389 -0
- data/docs/core-concepts/overview.md +219 -0
- data/docs/core-concepts/pipeline.md +315 -0
- data/docs/core-concepts/result.md +168 -0
- data/docs/core-concepts/steps.md +391 -0
- data/docs/development/benchmarking.md +443 -0
- data/docs/development/contributing.md +380 -0
- data/docs/development/dagwood-concepts.md +435 -0
- data/docs/development/testing.md +514 -0
- data/docs/getting-started/examples.md +197 -0
- data/docs/getting-started/installation.md +62 -0
- data/docs/getting-started/quick-start.md +218 -0
- data/docs/guides/choosing-concurrency-model.md +441 -0
- data/docs/guides/complex-workflows.md +440 -0
- data/docs/guides/data-fetching.md +478 -0
- data/docs/guides/error-handling.md +635 -0
- data/docs/guides/file-processing.md +505 -0
- data/docs/guides/validation-patterns.md +496 -0
- data/docs/index.md +169 -0
- data/examples/.gitignore +3 -0
- data/examples/01_basic_pipeline.rb +112 -0
- data/examples/02_error_handling.rb +178 -0
- data/examples/03_middleware.rb +186 -0
- data/examples/04_parallel_automatic.rb +221 -0
- data/examples/05_parallel_explicit.rb +279 -0
- data/examples/06_real_world_ecommerce.rb +288 -0
- data/examples/07_real_world_etl.rb +277 -0
- data/examples/08_graph_visualization.rb +246 -0
- data/examples/09_pipeline_visualization.rb +266 -0
- data/examples/10_concurrency_control.rb +235 -0
- data/examples/11_sequential_dependencies.rb +243 -0
- data/examples/12_none_constant.rb +161 -0
- data/examples/README.md +374 -0
- data/examples/regression_test/01_basic_pipeline.txt +38 -0
- data/examples/regression_test/02_error_handling.txt +92 -0
- data/examples/regression_test/03_middleware.txt +61 -0
- data/examples/regression_test/04_parallel_automatic.txt +86 -0
- data/examples/regression_test/05_parallel_explicit.txt +80 -0
- data/examples/regression_test/06_real_world_ecommerce.txt +53 -0
- data/examples/regression_test/07_real_world_etl.txt +58 -0
- data/examples/regression_test/08_graph_visualization.txt +429 -0
- data/examples/regression_test/09_pipeline_visualization.txt +305 -0
- data/examples/regression_test/10_concurrency_control.txt +96 -0
- data/examples/regression_test/11_sequential_dependencies.txt +86 -0
- data/examples/regression_test/12_none_constant.txt +64 -0
- data/examples/regression_test.rb +105 -0
- data/lib/simple_flow/dependency_graph.rb +120 -0
- data/lib/simple_flow/dependency_graph_visualizer.rb +326 -0
- data/lib/simple_flow/middleware.rb +36 -0
- data/lib/simple_flow/parallel_executor.rb +80 -0
- data/lib/simple_flow/pipeline.rb +405 -0
- data/lib/simple_flow/result.rb +88 -0
- data/lib/simple_flow/step_tracker.rb +58 -0
- data/lib/simple_flow/version.rb +5 -0
- data/lib/simple_flow.rb +41 -0
- data/mkdocs.yml +146 -0
- data/pipeline_graph.dot +51 -0
- data/pipeline_graph.html +60 -0
- data/pipeline_graph.mmd +19 -0
- metadata +127 -0
|
@@ -0,0 +1,363 @@
|
|
|
1
|
+
# Parallel Execution API Reference
|
|
2
|
+
|
|
3
|
+
This document covers the APIs for parallel execution in SimpleFlow, including the ParallelExecutor class and dependency graph management.
|
|
4
|
+
|
|
5
|
+
## Class: `SimpleFlow::ParallelExecutor`
|
|
6
|
+
|
|
7
|
+
**Location**: `lib/simple_flow/parallel_executor.rb`
|
|
8
|
+
|
|
9
|
+
Handles parallel execution of steps using the async gem, with automatic fallback to sequential execution.
|
|
10
|
+
|
|
11
|
+
### Class Methods
|
|
12
|
+
|
|
13
|
+
#### `execute_parallel(steps, result)`
|
|
14
|
+
|
|
15
|
+
Executes a group of steps in parallel.
|
|
16
|
+
|
|
17
|
+
**Parameters:**
|
|
18
|
+
- `steps` (Array<Proc>) - Array of callable steps
|
|
19
|
+
- `result` (Result) - The input result to pass to each step
|
|
20
|
+
|
|
21
|
+
**Returns:** Array<Result> - Results from each step
|
|
22
|
+
|
|
23
|
+
**Behavior:**
|
|
24
|
+
- Uses the async gem for concurrent (fiber-based) execution if available
|
|
25
|
+
- Falls back to sequential execution if async is not available
|
|
26
|
+
- Each step receives the same input result
|
|
27
|
+
- Returns array of results in same order as input steps
|
|
28
|
+
|
|
29
|
+
**Example:**
|
|
30
|
+
```ruby
|
|
31
|
+
steps = [
|
|
32
|
+
->(result) { result.with_context(:a, "data_a").continue(result.value) },
|
|
33
|
+
->(result) { result.with_context(:b, "data_b").continue(result.value) },
|
|
34
|
+
->(result) { result.with_context(:c, "data_c").continue(result.value) }
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
initial = SimpleFlow::Result.new(123)
|
|
38
|
+
results = SimpleFlow::ParallelExecutor.execute_parallel(steps, initial)
|
|
39
|
+
|
|
40
|
+
results.size # => 3
|
|
41
|
+
results[0].context[:a] # => "data_a"
|
|
42
|
+
results[1].context[:b] # => "data_b"
|
|
43
|
+
results[2].context[:c] # => "data_c"
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
#### `execute_sequential(steps, result)`
|
|
47
|
+
|
|
48
|
+
Executes steps sequentially (fallback implementation).
|
|
49
|
+
|
|
50
|
+
**Parameters:**
|
|
51
|
+
- `steps` (Array<Proc>) - Array of callable steps
|
|
52
|
+
- `result` (Result) - The input result
|
|
53
|
+
|
|
54
|
+
**Returns:** Array<Result>
|
|
55
|
+
|
|
56
|
+
**Example:**
|
|
57
|
+
```ruby
|
|
58
|
+
results = SimpleFlow::ParallelExecutor.execute_sequential(steps, initial)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
#### `async_available?`
|
|
62
|
+
|
|
63
|
+
Checks if the async gem is available.
|
|
64
|
+
|
|
65
|
+
**Returns:** Boolean
|
|
66
|
+
|
|
67
|
+
**Example:**
|
|
68
|
+
```ruby
|
|
69
|
+
if SimpleFlow::ParallelExecutor.async_available?
|
|
70
|
+
puts "Async gem is installed - true parallel execution enabled"
|
|
71
|
+
else
|
|
72
|
+
puts "Async gem not found - will use sequential fallback"
|
|
73
|
+
end
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### Implementation Details
|
|
77
|
+
|
|
78
|
+
#### Async Integration
|
|
79
|
+
|
|
80
|
+
When the async gem is available:
|
|
81
|
+
```ruby
|
|
82
|
+
# Uses Async::Barrier for concurrent execution
|
|
83
|
+
Async do
|
|
84
|
+
barrier = Async::Barrier.new
|
|
85
|
+
tasks = []
|
|
86
|
+
|
|
87
|
+
steps.each do |step|
|
|
88
|
+
tasks << barrier.async do
|
|
89
|
+
step.call(result)
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
barrier.wait
|
|
94
|
+
results = tasks.map(&:result)
|
|
95
|
+
end
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
#### Sequential Fallback
|
|
99
|
+
|
|
100
|
+
When async is not available:
|
|
101
|
+
```ruby
|
|
102
|
+
steps.map { |step| step.call(result) }
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Class: `SimpleFlow::DependencyGraph`
|
|
106
|
+
|
|
107
|
+
**Location**: `lib/simple_flow/dependency_graph.rb`
|
|
108
|
+
|
|
109
|
+
Manages dependencies between pipeline steps and determines which steps can execute in parallel.
|
|
110
|
+
|
|
111
|
+
### Constructor
|
|
112
|
+
|
|
113
|
+
#### `new(dependencies)`
|
|
114
|
+
|
|
115
|
+
Creates a new dependency graph.
|
|
116
|
+
|
|
117
|
+
**Parameters:**
|
|
118
|
+
- `dependencies` (Hash) - Hash mapping step names to their dependencies
|
|
119
|
+
|
|
120
|
+
**Example:**
|
|
121
|
+
```ruby
|
|
122
|
+
graph = SimpleFlow::DependencyGraph.new(
|
|
123
|
+
fetch_user: [],
|
|
124
|
+
fetch_orders: [:fetch_user],
|
|
125
|
+
fetch_products: [:fetch_user],
|
|
126
|
+
calculate_total: [:fetch_orders, :fetch_products]
|
|
127
|
+
)
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### Instance Methods
|
|
131
|
+
|
|
132
|
+
#### `order`
|
|
133
|
+
|
|
134
|
+
Returns steps in topological order (dependencies first).
|
|
135
|
+
|
|
136
|
+
**Returns:** Array - Ordered list of step names
|
|
137
|
+
|
|
138
|
+
**Example:**
|
|
139
|
+
```ruby
|
|
140
|
+
graph.order
|
|
141
|
+
# => [:fetch_user, :fetch_orders, :fetch_products, :calculate_total]
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
#### `reverse_order`
|
|
145
|
+
|
|
146
|
+
Returns steps in reverse topological order.
|
|
147
|
+
|
|
148
|
+
**Returns:** Array
|
|
149
|
+
|
|
150
|
+
**Example:**
|
|
151
|
+
```ruby
|
|
152
|
+
graph.reverse_order
|
|
153
|
+
# => [:calculate_total, :fetch_products, :fetch_orders, :fetch_user]
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
#### `parallel_order`
|
|
157
|
+
|
|
158
|
+
Groups steps that can be executed in parallel.
|
|
159
|
+
|
|
160
|
+
**Returns:** Array<Array> - Array of groups, where each group can run in parallel
|
|
161
|
+
|
|
162
|
+
**Algorithm:**
|
|
163
|
+
Steps can run in parallel if:
|
|
164
|
+
1. They have the exact same dependencies, OR
|
|
165
|
+
2. All of a step's dependencies have been resolved in previous groups
|
|
166
|
+
|
|
167
|
+
**Example:**
|
|
168
|
+
```ruby
|
|
169
|
+
graph = SimpleFlow::DependencyGraph.new(
|
|
170
|
+
step_a: [],
|
|
171
|
+
step_b: [:step_a],
|
|
172
|
+
step_c: [:step_a],
|
|
173
|
+
step_d: [:step_b, :step_c]
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
graph.parallel_order
|
|
177
|
+
# => [
|
|
178
|
+
# [:step_a], # Group 1: step_a (no dependencies)
|
|
179
|
+
# [:step_b, :step_c], # Group 2: parallel (both depend only on step_a)
|
|
180
|
+
# [:step_d] # Group 3: step_d (waits for step_b and step_c)
|
|
181
|
+
# ]
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
#### `subgraph(node)`
|
|
185
|
+
|
|
186
|
+
Generates a subgraph starting at the given node.
|
|
187
|
+
|
|
188
|
+
**Parameters:**
|
|
189
|
+
- `node` (Symbol) - The starting node
|
|
190
|
+
|
|
191
|
+
**Returns:** DependencyGraph - New graph containing only the given node and all of its direct and transitive dependencies
|
|
192
|
+
|
|
193
|
+
**Example:**
|
|
194
|
+
```ruby
|
|
195
|
+
graph = SimpleFlow::DependencyGraph.new(
|
|
196
|
+
step_a: [],
|
|
197
|
+
step_b: [:step_a],
|
|
198
|
+
step_c: [:step_b]
|
|
199
|
+
)
|
|
200
|
+
|
|
201
|
+
subgraph = graph.subgraph(:step_c)
|
|
202
|
+
subgraph.dependencies
|
|
203
|
+
# => { step_c: [:step_b], step_b: [:step_a], step_a: [] }
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
#### `merge(other)`
|
|
207
|
+
|
|
208
|
+
Merges this graph with another graph.
|
|
209
|
+
|
|
210
|
+
**Parameters:**
|
|
211
|
+
- `other` (DependencyGraph) - Another dependency graph
|
|
212
|
+
|
|
213
|
+
**Returns:** DependencyGraph - New merged graph
|
|
214
|
+
|
|
215
|
+
**Behavior:**
|
|
216
|
+
- Combines all dependencies from both graphs
|
|
217
|
+
- If both graphs define dependencies for the same step, the merged graph uses the union of those dependencies
|
|
218
|
+
|
|
219
|
+
**Example:**
|
|
220
|
+
```ruby
|
|
221
|
+
graph1 = SimpleFlow::DependencyGraph.new(
|
|
222
|
+
step_a: [],
|
|
223
|
+
step_b: [:step_a]
|
|
224
|
+
)
|
|
225
|
+
|
|
226
|
+
graph2 = SimpleFlow::DependencyGraph.new(
|
|
227
|
+
step_c: [],
|
|
228
|
+
step_b: [:step_c] # Different dependency for step_b
|
|
229
|
+
)
|
|
230
|
+
|
|
231
|
+
merged = graph1.merge(graph2)
|
|
232
|
+
merged.dependencies[:step_b]
|
|
233
|
+
# => [:step_a, :step_c] # Union of dependencies
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
### Instance Attributes
|
|
237
|
+
|
|
238
|
+
#### `dependencies`
|
|
239
|
+
|
|
240
|
+
Hash of dependencies (read-only).
|
|
241
|
+
|
|
242
|
+
**Type:** Hash
|
|
243
|
+
|
|
244
|
+
**Example:**
|
|
245
|
+
```ruby
|
|
246
|
+
graph.dependencies
|
|
247
|
+
# => {
|
|
248
|
+
# fetch_user: [],
|
|
249
|
+
# fetch_orders: [:fetch_user],
|
|
250
|
+
# fetch_products: [:fetch_user],
|
|
251
|
+
# calculate_total: [:fetch_orders, :fetch_products]
|
|
252
|
+
# }
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
## Class: `SimpleFlow::Pipeline::ParallelBlock`
|
|
256
|
+
|
|
257
|
+
Internal helper class for building parallel blocks.
|
|
258
|
+
|
|
259
|
+
### Methods
|
|
260
|
+
|
|
261
|
+
#### `step(name_or_callable = nil, callable = nil, depends_on: [], &block)`
|
|
262
|
+
|
|
263
|
+
Adds a step to the parallel block.
|
|
264
|
+
|
|
265
|
+
**Note:** This is used internally by the Pipeline DSL.
|
|
266
|
+
|
|
267
|
+
## Usage Examples
|
|
268
|
+
|
|
269
|
+
### Direct ParallelExecutor Usage
|
|
270
|
+
|
|
271
|
+
```ruby
|
|
272
|
+
steps = [
|
|
273
|
+
->(result) {
|
|
274
|
+
data = fetch_from_api_a(result.value)
|
|
275
|
+
result.with_context(:api_a, data).continue(result.value)
|
|
276
|
+
},
|
|
277
|
+
->(result) {
|
|
278
|
+
data = fetch_from_api_b(result.value)
|
|
279
|
+
result.with_context(:api_b, data).continue(result.value)
|
|
280
|
+
},
|
|
281
|
+
->(result) {
|
|
282
|
+
data = fetch_from_cache(result.value)
|
|
283
|
+
result.with_context(:cache, data).continue(result.value)
|
|
284
|
+
}
|
|
285
|
+
]
|
|
286
|
+
|
|
287
|
+
initial = SimpleFlow::Result.new(request_id)
|
|
288
|
+
results = SimpleFlow::ParallelExecutor.execute_parallel(steps, initial)
|
|
289
|
+
|
|
290
|
+
# Merge contexts
|
|
291
|
+
merged_context = results.reduce({}) do |acc, r|
|
|
292
|
+
acc.merge(r.context)
|
|
293
|
+
end
|
|
294
|
+
# => { api_a: ..., api_b: ..., cache: ... }
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
### Dependency Graph Analysis
|
|
298
|
+
|
|
299
|
+
```ruby
|
|
300
|
+
# Define dependencies
|
|
301
|
+
dependencies = {
|
|
302
|
+
validate_input: [],
|
|
303
|
+
check_inventory: [:validate_input],
|
|
304
|
+
check_pricing: [:validate_input],
|
|
305
|
+
check_shipping: [:validate_input],
|
|
306
|
+
calculate_discount: [:check_inventory, :check_pricing],
|
|
307
|
+
finalize_order: [:calculate_discount, :check_shipping]
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
graph = SimpleFlow::DependencyGraph.new(dependencies)
|
|
311
|
+
|
|
312
|
+
# Analyze execution order
|
|
313
|
+
puts "Sequential order:"
|
|
314
|
+
puts graph.order.join(' -> ')
|
|
315
|
+
# => validate_input -> check_inventory -> check_pricing -> check_shipping -> calculate_discount -> finalize_order
|
|
316
|
+
|
|
317
|
+
puts "\nParallel execution groups:"
|
|
318
|
+
graph.parallel_order.each_with_index do |group, index|
|
|
319
|
+
puts "Group #{index + 1}: #{group.join(', ')}"
|
|
320
|
+
end
|
|
321
|
+
# => Group 1: validate_input
|
|
322
|
+
# => Group 2: check_inventory, check_pricing, check_shipping
|
|
323
|
+
# => Group 3: calculate_discount
|
|
324
|
+
# => Group 4: finalize_order
|
|
325
|
+
|
|
326
|
+
# Calculate potential speedup
|
|
327
|
+
total_steps = graph.order.size
|
|
328
|
+
total_groups = graph.parallel_order.size
|
|
329
|
+
puts "\nPotential speedup: #{total_steps.to_f / total_groups}x"
|
|
330
|
+
# => Potential speedup: 1.5x
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
### Installing Async Gem
|
|
334
|
+
|
|
335
|
+
Add to your Gemfile:
|
|
336
|
+
```ruby
|
|
337
|
+
gem 'async', '~> 2.0'
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
Then run:
|
|
341
|
+
```bash
|
|
342
|
+
bundle install
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
### Checking Async Availability
|
|
346
|
+
|
|
347
|
+
```ruby
|
|
348
|
+
# In your application
|
|
349
|
+
if SimpleFlow::ParallelExecutor.async_available?
|
|
350
|
+
puts "Parallel execution enabled"
|
|
351
|
+
puts "Using async gem for true concurrency"
|
|
352
|
+
else
|
|
353
|
+
puts "Parallel execution disabled"
|
|
354
|
+
puts "Add 'async' gem to Gemfile for parallel support"
|
|
355
|
+
end
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
## Related Documentation
|
|
359
|
+
|
|
360
|
+
- [Pipeline API](pipeline.md) - Pipeline class reference
|
|
361
|
+
- [Parallel Steps Guide](../concurrent/parallel-steps.md) - Using parallel execution
|
|
362
|
+
- [Performance Guide](../concurrent/performance.md) - Performance characteristics
|
|
363
|
+
- [Best Practices](../concurrent/best-practices.md) - Concurrent execution best practices
|