simple_flow 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/.envrc +1 -0
  3. data/.github/workflows/deploy-github-pages.yml +52 -0
  4. data/.rubocop.yml +57 -0
  5. data/CHANGELOG.md +4 -0
  6. data/COMMITS.md +196 -0
  7. data/LICENSE +21 -0
  8. data/README.md +481 -0
  9. data/Rakefile +15 -0
  10. data/benchmarks/parallel_vs_sequential.rb +98 -0
  11. data/benchmarks/pipeline_overhead.rb +130 -0
  12. data/docs/api/middleware.md +468 -0
  13. data/docs/api/parallel-step.md +363 -0
  14. data/docs/api/pipeline.md +382 -0
  15. data/docs/api/result.md +375 -0
  16. data/docs/concurrent/best-practices.md +687 -0
  17. data/docs/concurrent/introduction.md +246 -0
  18. data/docs/concurrent/parallel-steps.md +418 -0
  19. data/docs/concurrent/performance.md +481 -0
  20. data/docs/core-concepts/flow-control.md +452 -0
  21. data/docs/core-concepts/middleware.md +389 -0
  22. data/docs/core-concepts/overview.md +219 -0
  23. data/docs/core-concepts/pipeline.md +315 -0
  24. data/docs/core-concepts/result.md +168 -0
  25. data/docs/core-concepts/steps.md +391 -0
  26. data/docs/development/benchmarking.md +443 -0
  27. data/docs/development/contributing.md +380 -0
  28. data/docs/development/dagwood-concepts.md +435 -0
  29. data/docs/development/testing.md +514 -0
  30. data/docs/getting-started/examples.md +197 -0
  31. data/docs/getting-started/installation.md +62 -0
  32. data/docs/getting-started/quick-start.md +218 -0
  33. data/docs/guides/choosing-concurrency-model.md +441 -0
  34. data/docs/guides/complex-workflows.md +440 -0
  35. data/docs/guides/data-fetching.md +478 -0
  36. data/docs/guides/error-handling.md +635 -0
  37. data/docs/guides/file-processing.md +505 -0
  38. data/docs/guides/validation-patterns.md +496 -0
  39. data/docs/index.md +169 -0
  40. data/examples/.gitignore +3 -0
  41. data/examples/01_basic_pipeline.rb +112 -0
  42. data/examples/02_error_handling.rb +178 -0
  43. data/examples/03_middleware.rb +186 -0
  44. data/examples/04_parallel_automatic.rb +221 -0
  45. data/examples/05_parallel_explicit.rb +279 -0
  46. data/examples/06_real_world_ecommerce.rb +288 -0
  47. data/examples/07_real_world_etl.rb +277 -0
  48. data/examples/08_graph_visualization.rb +246 -0
  49. data/examples/09_pipeline_visualization.rb +266 -0
  50. data/examples/10_concurrency_control.rb +235 -0
  51. data/examples/11_sequential_dependencies.rb +243 -0
  52. data/examples/12_none_constant.rb +161 -0
  53. data/examples/README.md +374 -0
  54. data/examples/regression_test/01_basic_pipeline.txt +38 -0
  55. data/examples/regression_test/02_error_handling.txt +92 -0
  56. data/examples/regression_test/03_middleware.txt +61 -0
  57. data/examples/regression_test/04_parallel_automatic.txt +86 -0
  58. data/examples/regression_test/05_parallel_explicit.txt +80 -0
  59. data/examples/regression_test/06_real_world_ecommerce.txt +53 -0
  60. data/examples/regression_test/07_real_world_etl.txt +58 -0
  61. data/examples/regression_test/08_graph_visualization.txt +429 -0
  62. data/examples/regression_test/09_pipeline_visualization.txt +305 -0
  63. data/examples/regression_test/10_concurrency_control.txt +96 -0
  64. data/examples/regression_test/11_sequential_dependencies.txt +86 -0
  65. data/examples/regression_test/12_none_constant.txt +64 -0
  66. data/examples/regression_test.rb +105 -0
  67. data/lib/simple_flow/dependency_graph.rb +120 -0
  68. data/lib/simple_flow/dependency_graph_visualizer.rb +326 -0
  69. data/lib/simple_flow/middleware.rb +36 -0
  70. data/lib/simple_flow/parallel_executor.rb +80 -0
  71. data/lib/simple_flow/pipeline.rb +405 -0
  72. data/lib/simple_flow/result.rb +88 -0
  73. data/lib/simple_flow/step_tracker.rb +58 -0
  74. data/lib/simple_flow/version.rb +5 -0
  75. data/lib/simple_flow.rb +41 -0
  76. data/mkdocs.yml +146 -0
  77. data/pipeline_graph.dot +51 -0
  78. data/pipeline_graph.html +60 -0
  79. data/pipeline_graph.mmd +19 -0
  80. metadata +127 -0
@@ -0,0 +1,443 @@
1
+ # Benchmarking Guide
2
+
3
+ This guide explains how to benchmark SimpleFlow pipelines and measure performance improvements.
4
+
5
+ ## Running Benchmarks
6
+
7
+ ### Basic Benchmark
8
+
9
+ ```ruby
10
+ require 'benchmark'
11
+ require_relative '../lib/simple_flow'
12
+
13
+ # Create pipeline
14
+ pipeline = SimpleFlow::Pipeline.new do
15
+ step ->(result) {
16
+ sleep 0.1 # Simulate I/O
17
+ result.continue(result.value + 1)
18
+ }
19
+
20
+ step ->(result) {
21
+ sleep 0.1 # Simulate I/O
22
+ result.continue(result.value * 2)
23
+ }
24
+ end
25
+
26
+ # Benchmark execution
27
+ initial = SimpleFlow::Result.new(5)
28
+
29
+ time = Benchmark.realtime do
30
+ pipeline.call(initial)
31
+ end
32
+
33
+ puts "Execution time: #{(time * 1000).round(2)}ms"
34
+ ```
35
+
36
+ ### Parallel vs Sequential Comparison
37
+
38
+ ```ruby
39
+ require 'benchmark'
40
+ require_relative '../lib/simple_flow'
41
+
42
+ # Sequential pipeline
43
+ sequential = SimpleFlow::Pipeline.new do
44
+ step ->(result) { sleep 0.1; result.continue(result.value) }
45
+ step ->(result) { sleep 0.1; result.continue(result.value) }
46
+ step ->(result) { sleep 0.1; result.continue(result.value) }
47
+ step ->(result) { sleep 0.1; result.continue(result.value) }
48
+ end
49
+
50
+ # Parallel pipeline
51
+ parallel = SimpleFlow::Pipeline.new do
52
+ step :step_a, ->(result) {
53
+ sleep 0.1
54
+ result.with_context(:a, true).continue(result.value)
55
+ }, depends_on: []
56
+
57
+ step :step_b, ->(result) {
58
+ sleep 0.1
59
+ result.with_context(:b, true).continue(result.value)
60
+ }, depends_on: []
61
+
62
+ step :step_c, ->(result) {
63
+ sleep 0.1
64
+ result.with_context(:c, true).continue(result.value)
65
+ }, depends_on: []
66
+
67
+ step :step_d, ->(result) {
68
+ sleep 0.1
69
+ result.with_context(:d, true).continue(result.value)
70
+ }, depends_on: []
71
+ end
72
+
73
+ initial = SimpleFlow::Result.new(nil)
74
+
75
+ puts "Running benchmarks..."
76
+ puts "=" * 60
77
+
78
+ sequential_time = Benchmark.realtime do
79
+ sequential.call(initial)
80
+ end
81
+
82
+ parallel_time = Benchmark.realtime do
83
+ parallel.call_parallel(initial)
84
+ end
85
+
86
+ puts "Sequential: #{(sequential_time * 1000).round(2)}ms"
87
+ puts "Parallel: #{(parallel_time * 1000).round(2)}ms"
88
+ puts "Speedup: #{(sequential_time / parallel_time).round(2)}x"
89
+ ```
90
+
91
+ Expected output (exact timings will vary by machine and load):
92
+ ```
93
+ Running benchmarks...
94
+ ============================================================
95
+ Sequential: 401.23ms
96
+ Parallel: 102.45ms
97
+ Speedup: 3.92x
98
+ ```
99
+
100
+ ## Benchmarking Patterns
101
+
102
+ ### Memory Usage
103
+
104
+ ```ruby
105
+ require 'benchmark'
106
+ require 'objspace'
107
+
108
+ def measure_memory
109
+ GC.start
110
+ before = ObjectSpace.memsize_of_all
111
+ yield
112
+ GC.start # collect garbage created by the block, so only memory still retained afterwards is counted
113
+ after = ObjectSpace.memsize_of_all
114
+ (after - before) / 1024.0 / 1024.0 # MB
115
+ end
116
+
117
+ pipeline = SimpleFlow::Pipeline.new do
118
+ step ->(result) {
119
+ large_data = Array.new(10000) { |i| { id: i, data: "x" * 100 } }
120
+ result.with_context(:data, large_data).continue(result.value)
121
+ }
122
+ end
123
+
124
+ memory_used = measure_memory do
125
+ pipeline.call(SimpleFlow::Result.new(nil))
126
+ end
127
+
128
+ puts "Memory used: #{memory_used.round(2)} MB"
129
+ ```
130
+
131
+ ### Throughput Testing
132
+
133
+ ```ruby
134
+ require 'benchmark'
135
+
136
+ def measure_throughput(pipeline, iterations: 1000)
137
+ start = Time.now
138
+
139
+ iterations.times do |i|
140
+ pipeline.call(SimpleFlow::Result.new(i))
141
+ end
142
+
143
+ duration = Time.now - start
144
+ throughput = iterations / duration
145
+
146
+ {
147
+ duration: duration,
148
+ throughput: throughput,
149
+ avg_time: duration / iterations
150
+ }
151
+ end
152
+
153
+ pipeline = SimpleFlow::Pipeline.new do
154
+ step ->(result) { result.continue(result.value * 2) }
155
+ step ->(result) { result.continue(result.value + 10) }
156
+ end
157
+
158
+ stats = measure_throughput(pipeline, iterations: 10000)
159
+
160
+ puts "Total time: #{stats[:duration].round(2)}s"
161
+ puts "Throughput: #{stats[:throughput].round(2)} ops/sec"
162
+ puts "Average time per operation: #{(stats[:avg_time] * 1000).round(4)}ms"
163
+ ```
164
+
165
+ ### Middleware Overhead
166
+
167
+ ```ruby
168
+ require 'benchmark'
169
+
170
+ # Pipeline without middleware
171
+ plain_pipeline = SimpleFlow::Pipeline.new do
172
+ step ->(result) { result.continue(result.value + 1) }
173
+ step ->(result) { result.continue(result.value * 2) }
174
+ end
175
+
176
+ # Pipeline with middleware
177
+ middleware_pipeline = SimpleFlow::Pipeline.new do
178
+ use_middleware SimpleFlow::MiddleWare::Logging
179
+ use_middleware SimpleFlow::MiddleWare::Instrumentation, api_key: 'test'
180
+
181
+ step ->(result) { result.continue(result.value + 1) }
182
+ step ->(result) { result.continue(result.value * 2) }
183
+ end
184
+
185
+ iterations = 1000
186
+ initial = SimpleFlow::Result.new(5)
187
+
188
+ plain_time = Benchmark.realtime do
189
+ iterations.times { plain_pipeline.call(initial) }
190
+ end
191
+
192
+ middleware_time = Benchmark.realtime do
193
+ iterations.times { middleware_pipeline.call(initial) }
194
+ end
195
+
196
+ overhead = ((middleware_time - plain_time) / plain_time * 100)
197
+
198
+ puts "Plain pipeline: #{(plain_time * 1000).round(2)}ms for #{iterations} iterations"
199
+ puts "With middleware: #{(middleware_time * 1000).round(2)}ms for #{iterations} iterations"
200
+ puts "Middleware overhead: #{overhead.round(2)}%"
201
+ ```
202
+
203
+ ## Benchmark Suite
204
+
205
+ Create a comprehensive benchmark suite:
206
+
207
+ ```ruby
208
+ #!/usr/bin/env ruby
209
+ # benchmark/suite.rb
210
+
211
+ require 'benchmark'
212
+ require_relative '../lib/simple_flow'
213
+
214
+ class BenchmarkSuite
215
+ def initialize
216
+ @results = {}
217
+ end
218
+
219
+ def run_all
220
+ puts "SimpleFlow Benchmark Suite"
221
+ puts "=" * 60
222
+ puts
223
+
224
+ benchmark_sequential_pipeline
225
+ benchmark_parallel_pipeline
226
+ benchmark_middleware_overhead
227
+ benchmark_context_merging
228
+ benchmark_error_handling
229
+
230
+ print_summary
231
+ end
232
+
233
+ private
234
+
235
+ def benchmark_sequential_pipeline
236
+ pipeline = SimpleFlow::Pipeline.new do
237
+ 10.times do
238
+ step ->(result) { result.continue(result.value + 1) }
239
+ end
240
+ end
241
+
242
+ time = Benchmark.realtime do
243
+ 100.times { pipeline.call(SimpleFlow::Result.new(0)) }
244
+ end
245
+
246
+ @results[:sequential] = time
247
+ puts "Sequential (10 steps, 100 iterations): #{(time * 1000).round(2)}ms"
248
+ end
249
+
250
+ def benchmark_parallel_pipeline
251
+ return unless SimpleFlow::Pipeline.new.async_available?
252
+
253
+ pipeline = SimpleFlow::Pipeline.new do
254
+ 10.times do |i|
255
+ step "step_#{i}".to_sym, ->(result) {
256
+ result.with_context("step_#{i}".to_sym, true).continue(result.value)
257
+ }, depends_on: []
258
+ end
259
+ end
260
+
261
+ time = Benchmark.realtime do
262
+ 100.times { pipeline.call_parallel(SimpleFlow::Result.new(0)) }
263
+ end
264
+
265
+ @results[:parallel] = time
266
+ puts "Parallel (10 steps, 100 iterations): #{(time * 1000).round(2)}ms"
267
+ end
268
+
269
+ def benchmark_middleware_overhead
270
+ pipeline = SimpleFlow::Pipeline.new do
271
+ use_middleware SimpleFlow::MiddleWare::Logging
272
+ step ->(result) { result.continue(result.value) }
273
+ end
274
+
275
+ time = Benchmark.realtime do
276
+ 100.times { pipeline.call(SimpleFlow::Result.new(0)) }
277
+ end
278
+
279
+ @results[:middleware] = time
280
+ puts "Middleware overhead (100 iterations): #{(time * 1000).round(2)}ms"
281
+ end
282
+
283
+ def benchmark_context_merging
284
+ pipeline = SimpleFlow::Pipeline.new do
285
+ step ->(result) {
286
+ result
287
+ .with_context(:key1, "value1")
288
+ .with_context(:key2, "value2")
289
+ .with_context(:key3, "value3")
290
+ .continue(result.value)
291
+ }
292
+ end
293
+
294
+ time = Benchmark.realtime do
295
+ 1000.times { pipeline.call(SimpleFlow::Result.new(0)) }
296
+ end
297
+
298
+ @results[:context_merging] = time
299
+ puts "Context merging (1000 iterations): #{(time * 1000).round(2)}ms"
300
+ end
301
+
302
+ def benchmark_error_handling
303
+ pipeline = SimpleFlow::Pipeline.new do
304
+ step ->(result) {
305
+ result
306
+ .with_error(:validation, "Error 1")
307
+ .with_error(:validation, "Error 2")
308
+ .halt
309
+ }
310
+ end
311
+
312
+ time = Benchmark.realtime do
313
+ 1000.times { pipeline.call(SimpleFlow::Result.new(0)) }
314
+ end
315
+
316
+ @results[:error_handling] = time
317
+ puts "Error handling (1000 iterations): #{(time * 1000).round(2)}ms"
318
+ end
319
+
320
+ def print_summary
321
+ puts
322
+ puts "=" * 60
323
+ puts "Summary"
324
+ puts "=" * 60
325
+
326
+ @results.each do |name, time|
327
+ puts "#{name.to_s.ljust(20)}: #{(time * 1000).round(2)}ms"
328
+ end
329
+ end
330
+ end
331
+
332
+ BenchmarkSuite.new.run_all
333
+ ```
334
+
335
+ Run the suite:
336
+ ```bash
337
+ ruby benchmark/suite.rb
338
+ ```
339
+
340
+ ## Profiling
341
+
342
+ ### Using Ruby's Profiler
343
+
344
+ ```ruby
345
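+ require 'profile' # NOTE: profile.rb was removed from Ruby's standard library in 2.7; on newer Rubies use ruby-prof (below)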
346
+ require_relative '../lib/simple_flow'
347
+
348
+ pipeline = SimpleFlow::Pipeline.new do
349
+ step ->(result) { result.continue(result.value + 1) }
350
+ step ->(result) { result.continue(result.value * 2) }
351
+ end
352
+
353
+ 100.times { pipeline.call(SimpleFlow::Result.new(5)) }
354
+ ```
355
+
356
+ ### Using ruby-prof
357
+
358
+ ```ruby
359
+ require 'ruby-prof'
360
+ require_relative '../lib/simple_flow'
361
+
362
+ pipeline = SimpleFlow::Pipeline.new do
363
+ step ->(result) { result.continue(result.value + 1) }
364
+ step ->(result) { result.continue(result.value * 2) }
365
+ end
366
+
367
+ RubyProf.start
368
+
369
+ 1000.times { pipeline.call(SimpleFlow::Result.new(5)) }
370
+
371
+ result = RubyProf.stop
372
+
373
+ # Print a flat profile to text
374
+ printer = RubyProf::FlatPrinter.new(result)
375
+ printer.print($stdout)
376
+ ```
377
+
378
+ ## Performance Tips
379
+
380
+ ### 1. Minimize Context Size
381
+
382
+ ```ruby
383
+ # SLOW: Large context objects
384
+ step ->(result) {
385
+ large_data = load_all_users # 10,000 records
386
+ result.with_context(:users, large_data).continue(result.value)
387
+ }
388
+
389
+ # FAST: Only essential data
390
+ step ->(result) {
391
+ users = load_all_users
392
+ user_ids = users.map(&:id)
393
+ result.with_context(:user_ids, user_ids).continue(result.value)
394
+ }
395
+ ```
396
+
397
+ ### 2. Use Parallel Execution for I/O
398
+
399
+ ```ruby
400
+ # SLOW: Sequential I/O
401
+ pipeline = SimpleFlow::Pipeline.new do
402
+ step ->(result) { result.with_context(:a, fetch_api_a).continue(result.value) }
403
+ step ->(result) { result.with_context(:b, fetch_api_b).continue(result.value) }
404
+ step ->(result) { result.with_context(:c, fetch_api_c).continue(result.value) }
405
+ end
406
+
407
+ # FAST: Parallel I/O
408
+ pipeline = SimpleFlow::Pipeline.new do
409
+ step :fetch_a, ->(result) {
410
+ result.with_context(:a, fetch_api_a).continue(result.value)
411
+ }, depends_on: []
412
+
413
+ step :fetch_b, ->(result) {
414
+ result.with_context(:b, fetch_api_b).continue(result.value)
415
+ }, depends_on: []
416
+
417
+ step :fetch_c, ->(result) {
418
+ result.with_context(:c, fetch_api_c).continue(result.value)
419
+ }, depends_on: []
420
+ end
421
+ ```
422
+
423
+ ### 3. Avoid Unnecessary Steps
424
+
425
+ ```ruby
426
+ # SLOW: Too many fine-grained steps
427
+ pipeline = SimpleFlow::Pipeline.new do
428
+ step ->(result) { result.continue(result.value + 1) }
429
+ step ->(result) { result.continue(result.value + 1) }
430
+ step ->(result) { result.continue(result.value + 1) }
431
+ end
432
+
433
+ # FAST: Combine simple operations
434
+ pipeline = SimpleFlow::Pipeline.new do
435
+ step ->(result) { result.continue(result.value + 3) }
436
+ end
437
+ ```
438
+
439
+ ## Related Documentation
440
+
441
+ - [Testing Guide](testing.md) - Writing tests
442
+ - [Performance Guide](../concurrent/performance.md) - Performance characteristics
443
+ - [Contributing Guide](contributing.md) - Contributing to SimpleFlow