simple_flow 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/.envrc +1 -0
  3. data/.github/workflows/deploy-github-pages.yml +52 -0
  4. data/.rubocop.yml +57 -0
  5. data/CHANGELOG.md +4 -0
  6. data/COMMITS.md +196 -0
  7. data/LICENSE +21 -0
  8. data/README.md +481 -0
  9. data/Rakefile +15 -0
  10. data/benchmarks/parallel_vs_sequential.rb +98 -0
  11. data/benchmarks/pipeline_overhead.rb +130 -0
  12. data/docs/api/middleware.md +468 -0
  13. data/docs/api/parallel-step.md +363 -0
  14. data/docs/api/pipeline.md +382 -0
  15. data/docs/api/result.md +375 -0
  16. data/docs/concurrent/best-practices.md +687 -0
  17. data/docs/concurrent/introduction.md +246 -0
  18. data/docs/concurrent/parallel-steps.md +418 -0
  19. data/docs/concurrent/performance.md +481 -0
  20. data/docs/core-concepts/flow-control.md +452 -0
  21. data/docs/core-concepts/middleware.md +389 -0
  22. data/docs/core-concepts/overview.md +219 -0
  23. data/docs/core-concepts/pipeline.md +315 -0
  24. data/docs/core-concepts/result.md +168 -0
  25. data/docs/core-concepts/steps.md +391 -0
  26. data/docs/development/benchmarking.md +443 -0
  27. data/docs/development/contributing.md +380 -0
  28. data/docs/development/dagwood-concepts.md +435 -0
  29. data/docs/development/testing.md +514 -0
  30. data/docs/getting-started/examples.md +197 -0
  31. data/docs/getting-started/installation.md +62 -0
  32. data/docs/getting-started/quick-start.md +218 -0
  33. data/docs/guides/choosing-concurrency-model.md +441 -0
  34. data/docs/guides/complex-workflows.md +440 -0
  35. data/docs/guides/data-fetching.md +478 -0
  36. data/docs/guides/error-handling.md +635 -0
  37. data/docs/guides/file-processing.md +505 -0
  38. data/docs/guides/validation-patterns.md +496 -0
  39. data/docs/index.md +169 -0
  40. data/examples/.gitignore +3 -0
  41. data/examples/01_basic_pipeline.rb +112 -0
  42. data/examples/02_error_handling.rb +178 -0
  43. data/examples/03_middleware.rb +186 -0
  44. data/examples/04_parallel_automatic.rb +221 -0
  45. data/examples/05_parallel_explicit.rb +279 -0
  46. data/examples/06_real_world_ecommerce.rb +288 -0
  47. data/examples/07_real_world_etl.rb +277 -0
  48. data/examples/08_graph_visualization.rb +246 -0
  49. data/examples/09_pipeline_visualization.rb +266 -0
  50. data/examples/10_concurrency_control.rb +235 -0
  51. data/examples/11_sequential_dependencies.rb +243 -0
  52. data/examples/12_none_constant.rb +161 -0
  53. data/examples/README.md +374 -0
  54. data/examples/regression_test/01_basic_pipeline.txt +38 -0
  55. data/examples/regression_test/02_error_handling.txt +92 -0
  56. data/examples/regression_test/03_middleware.txt +61 -0
  57. data/examples/regression_test/04_parallel_automatic.txt +86 -0
  58. data/examples/regression_test/05_parallel_explicit.txt +80 -0
  59. data/examples/regression_test/06_real_world_ecommerce.txt +53 -0
  60. data/examples/regression_test/07_real_world_etl.txt +58 -0
  61. data/examples/regression_test/08_graph_visualization.txt +429 -0
  62. data/examples/regression_test/09_pipeline_visualization.txt +305 -0
  63. data/examples/regression_test/10_concurrency_control.txt +96 -0
  64. data/examples/regression_test/11_sequential_dependencies.txt +86 -0
  65. data/examples/regression_test/12_none_constant.txt +64 -0
  66. data/examples/regression_test.rb +105 -0
  67. data/lib/simple_flow/dependency_graph.rb +120 -0
  68. data/lib/simple_flow/dependency_graph_visualizer.rb +326 -0
  69. data/lib/simple_flow/middleware.rb +36 -0
  70. data/lib/simple_flow/parallel_executor.rb +80 -0
  71. data/lib/simple_flow/pipeline.rb +405 -0
  72. data/lib/simple_flow/result.rb +88 -0
  73. data/lib/simple_flow/step_tracker.rb +58 -0
  74. data/lib/simple_flow/version.rb +5 -0
  75. data/lib/simple_flow.rb +41 -0
  76. data/mkdocs.yml +146 -0
  77. data/pipeline_graph.dot +51 -0
  78. data/pipeline_graph.html +60 -0
  79. data/pipeline_graph.mmd +19 -0
  80. metadata +127 -0
@@ -0,0 +1,687 @@
1
+ # Best Practices for Concurrent Execution
2
+
3
+ This guide provides comprehensive best practices for designing, implementing, and debugging concurrent pipelines in SimpleFlow.
4
+
5
+ ## Design Principles
6
+
7
+ ### 1. Embrace Immutability
8
+
9
+ SimpleFlow's Result objects are immutable by design. Embrace this pattern throughout your pipeline:
10
+
11
+ ```ruby
12
+ # GOOD: Creating new results
13
+ step :transform_data, ->(result) {
14
+ transformed = result.value.map(&:upcase)
15
+ result.continue(transformed) # Returns new Result
16
+ }
17
+
18
+ # GOOD: Adding context
19
+ step :enrich_data, ->(result) {
20
+ result
21
+ .with_context(:timestamp, Time.now)
22
+ .with_context(:source, "api")
23
+ .continue(result.value)
24
+ }
25
+
26
+ # BAD: Mutating input
27
+ step :bad_transform, ->(result) {
28
+ result.value.map!(&:upcase) # Mutates shared data!
29
+ result.continue(result.value)
30
+ }
31
+ ```
32
+
33
+ ### 2. Design Independent Steps
34
+
35
+ Parallel steps should be completely independent:
36
+
37
+ ```ruby
38
+ # GOOD: Independent operations
39
+ pipeline = SimpleFlow::Pipeline.new do
40
+ step :fetch_weather, ->(result) {
41
+ weather = WeatherAPI.fetch(result.value[:location])
42
+ result.with_context(:weather, weather).continue(result.value)
43
+ }, depends_on: []
44
+
45
+ step :fetch_traffic, ->(result) {
46
+ traffic = TrafficAPI.fetch(result.value[:location])
47
+ result.with_context(:traffic, traffic).continue(result.value)
48
+ }, depends_on: []
49
+ end
50
+
51
+ # BAD: Steps that depend on execution order
52
+ counter = 0
53
+ pipeline = SimpleFlow::Pipeline.new do
54
+ step :increment, ->(result) {
55
+ counter += 1 # Race condition!
56
+ result.continue(result.value)
57
+ }, depends_on: []
58
+
59
+ step :read_counter, ->(result) {
60
+ result.with_context(:count, counter).continue(result.value)
61
+ }, depends_on: []
62
+ end
63
+ ```
64
+
65
+ ### 3. Use Context for Communication
66
+
67
+ Pass data between steps exclusively through the Result context:
68
+
69
+ ```ruby
70
+ # GOOD: Context-based communication
71
+ pipeline = SimpleFlow::Pipeline.new do
72
+ step :load_user, ->(result) {
73
+ user = User.find(result.value)
74
+ result.with_context(:user, user).continue(result.value)
75
+ }, depends_on: []
76
+
77
+ step :load_preferences, ->(result) {
78
+ user_id = result.context[:user][:id]
79
+ prefs = Preferences.find_by(user_id: user_id)
80
+ result.with_context(:preferences, prefs).continue(result.value)
81
+ }, depends_on: [:load_user]
82
+ end
83
+
84
+ # BAD: Instance variables
85
+ class PipelineRunner
86
+ def initialize
87
+ @shared_data = {}
88
+ end
89
+
90
+ def build_pipeline
91
+ SimpleFlow::Pipeline.new do
92
+ step :store_data, ->(result) {
93
+ @shared_data[:key] = result.value # Don't do this!
94
+ result.continue(result.value)
95
+ }, depends_on: []
96
+
97
+ step :read_data, ->(result) {
98
+ data = @shared_data[:key] # Race condition!
99
+ result.continue(data)
100
+ }, depends_on: []
101
+ end
102
+ end
103
+ end
104
+ ```
105
+
106
+ ## Dependency Management
107
+
108
+ ### 1. Declare All Dependencies Explicitly
109
+
110
+ Be exhaustive when declaring dependencies:
111
+
112
+ ```ruby
113
+ # GOOD: All dependencies declared
114
+ pipeline = SimpleFlow::Pipeline.new do
115
+ step :fetch_data, ->(result) { ... }, depends_on: []
116
+ step :validate_data, ->(result) { ... }, depends_on: [:fetch_data]
117
+ step :transform_data, ->(result) { ... }, depends_on: [:validate_data]
118
+ step :save_data, ->(result) { ... }, depends_on: [:transform_data]
119
+ end
120
+
121
+ # BAD: Missing dependencies
122
+ pipeline = SimpleFlow::Pipeline.new do
123
+ step :fetch_data, ->(result) { ... }, depends_on: []
124
+ step :transform_data, ->(result) { ... }, depends_on: [] # Should depend on fetch_data!
125
+ step :save_data, ->(result) { ... }, depends_on: [:transform_data]
126
+ end
127
+ ```
128
+
129
+ ### 2. Avoid Circular Dependencies
130
+
131
+ Circular dependencies will cause runtime errors:
132
+
133
+ ```ruby
134
+ # BAD: Circular dependency
135
+ pipeline = SimpleFlow::Pipeline.new do
136
+ step :step_a, ->(result) { ... }, depends_on: [:step_b]
137
+ step :step_b, ->(result) { ... }, depends_on: [:step_a]
138
+ end
139
+ # Raises TSort::Cyclic error
140
+ ```
141
+
142
+ ### 3. Minimize Dependency Chains
143
+
144
+ Flatten dependency chains when possible to maximize parallelism:
145
+
146
+ ```ruby
147
+ # GOOD: Maximum parallelism
148
+ pipeline = SimpleFlow::Pipeline.new do
149
+ step :validate, ->(result) { ... }, depends_on: []
150
+
151
+ # All depend only on validate - can run in parallel
152
+ step :check_inventory, ->(result) { ... }, depends_on: [:validate]
153
+ step :check_pricing, ->(result) { ... }, depends_on: [:validate]
154
+ step :check_shipping, ->(result) { ... }, depends_on: [:validate]
155
+ step :check_discounts, ->(result) { ... }, depends_on: [:validate]
156
+
157
+ # Waits for all parallel steps
158
+ step :finalize, ->(result) { ... }, depends_on: [
159
+ :check_inventory,
160
+ :check_pricing,
161
+ :check_shipping,
162
+ :check_discounts
163
+ ]
164
+ end
165
+
166
+ # BAD: Sequential chain (slower)
167
+ pipeline = SimpleFlow::Pipeline.new do
168
+ step :validate, ->(result) { ... }, depends_on: []
169
+ step :check_inventory, ->(result) { ... }, depends_on: [:validate]
170
+ step :check_pricing, ->(result) { ... }, depends_on: [:check_inventory]
171
+ step :check_shipping, ->(result) { ... }, depends_on: [:check_pricing]
172
+ step :finalize, ->(result) { ... }, depends_on: [:check_shipping]
173
+ end
174
+ # All steps run sequentially!
175
+ ```
176
+
177
+ ## Error Handling
178
+
179
+ ### 1. Validate Early
180
+
181
+ Place validation steps before expensive parallel operations:
182
+
183
+ ```ruby
184
+ # GOOD: Validate before parallel execution
185
+ pipeline = SimpleFlow::Pipeline.new do
186
+ step :validate_input, ->(result) {
187
+ if result.value[:email].nil?
188
+ return result.halt.with_error(:validation, "Email required")
189
+ end
190
+ result.continue(result.value)
191
+ }, depends_on: []
192
+
193
+ # Only execute if validation passes
194
+ step :fetch_user, ->(result) { ... }, depends_on: [:validate_input]
195
+ step :fetch_orders, ->(result) { ... }, depends_on: [:validate_input]
196
+ step :fetch_preferences, ->(result) { ... }, depends_on: [:validate_input]
197
+ end
198
+
199
+ # BAD: Validate after expensive operations
200
+ pipeline = SimpleFlow::Pipeline.new do
201
+ step :fetch_user, ->(result) { ... }, depends_on: []
202
+ step :fetch_orders, ->(result) { ... }, depends_on: []
203
+ step :fetch_preferences, ->(result) { ... }, depends_on: []
204
+
205
+ step :validate_results, ->(result) {
206
+ # Too late - already did expensive work!
207
+ if result.errors.any?
208
+ result.halt(result.value)
209
+ end
210
+ }, depends_on: [:fetch_user, :fetch_orders, :fetch_preferences]
211
+ end
212
+ ```
213
+
214
+ ### 2. Accumulate Errors, Then Halt
215
+
216
+ For validation pipelines, accumulate all errors before halting:
217
+
218
+ ```ruby
219
+ # GOOD: Collect all validation errors
220
+ pipeline = SimpleFlow::Pipeline.new do
221
+ step :validate_email, ->(result) {
222
+ if invalid_email?(result.value[:email])
223
+ result.with_error(:email, "Invalid email format")
224
+ else
225
+ result.continue(result.value)
226
+ end
227
+ }, depends_on: []
228
+
229
+ step :validate_phone, ->(result) {
230
+ if invalid_phone?(result.value[:phone])
231
+ result.with_error(:phone, "Invalid phone format")
232
+ else
233
+ result.continue(result.value)
234
+ end
235
+ }, depends_on: []
236
+
237
+ step :validate_age, ->(result) {
238
+ if result.value[:age] < 18
239
+ result.with_error(:age, "Must be 18 or older")
240
+ else
241
+ result.continue(result.value)
242
+ end
243
+ }, depends_on: []
244
+
245
+ # Check all errors at once
246
+ step :check_validations, ->(result) {
247
+ if result.errors.any?
248
+ result.halt(result.value)
249
+ else
250
+ result.continue(result.value)
251
+ end
252
+ }, depends_on: [:validate_email, :validate_phone, :validate_age]
253
+ end
254
+
255
+ # User gets all validation errors at once, not just the first one
256
+ ```
257
+
258
+ ### 3. Add Context to Errors
259
+
260
+ Include helpful debugging information:
261
+
262
+ ```ruby
263
+ step :process_file, ->(result) {
264
+ begin
265
+ data = File.read(result.value[:path])
266
+ parsed = JSON.parse(data)
267
+ result.with_context(:file_size, data.size).continue(parsed)
268
+ rescue Errno::ENOENT => e
269
+ result.halt.with_error(
270
+ :file_error,
271
+ "File not found: #{result.value[:path]}"
272
+ )
273
+ rescue JSON::ParserError => e
274
+ result.halt.with_error(
275
+ :parse_error,
276
+ "Invalid JSON in #{result.value[:path]}: #{e.message}"
277
+ )
278
+ end
279
+ }
280
+ ```
281
+
282
+ ## Performance Optimization
283
+
284
+ ### 1. Use Parallelism for I/O Operations
285
+
286
+ Prioritize parallelizing I/O-bound operations:
287
+
288
+ ```ruby
289
+ # GOOD: Parallel I/O operations
290
+ pipeline = SimpleFlow::Pipeline.new do
291
+ step :fetch_api_a, ->(result) {
292
+ # Network I/O - benefits from parallelism
293
+ response = HTTP.get("https://api-a.example.com")
294
+ result.with_context(:api_a, response).continue(result.value)
295
+ }, depends_on: []
296
+
297
+ step :fetch_api_b, ->(result) {
298
+ # Network I/O - benefits from parallelism
299
+ response = HTTP.get("https://api-b.example.com")
300
+ result.with_context(:api_b, response).continue(result.value)
301
+ }, depends_on: []
302
+ end
303
+
304
+ # Sequential: ~200ms (100ms per API call)
305
+ # Parallel: ~100ms
306
+ # Speedup: 2x
307
+ ```
308
+
309
+ ### 2. Keep CPU-Bound Operations Sequential
310
+
311
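+ Don't parallelize CPU-intensive calculations: MRI's Global VM Lock (GVL, often called the GIL) lets only one thread execute Ruby code at a time, so threads do not speed up CPU-bound work: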
312
+
313
+ ```ruby
314
+ # Keep CPU-bound operations sequential
315
+ pipeline = SimpleFlow::Pipeline.new do
316
+ step :calculate_fibonacci, ->(result) {
317
+ # CPU-bound - no benefit from parallelism
318
+ fib = calculate_fib(result.value)
319
+ result.continue(fib)
320
+ }, depends_on: []
321
+
322
+ step :process_result, ->(result) {
323
+ result.continue(result.value * 2)
324
+ }, depends_on: [:calculate_fibonacci]
325
+ end
326
+ ```
327
+
328
+ ### 3. Minimize Context Payload
329
+
330
+ Keep context lean to reduce merging overhead:
331
+
332
+ ```ruby
333
+ # GOOD: Minimal context
334
+ step :fetch_users, ->(result) {
335
+ users = UserService.all
336
+ user_count = users.size
337
+ result.with_context(:user_count, user_count).continue(result.value)
338
+ }
339
+
340
+ # BAD: Large context
341
+ step :fetch_users, ->(result) {
342
+ users = UserService.all # Could be thousands of records
343
+ result.with_context(:all_users, users).continue(result.value)
344
+ }
345
+ ```
346
+
347
+ ## Testing Strategies
348
+
349
+ ### 1. Test Steps in Isolation
350
+
351
+ Design steps to be testable independently:
352
+
353
+ ```ruby
354
+ # GOOD: Testable step
355
+ class FetchUserStep
356
+ def call(result)
357
+ user = UserService.find(result.value)
358
+ result.with_context(:user, user).continue(result.value)
359
+ end
360
+ end
361
+
362
+ # Easy to test
363
+ describe FetchUserStep do
364
+ it "fetches user and adds to context" do
365
+ step = FetchUserStep.new
366
+ result = SimpleFlow::Result.new(123)
367
+
368
+ output = step.call(result)
369
+
370
+ expect(output.context[:user]).to be_present
371
+ expect(output.continue?).to be true
372
+ end
373
+ end
374
+
375
+ # Use in pipeline
376
+ pipeline = SimpleFlow::Pipeline.new do
377
+ step :fetch_user, FetchUserStep.new, depends_on: []
378
+ end
379
+ ```
380
+
381
+ ### 2. Test Dependency Graphs
382
+
383
+ Verify your dependency structure:
384
+
385
+ ```ruby
386
+ describe "OrderPipeline" do
387
+ let(:pipeline) { OrderPipeline.build }
388
+
389
+ it "has correct dependency structure" do
390
+ graph = pipeline.dependency_graph
391
+
392
+ expect(graph.dependencies[:validate_order]).to eq([])
393
+ expect(graph.dependencies[:check_inventory]).to eq([:validate_order])
394
+ expect(graph.dependencies[:calculate_total]).to eq([
395
+ :check_inventory,
396
+ :check_pricing
397
+ ])
398
+ end
399
+
400
+ it "groups parallel steps correctly" do
401
+ graph = pipeline.dependency_graph
402
+ groups = graph.parallel_order
403
+
404
+ # Check inventory and pricing run in parallel
405
+ expect(groups[1]).to include(:check_inventory, :check_pricing)
406
+ end
407
+ end
408
+ ```
409
+
410
+ ### 3. Test Both Sequential and Parallel Execution
411
+
412
+ Ensure your pipeline works in both modes:
413
+
414
+ ```ruby
415
+ describe "DataPipeline" do
416
+ let(:pipeline) { DataPipeline.build }
417
+ let(:input) { SimpleFlow::Result.new(data) }
418
+
419
+ it "produces same result sequentially" do
420
+ result = pipeline.call(input)
421
+ expect(result.value).to eq(expected_output)
422
+ end
423
+
424
+ it "produces same result in parallel" do
425
+ result = pipeline.call_parallel(input)
426
+ expect(result.value).to eq(expected_output)
427
+ end
428
+
429
+ it "merges context from parallel steps" do
430
+ result = pipeline.call_parallel(input)
431
+ expect(result.context).to include(:data_a, :data_b, :data_c)
432
+ end
433
+ end
434
+ ```
435
+
436
+ ## Debugging Techniques
437
+
438
+ ### 1. Use Visualization Tools
439
+
440
+ Visualize your pipeline to understand execution flow:
441
+
442
+ ```ruby
443
+ pipeline = OrderPipeline.build
444
+
445
+ # ASCII visualization for quick debugging
446
+ puts pipeline.visualize_ascii
447
+
448
+ # Detailed execution plan
449
+ puts pipeline.execution_plan
450
+
451
+ # Generate diagram for documentation
452
+ File.write('pipeline.dot', pipeline.visualize_dot)
453
+ system('dot -Tpng pipeline.dot -o pipeline.png')
454
+ ```
455
+
456
+ ### 2. Add Logging Middleware
457
+
458
+ Use middleware to trace execution:
459
+
460
+ ```ruby
461
+ class DetailedLogging
462
+ def initialize(callable, step_name: nil)
463
+ @callable = callable
464
+ @step_name = step_name
465
+ end
466
+
467
+ def call(result)
468
+ puts "[#{Time.now}] Starting #{@step_name}"
469
+ puts " Input value: #{result.value.inspect}"
470
+
471
+ output = @callable.call(result)
472
+
473
+ puts "[#{Time.now}] Completed #{@step_name}"
474
+ puts " Output value: #{output.value.inspect}"
475
+ puts " Continue? #{output.continue?}"
476
+ puts " Errors: #{output.errors}" if output.errors.any?
477
+ puts
478
+
479
+ output
480
+ end
481
+ end
482
+
483
+ pipeline = SimpleFlow::Pipeline.new do
484
+ use_middleware DetailedLogging, step_name: "pipeline step"
485
+
486
+ step :fetch_data, ->(result) { ... }, depends_on: []
487
+ step :process_data, ->(result) { ... }, depends_on: [:fetch_data]
488
+ end
489
+ ```
490
+
491
+ ### 3. Track Step Execution Time
492
+
493
+ Measure performance of individual steps:
494
+
495
+ ```ruby
496
+ class TimingMiddleware
497
+ def initialize(callable, step_name:)
498
+ @callable = callable
499
+ @step_name = step_name
500
+ end
501
+
502
+ def call(result)
503
+ start_time = Time.now
504
+ output = @callable.call(result)
505
+ duration = Time.now - start_time
506
+
507
+ output.with_context(
508
+ "#{@step_name}_duration".to_sym,
509
+ duration
510
+ )
511
+ end
512
+ end
513
+
514
+ pipeline = SimpleFlow::Pipeline.new do
515
+ use_middleware TimingMiddleware, step_name: "slow_operation"
516
+
517
+ step :slow_operation, ->(result) { ... }, depends_on: []
518
+ end
519
+
520
+ result = pipeline.call(initial_data)
521
+ puts "Execution time: #{result.context[:slow_operation_duration]}s"
522
+ ```
523
+
524
+ ## Code Organization
525
+
526
+ ### 1. Extract Steps to Classes
527
+
528
+ For complex steps, use dedicated classes:
529
+
530
+ ```ruby
531
+ # GOOD: Dedicated step classes
532
+ module OrderPipeline
533
+ class ValidateOrder
534
+ def call(result)
535
+ order = result.value
536
+ errors = []
537
+
538
+ errors << "Missing email" unless order[:email]
539
+ errors << "No items" if order[:items].empty?
540
+
541
+ if errors.any?
542
+ result.halt.with_error(:validation, errors.join(", "))
543
+ else
544
+ result.continue(order)
545
+ end
546
+ end
547
+ end
548
+
549
+ class CalculateTotal
550
+ def call(result)
551
+ items = result.context[:items]
552
+ shipping = result.context[:shipping]
553
+
554
+ subtotal = items.sum { |i| i[:price] * i[:quantity] }
555
+ total = subtotal + shipping
556
+
557
+ result.with_context(:total, total).continue(result.value)
558
+ end
559
+ end
560
+
561
+ def self.build
562
+ SimpleFlow::Pipeline.new do
563
+ step :validate, ValidateOrder.new, depends_on: []
564
+ step :calculate_total, CalculateTotal.new, depends_on: [:validate]
565
+ end
566
+ end
567
+ end
568
+ ```
569
+
570
+ ### 2. Use Builder Pattern
571
+
572
+ Create pipeline builders for complex workflows:
573
+
574
+ ```ruby
575
+ class EcommercePipelineBuilder
576
+ def self.build(options = {})
577
+ SimpleFlow::Pipeline.new do
578
+ if options[:enable_logging]
579
+ use_middleware SimpleFlow::MiddleWare::Logging
580
+ end
581
+
582
+ # Validation phase
583
+ step :validate_order, ValidateOrder.new, depends_on: []
584
+
585
+ # Parallel checks
586
+ step :check_inventory, CheckInventory.new, depends_on: [:validate_order]
587
+ step :check_pricing, CheckPricing.new, depends_on: [:validate_order]
588
+ step :check_shipping, CheckShipping.new, depends_on: [:validate_order]
589
+
590
+ # Process payment
591
+ step :calculate_total, CalculateTotal.new,
592
+ depends_on: [:check_inventory, :check_pricing, :check_shipping]
593
+
594
+ step :process_payment, ProcessPayment.new,
595
+ depends_on: [:calculate_total]
596
+ end
597
+ end
598
+ end
599
+
600
+ # Use in application
601
+ pipeline = EcommercePipelineBuilder.build(enable_logging: true)
602
+ result = pipeline.call_parallel(order_data)
603
+ ```
604
+
605
+ ### 3. Document Dependencies
606
+
607
+ Add comments explaining why dependencies exist:
608
+
609
+ ```ruby
610
+ pipeline = SimpleFlow::Pipeline.new do
611
+ # Must validate before any processing
612
+ step :validate_input, ->(result) { ... }, depends_on: []
613
+
614
+ # These checks are independent and can run in parallel
615
+ step :check_inventory, ->(result) { ... }, depends_on: [:validate_input]
616
+ step :check_pricing, ->(result) { ... }, depends_on: [:validate_input]
617
+
618
+ # Discount requires both inventory (stock levels) and pricing
619
+ step :calculate_discount, ->(result) { ... },
620
+ depends_on: [:check_inventory, :check_pricing]
621
+ end
622
+ ```
623
+
624
+ ## Common Pitfalls
625
+
626
+ ### 1. Avoid Premature Parallelization
627
+
628
+ Don't parallelize until you have measured performance:
629
+
630
+ ```ruby
631
+ # Start simple
632
+ pipeline = SimpleFlow::Pipeline.new do
633
+ step ->(result) { fetch_data(result.value) }
634
+ step ->(result) { transform_data(result.value) }
635
+ step ->(result) { save_data(result.value) }
636
+ end
637
+
638
+ # Measure
639
+ time = Benchmark.realtime { pipeline.call(data) }
640
+
641
+ # Only add parallelism if it helps
642
+ if time > 1.0 # If pipeline takes > 1 second
643
+ # Refactor to use named steps with parallelism
644
+ end
645
+ ```
646
+
647
+ ### 2. Don't Parallelize Everything
648
+
649
+ Not all steps benefit from parallelism:
650
+
651
+ ```ruby
652
+ # BAD: Unnecessary parallelism
653
+ pipeline = SimpleFlow::Pipeline.new do
654
+ step :upcase, ->(result) {
655
+ result.continue(result.value.upcase) # Fast operation
656
+ }, depends_on: []
657
+
658
+ step :reverse, ->(result) {
659
+ result.continue(result.value.reverse) # Fast operation
660
+ }, depends_on: []
661
+ end
662
+
663
+ # Parallel overhead > benefit for fast operations
664
+ ```
665
+
666
+ ### 3. Watch for Deadlocks
667
+
668
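+ Parallel steps that share a limited resource, such as a database connection, can block one another. Size connection pools and other shared resources for the number of steps that may run concurrently: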
669
+
670
+ ```ruby
671
+ # GOOD: Connection pooling
672
+ DB = Sequel.connect(
673
+ 'postgres://localhost/db',
674
+ max_connections: 10 # Allow 10 concurrent connections
675
+ )
676
+
677
+ # BAD: Single connection
678
+ DB = Sequel.connect('postgres://localhost/db', max_connections: 1)
679
+ # Parallel steps will deadlock waiting for the connection!
680
+ ```
681
+
682
+ ## Related Documentation
683
+
684
+ - [Parallel Steps Guide](parallel-steps.md) - How to use named steps with dependencies
685
+ - [Performance Guide](performance.md) - Understanding parallel execution performance
686
+ - [Testing Guide](../development/testing.md) - Testing strategies for pipelines
687
+ - [Pipeline API](../api/pipeline.md) - Complete Pipeline API reference