fractor 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +227 -102
  3. data/README.adoc +113 -1940
  4. data/docs/.lycheeignore +16 -0
  5. data/docs/Gemfile +24 -0
  6. data/docs/README.md +157 -0
  7. data/docs/_config.yml +151 -0
  8. data/docs/_features/error-handling.adoc +1192 -0
  9. data/docs/_features/index.adoc +80 -0
  10. data/docs/_features/monitoring.adoc +589 -0
  11. data/docs/_features/signal-handling.adoc +202 -0
  12. data/docs/_features/workflows.adoc +1235 -0
  13. data/docs/_guides/continuous-mode.adoc +736 -0
  14. data/docs/_guides/cookbook.adoc +1133 -0
  15. data/docs/_guides/index.adoc +55 -0
  16. data/docs/_guides/pipeline-mode.adoc +730 -0
  17. data/docs/_guides/troubleshooting.adoc +358 -0
  18. data/docs/_pages/architecture.adoc +1390 -0
  19. data/docs/_pages/core-concepts.adoc +1392 -0
  20. data/docs/_pages/design-principles.adoc +862 -0
  21. data/docs/_pages/getting-started.adoc +290 -0
  22. data/docs/_pages/installation.adoc +143 -0
  23. data/docs/_reference/api.adoc +1080 -0
  24. data/docs/_reference/error-reporting.adoc +670 -0
  25. data/docs/_reference/examples.adoc +181 -0
  26. data/docs/_reference/index.adoc +96 -0
  27. data/docs/_reference/troubleshooting.adoc +862 -0
  28. data/docs/_tutorials/complex-workflows.adoc +1022 -0
  29. data/docs/_tutorials/data-processing-pipeline.adoc +740 -0
  30. data/docs/_tutorials/first-application.adoc +384 -0
  31. data/docs/_tutorials/index.adoc +48 -0
  32. data/docs/_tutorials/long-running-services.adoc +931 -0
  33. data/docs/assets/images/favicon-16.png +0 -0
  34. data/docs/assets/images/favicon-32.png +0 -0
  35. data/docs/assets/images/favicon-48.png +0 -0
  36. data/docs/assets/images/favicon.ico +0 -0
  37. data/docs/assets/images/favicon.png +0 -0
  38. data/docs/assets/images/favicon.svg +45 -0
  39. data/docs/assets/images/fractor-icon.svg +49 -0
  40. data/docs/assets/images/fractor-logo.svg +61 -0
  41. data/docs/index.adoc +131 -0
  42. data/docs/lychee.toml +39 -0
  43. data/examples/api_aggregator/README.adoc +627 -0
  44. data/examples/api_aggregator/api_aggregator.rb +376 -0
  45. data/examples/auto_detection/README.adoc +407 -29
  46. data/examples/continuous_chat_common/message_protocol.rb +1 -1
  47. data/examples/error_reporting.rb +207 -0
  48. data/examples/file_processor/README.adoc +170 -0
  49. data/examples/file_processor/file_processor.rb +615 -0
  50. data/examples/file_processor/sample_files/invalid.csv +1 -0
  51. data/examples/file_processor/sample_files/orders.xml +24 -0
  52. data/examples/file_processor/sample_files/products.json +23 -0
  53. data/examples/file_processor/sample_files/users.csv +6 -0
  54. data/examples/hierarchical_hasher/README.adoc +629 -41
  55. data/examples/image_processor/README.adoc +610 -0
  56. data/examples/image_processor/image_processor.rb +349 -0
  57. data/examples/image_processor/processed_images/sample_10_processed.jpg.json +12 -0
  58. data/examples/image_processor/processed_images/sample_1_processed.jpg.json +12 -0
  59. data/examples/image_processor/processed_images/sample_2_processed.jpg.json +12 -0
  60. data/examples/image_processor/processed_images/sample_3_processed.jpg.json +12 -0
  61. data/examples/image_processor/processed_images/sample_4_processed.jpg.json +12 -0
  62. data/examples/image_processor/processed_images/sample_5_processed.jpg.json +12 -0
  63. data/examples/image_processor/processed_images/sample_6_processed.jpg.json +12 -0
  64. data/examples/image_processor/processed_images/sample_7_processed.jpg.json +12 -0
  65. data/examples/image_processor/processed_images/sample_8_processed.jpg.json +12 -0
  66. data/examples/image_processor/processed_images/sample_9_processed.jpg.json +12 -0
  67. data/examples/image_processor/test_images/sample_1.png +1 -0
  68. data/examples/image_processor/test_images/sample_10.png +1 -0
  69. data/examples/image_processor/test_images/sample_2.png +1 -0
  70. data/examples/image_processor/test_images/sample_3.png +1 -0
  71. data/examples/image_processor/test_images/sample_4.png +1 -0
  72. data/examples/image_processor/test_images/sample_5.png +1 -0
  73. data/examples/image_processor/test_images/sample_6.png +1 -0
  74. data/examples/image_processor/test_images/sample_7.png +1 -0
  75. data/examples/image_processor/test_images/sample_8.png +1 -0
  76. data/examples/image_processor/test_images/sample_9.png +1 -0
  77. data/examples/log_analyzer/README.adoc +662 -0
  78. data/examples/log_analyzer/log_analyzer.rb +579 -0
  79. data/examples/log_analyzer/sample_logs/apache.log +20 -0
  80. data/examples/log_analyzer/sample_logs/json.log +15 -0
  81. data/examples/log_analyzer/sample_logs/nginx.log +15 -0
  82. data/examples/log_analyzer/sample_logs/rails.log +29 -0
  83. data/examples/multi_work_type/README.adoc +576 -26
  84. data/examples/performance_monitoring.rb +120 -0
  85. data/examples/pipeline_processing/README.adoc +740 -26
  86. data/examples/pipeline_processing/pipeline_processing.rb +2 -2
  87. data/examples/priority_work_example.rb +155 -0
  88. data/examples/producer_subscriber/README.adoc +889 -46
  89. data/examples/scatter_gather/README.adoc +829 -27
  90. data/examples/simple/README.adoc +347 -0
  91. data/examples/specialized_workers/README.adoc +622 -26
  92. data/examples/specialized_workers/specialized_workers.rb +44 -8
  93. data/examples/stream_processor/README.adoc +206 -0
  94. data/examples/stream_processor/stream_processor.rb +284 -0
  95. data/examples/web_scraper/README.adoc +625 -0
  96. data/examples/web_scraper/web_scraper.rb +285 -0
  97. data/examples/workflow/README.adoc +406 -0
  98. data/examples/workflow/circuit_breaker/README.adoc +360 -0
  99. data/examples/workflow/circuit_breaker/circuit_breaker_workflow.rb +225 -0
  100. data/examples/workflow/conditional/README.adoc +483 -0
  101. data/examples/workflow/conditional/conditional_workflow.rb +215 -0
  102. data/examples/workflow/dead_letter_queue/README.adoc +374 -0
  103. data/examples/workflow/dead_letter_queue/dead_letter_queue_workflow.rb +217 -0
  104. data/examples/workflow/fan_out/README.adoc +381 -0
  105. data/examples/workflow/fan_out/fan_out_workflow.rb +202 -0
  106. data/examples/workflow/retry/README.adoc +248 -0
  107. data/examples/workflow/retry/retry_workflow.rb +195 -0
  108. data/examples/workflow/simple_linear/README.adoc +267 -0
  109. data/examples/workflow/simple_linear/simple_linear_workflow.rb +175 -0
  110. data/examples/workflow/simplified/README.adoc +329 -0
  111. data/examples/workflow/simplified/simplified_workflow.rb +222 -0
  112. data/exe/fractor +10 -0
  113. data/lib/fractor/cli.rb +288 -0
  114. data/lib/fractor/configuration.rb +307 -0
  115. data/lib/fractor/continuous_server.rb +60 -65
  116. data/lib/fractor/error_formatter.rb +72 -0
  117. data/lib/fractor/error_report_generator.rb +152 -0
  118. data/lib/fractor/error_reporter.rb +244 -0
  119. data/lib/fractor/error_statistics.rb +147 -0
  120. data/lib/fractor/execution_tracer.rb +162 -0
  121. data/lib/fractor/logger.rb +230 -0
  122. data/lib/fractor/main_loop_handler.rb +406 -0
  123. data/lib/fractor/main_loop_handler3.rb +135 -0
  124. data/lib/fractor/main_loop_handler4.rb +299 -0
  125. data/lib/fractor/performance_metrics_collector.rb +181 -0
  126. data/lib/fractor/performance_monitor.rb +215 -0
  127. data/lib/fractor/performance_report_generator.rb +202 -0
  128. data/lib/fractor/priority_work.rb +93 -0
  129. data/lib/fractor/priority_work_queue.rb +189 -0
  130. data/lib/fractor/result_aggregator.rb +32 -0
  131. data/lib/fractor/shutdown_handler.rb +168 -0
  132. data/lib/fractor/signal_handler.rb +80 -0
  133. data/lib/fractor/supervisor.rb +382 -269
  134. data/lib/fractor/supervisor_logger.rb +88 -0
  135. data/lib/fractor/version.rb +1 -1
  136. data/lib/fractor/work.rb +12 -0
  137. data/lib/fractor/work_distribution_manager.rb +151 -0
  138. data/lib/fractor/work_queue.rb +20 -0
  139. data/lib/fractor/work_result.rb +181 -9
  140. data/lib/fractor/worker.rb +73 -0
  141. data/lib/fractor/workflow/builder.rb +210 -0
  142. data/lib/fractor/workflow/chain_builder.rb +169 -0
  143. data/lib/fractor/workflow/circuit_breaker.rb +183 -0
  144. data/lib/fractor/workflow/circuit_breaker_orchestrator.rb +208 -0
  145. data/lib/fractor/workflow/circuit_breaker_registry.rb +112 -0
  146. data/lib/fractor/workflow/dead_letter_queue.rb +334 -0
  147. data/lib/fractor/workflow/execution_hooks.rb +39 -0
  148. data/lib/fractor/workflow/execution_strategy.rb +225 -0
  149. data/lib/fractor/workflow/execution_trace.rb +134 -0
  150. data/lib/fractor/workflow/helpers.rb +191 -0
  151. data/lib/fractor/workflow/job.rb +290 -0
  152. data/lib/fractor/workflow/job_dependency_validator.rb +120 -0
  153. data/lib/fractor/workflow/logger.rb +110 -0
  154. data/lib/fractor/workflow/pre_execution_context.rb +193 -0
  155. data/lib/fractor/workflow/retry_config.rb +156 -0
  156. data/lib/fractor/workflow/retry_orchestrator.rb +184 -0
  157. data/lib/fractor/workflow/retry_strategy.rb +93 -0
  158. data/lib/fractor/workflow/structured_logger.rb +30 -0
  159. data/lib/fractor/workflow/type_compatibility_validator.rb +222 -0
  160. data/lib/fractor/workflow/visualizer.rb +211 -0
  161. data/lib/fractor/workflow/workflow_context.rb +132 -0
  162. data/lib/fractor/workflow/workflow_executor.rb +669 -0
  163. data/lib/fractor/workflow/workflow_result.rb +55 -0
  164. data/lib/fractor/workflow/workflow_validator.rb +295 -0
  165. data/lib/fractor/workflow.rb +333 -0
  166. data/lib/fractor/wrapped_ractor.rb +66 -101
  167. data/lib/fractor/wrapped_ractor3.rb +161 -0
  168. data/lib/fractor/wrapped_ractor4.rb +242 -0
  169. data/lib/fractor.rb +92 -4
  170. metadata +179 -6
  171. data/tests/sample.rb.bak +0 -309
  172. data/tests/sample_working.rb.bak +0 -209
@@ -1,44 +1,758 @@
  = Pipeline Processing Example
+ :toc: macro
+ :toc-title: Table of Contents
+ :toclevels: 3

- == Overview
+ toc::[]

- This example demonstrates the Pipeline Processing pattern implemented with Fractor. In this pattern, data flows through a series of sequential processing stages, where the output of one stage becomes the input to the next.
+ == Purpose

- == Key Concepts
+ The Pipeline Processing example demonstrates how to implement multi-stage processing pipelines using Fractor. It showcases automatic stage progression: work flows through sequential transformations, each stage feeding its output to the next. This is a fundamental pattern for building ETL (Extract, Transform, Load) systems, media processing workflows, and data transformation pipelines.

- * *Pipeline*: A series of connected processing stages
- * *Data Flow*: Information passes through each stage in sequence
- * *Transformation*: Each stage performs a specific operation on the data
- * *Concurrency*: Multiple items can be at different stages of the pipeline simultaneously
+ == Focus

- == Example Explanation
+ This example demonstrates:

- This example processes data through a multi-stage pipeline:
+ * **Sequential stage processing** with automatic progression
+ * **Pipeline orchestration** using callbacks
+ * **Metadata propagation** through processing stages
+ * **Stage-specific transformations** within a unified worker
+ * **Automatic work generation** for downstream stages
+ * **Concurrent pipeline execution** for multiple items

- 1. *Input Stage*: Raw data is prepared for processing
- 2. *Processing Stages*: Data moves through a series of transformations
- 3. *Output Stage*: Final results are collected and reported
+ == Architecture

- Each stage of the pipeline can run concurrently on different workers, allowing for efficient parallel processing while maintaining the required order of operations.
+ === Pipeline Flow Overview

- == Features Demonstrated
+ [source]
+ ----
+ ┌──────────────────────────────────────────────────────────────┐
+ │ Input Images                                                 │
+ │ [sunset.jpg, mountains.png, beach.jpg, ...]                  │
+ └──────────────────────────────────────────────────────────────┘
+
+                │ Create initial work items
+
+ ┌───────────────────────────────┐
+ │ Stage 1: RESIZE               │
+ │ MediaWork(data, :resize)      │
+ └───────────────────────────────┘
+
+                │ PipelineWorker.process
+
+ ┌───────────────────────────────┐
+ │ Result: resized image         │
+ │ next_stage = :filter          │
+ └───────────────────────────────┘
+
+                │ on_new_result callback
+                │ auto-creates next work
+
+ ┌───────────────────────────────┐
+ │ Stage 2: FILTER               │
+ │ MediaWork(resized, :filter)   │
+ └───────────────────────────────┘
+
+                │ PipelineWorker.process
+
+ ┌───────────────────────────────┐
+ │ Result: filtered image        │
+ │ next_stage = :compress        │
+ └───────────────────────────────┘
+
+                │ on_new_result callback
+
+ ┌───────────────────────────────┐
+ │ Stage 3: COMPRESS             │
+ │ MediaWork(filtered, :compress)│
+ └───────────────────────────────┘
+
+                │ PipelineWorker.process
+
+ ┌───────────────────────────────┐
+ │ Result: compressed image      │
+ │ next_stage = :tag             │
+ └───────────────────────────────┘
+
+                │ on_new_result callback
+
+ ┌───────────────────────────────┐
+ │ Stage 4: TAG                  │
+ │ MediaWork(compressed, :tag)   │
+ └───────────────────────────────┘
+
+                │ PipelineWorker.process
+
+ ┌───────────────────────────────┐
+ │ Result: tagged image          │
+ │ next_stage = nil (complete)   │
+ └───────────────────────────────┘
+
+
+ ┌───────────────────────────────┐
+ │ Final Results Collection      │
+ └───────────────────────────────┘
+ ----
+
+ === Concurrent Pipeline Execution
+
+ [source]
+ ----
+ Multiple images flowing through the pipeline concurrently:
+
+ Time →
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+ Image 1: [Resize]─[Filter]─[Compress]─[Tag]─[Done]
+ Image 2: [Resize]─[Filter]─[Compress]─[Tag]─[Done]
+ Image 3: [Resize]─[Filter]─[Compress]─[Tag]─[Done]
+ Image 4: [Resize]─[Filter]─[Compress]─[Tag]─[Done]
+ Image 5: [Resize]─[Filter]─[Compress]─[Tag]─[Done]
+
+ Each image moves through stages independently while workers
+ process different images at different stages concurrently.
+ ----
+
+ === Stage Progression Mechanism
+
+ [source]
+ ----
+ ┌─────────────────────────────────────────────────────────┐
+ │ PipelineWorker                                          │
+ │                                                         │
+ │ 1. Receive work with current stage                      │
+ │ 2. Process based on stage (:resize/:filter/etc)         │
+ │ 3. Determine next stage from sequence                   │
+ │ 4. Return result with next_stage information            │
+ └─────────────────────────────────────────────────────────┘
+
+                   │ Result with next_stage
+
+ ┌─────────────────────────────────────────────────────────┐
+ │ ResultAggregator Callback                               │
+ │                                                         │
+ │ on_new_result { |result|                                │
+ │   if result.next_stage exists:                          │
+ │     - Create new MediaWork for next stage               │
+ │     - Copy processed_data as input                      │
+ │     - Preserve and enhance metadata                     │
+ │     - Add work back to supervisor                       │
+ │ }                                                       │
+ └─────────────────────────────────────────────────────────┘
+
+                   │ New work created
+
+ ┌─────────────────────────────────────────────────────────┐
+ │ Work Queue (Next Stage)                                 │
+ │ Newly created work is queued for processing             │
+ └─────────────────────────────────────────────────────────┘
+ ----
+
+ == Key Components
+
+ === MediaWork: Stage-Aware Work Unit
+
+ The `MediaWork` class carries both data and stage information:
+
+ [source,ruby]
+ ----
+ class MediaWork < Fractor::Work
+   def initialize(data, stage = :resize, metadata = {})
+     super({
+       data: data,        # <1>
+       stage: stage,      # <2>
+       metadata: metadata # <3>
+     })
+   end
+
+   def stage
+     input[:stage]
+   end
+ end
+ ----
+ <1> The actual data being processed (image path, processed output, etc.)
+ <2> Current processing stage (`:resize`, `:filter`, `:compress`, `:tag`)
+ <3> Metadata tracking processing history and timing
+
+ Why stage information lives in the work item:
+
+ * **Single worker type** can handle all stages
+ * **Dynamic routing** based on the current stage
+ * **Clear state tracking** throughout the pipeline
+ * **Stage-specific processing** logic keyed off the stage value
 
- * Sequential processing with dependencies between stages
- * Concurrent execution of pipeline stages
- * Processing optimizations through specialized workers
- * Handling data flow between processing stages
+ === PipelineWorker: Polymorphic Stage Processor

- == Running the Example
+ The `PipelineWorker` handles all stages using a case statement:

- [source,sh]
+ [source,ruby]
  ----
- ruby examples/pipeline_processing/pipeline_processing.rb
+ class PipelineWorker < Fractor::Worker
+   def process(work)
+     result = case work.stage # <1>
+              when :resize then process_resize(work)
+              when :filter then process_filter(work)
+              when :compress then process_compress(work)
+              when :tag then process_tag(work)
+              else
+                return Fractor::WorkResult.new(
+                  error: "Unknown stage: #{work.stage}",
+                  work: work
+                )
+              end
+
+     stages = [:resize, :filter, :compress, :tag] # <2>
+     current_index = stages.index(work.stage)
+     next_stage = current_index < stages.size - 1 ? stages[current_index + 1] : nil # <3>
+
+     updated_metadata = work.metadata.merge( # <4>
+       "#{work.stage}_completed" => true,
+       "#{work.stage}_time" => Time.now.to_s
+     )
+
+     Fractor::WorkResult.new(
+       result: {
+         processed_data: result, # <5>
+         current_stage: work.stage,
+         next_stage: next_stage,
+         metadata: updated_metadata
+       },
+       work: work
+     )
+   end
+ end
+ ----
+ <1> Route to the appropriate processing method based on the stage
+ <2> Define the complete pipeline stage sequence
+ <3> Calculate the next stage (or `nil` if at the end)
+ <4> Augment metadata with completion tracking
+ <5> Package the result with stage progression information
+
+ Design benefits:
+
+ * **Centralized stage logic**: All stages in one worker class
+ * **Sequential guarantee**: Explicit stage ordering
+ * **Metadata enrichment**: Each stage adds its completion info
+ * **Clear termination**: `nil` next_stage signals completion
+
+ === MediaPipeline: Automatic Stage Progression
+
+ The `MediaPipeline` orchestrates the automatic flow of work:
+
+ [source,ruby]
+ ----
+ class MediaPipeline
+   def initialize(worker_count = 4)
+     @supervisor = Fractor::Supervisor.new(
+       worker_pools: [
+         { worker_class: PipelineWorker, num_workers: worker_count }
+       ]
+     )
+
+     @supervisor.results.on_new_result do |result| # <1>
+       next_stage = result.result[:next_stage]
+
+       if next_stage # <2>
+         new_work = MediaWork.new(
+           result.result[:processed_data], # <3>
+           next_stage,                     # <4>
+           result.result[:metadata]        # <5>
+         )
+         @supervisor.add_work_item(new_work) # <6>
+       end
+     end
+   end
+
+   def process_images(images)
+     initial_work_items = images.map do |image| # <7>
+       MediaWork.new(image, :resize, {
+         original_filename: image,
+         started_at: Time.now.to_s
+       })
+     end
+
+     @supervisor.add_work_items(initial_work_items)
+     @supervisor.run # <8>
+
+     # Collect completed results (where next_stage is nil)
+     @supervisor.results.results.each do |result|
+       if result.result[:next_stage].nil? # <9>
+         @results[:completed] << result.result
+       end
+     end
+   end
+ end
  ----
+ <1> Register a callback for each completed work item
+ <2> Check whether there is another stage to process
+ <3> Use the processed output as input for the next stage
+ <4> Progress to the next stage in the pipeline
+ <5> Carry forward the metadata chain
+ <6> Dynamically add new work to the supervisor
+ <7> Create initial work items (all start at `:resize`)
+ <8> Execute the pipeline (processes all stages automatically)
+ <9> Identify completed items (those that have finished all stages)
+
+ Pipeline orchestration features:
+
+ * **Callback-driven**: Each result triggers next-stage creation
+ * **Dynamic work injection**: New work added during execution
+ * **Automatic progression**: No manual stage management needed
+ * **Completion detection**: Tracks which items finished all stages
+
+ == Usage
+
+ .Basic usage
+ [example]
+ ====
+ [source,bash]
+ ----
+ # Run the pipeline processing example
+ ruby pipeline_processing.rb
+ ----
+ ====
+
+ .Programmatic usage
+ [example]
+ ====
+ [source,ruby]
+ ----
+ require_relative "pipeline_processing"
+
+ # Create pipeline with 8 workers
+ pipeline = PipelineProcessing::MediaPipeline.new(8)
+
+ # Process a batch of images
+ images = ["photo1.jpg", "photo2.jpg", "photo3.jpg"]
+ result = pipeline.process_images(images)
+
+ puts "Completed: #{result[:completed]} images"
+ result[:results].each do |image_result|
+   puts "Processed: #{image_result[:processed_data]}"
+   puts "Metadata: #{image_result[:metadata]}"
+ end
+ ----
+ ====

  == Expected Output

- The example will show:
- * Data moving through each stage of the pipeline
- * Workers processing different stages concurrently
- * The transformation of data at each stage
- * Final results after passing through the complete pipeline
+ [source,text]
+ ----
+ Starting Pipeline Processing Example
+ =====================================
+ This example demonstrates a media processing pipeline with multiple stages:
+ 1. Resize - Adjusts image dimensions
+ 2. Filter - Applies visual filters
+ 3. Compress - Optimizes file size
+ 4. Tag - Analyzes and adds metadata tags
+
+ Processing 5 images with 4 workers...
+
+ Pipeline Results:
+ ----------------
+ Total images: 5
+ Completed: 5
+ In progress: 0
+
+ Processed Images:
+ Image 1: Tagged image: Compressed image: Applied vibrance filter to: Resized image: sunset.jpg (1024x768) (reduced by 45%) (tags: landscape, nature)
+ Processing path:
+   resize_completed: true
+   resize_time: 2025-10-18 07:30:15 +0800
+   filter_completed: true
+   filter_time: 2025-10-18 07:30:16 +0800
+   compress_completed: true
+   compress_time: 2025-10-18 07:30:17 +0800
+   tag_completed: true
+   tag_time: 2025-10-18 07:30:18 +0800
+
+ Image 2: Tagged image: Compressed image: Applied grayscale filter to: ...
+ ...
+
+ Processing completed in 0.456789 seconds
+ ----
+
+ == Learning Points
+
+ === 1. Callback-Driven Pipeline Progression
+
+ The example uses callbacks to automatically create next-stage work:
+
+ [source,ruby]
+ ----
+ @supervisor.results.on_new_result do |result|
+   next_stage = result.result[:next_stage]
+
+   if next_stage
+     # Automatically create and queue next stage work
+     new_work = MediaWork.new(
+       result.result[:processed_data],
+       next_stage,
+       result.result[:metadata]
+     )
+     @supervisor.add_work_item(new_work)
+   end
+ end
+ ----
+
+ **Key insight**: The supervisor continues running while the callback adds new work, enabling seamless stage transitions.
+
+ === 2. Stage Sequencing
+
+ The pipeline defines a fixed stage sequence:
+
+ [source,ruby]
+ ----
+ stages = [:resize, :filter, :compress, :tag]
+ current_index = stages.index(work.stage)
+ next_stage = current_index < stages.size - 1 ? stages[current_index + 1] : nil
+ ----
+
+ **Alternatives**:
+
+ * **Hash-based routing**: `NEXT_STAGE = { resize: :filter, filter: :compress, ... }` (expanded in the sketch below)
+ * **State machine**: Use a formal state transition table
+ * **Dynamic routing**: Determine the next stage based on data content
+
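+ A minimal sketch of the hash-based alternative (illustrative, not part of the
+ example; `NEXT_STAGE` is a suggested name):
+
+ [source,ruby]
+ ----
+ # The terminal stage maps to nil, signalling completion.
+ NEXT_STAGE = {
+   resize: :filter,
+   filter: :compress,
+   compress: :tag,
+   tag: nil
+ }.freeze
+
+ next_stage = NEXT_STAGE.fetch(work.stage) # raises KeyError on unknown stages
+ ----
+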
+ === 3. Metadata Propagation
+
+ Each stage enriches the metadata:
+
+ [source,ruby]
+ ----
+ updated_metadata = work.metadata.merge(
+   "#{work.stage}_completed" => true,
+   "#{work.stage}_time" => Time.now.to_s
+ )
+ ----
+
+ **Benefits**:
+
+ * **Complete audit trail**: Track when each stage completed
+ * **Performance analysis**: Measure per-stage processing time
+ * **Debugging support**: Identify which stage caused issues
+ * **Lineage tracking**: Full data processing history
+
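+ As an illustration of the performance-analysis point, per-stage durations can
+ be recovered from the timestamps (a sketch; assumes the `*_time` metadata keys
+ shown above and a `metadata` hash in scope):
+
+ [source,ruby]
+ ----
+ require "time"
+
+ # Sort the stage timestamps chronologically, then diff neighbours
+ # to approximate how long each stage took.
+ stamps = metadata.select { |k, _| k.to_s.end_with?("_time") }
+                  .map { |k, v| [k, Time.parse(v)] }
+                  .sort_by { |_, t| t }
+ durations = stamps.each_cons(2).map { |(_, t1), (k2, t2)| [k2, t2 - t1] }
+ ----
+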
+ === 4. Pipeline vs. Workflow
+
+ **Pipeline Pattern** (this example):
+
+ * Fixed stage sequence
+ * Automatic progression
+ * All items follow the same path
+ * Simple, linear flow
+
+ **Workflow Pattern** (see the workflow examples):
+
+ * Flexible stage dependencies
+ * Conditional branching
+ * Parallel execution paths
+ * Complex orchestration
+
+ Choose pipelines for:
+
+ * ETL processes
+ * Media transformations
+ * Data validation chains
+ * Sequential transformations
+
+ === 5. Error Handling in Pipelines
+
+ The current implementation assumes every stage succeeds. For production, wrap stage execution in error handling:
+
+ [source,ruby]
+ ----
+ def process(work)
+   begin
+     result = case work.stage
+              when :resize then process_resize(work)
+              # ... other stages
+              end
+
+     # Return success with next stage
+     Fractor::WorkResult.new(result: { ... })
+   rescue StandardError => e
+     # Return error, stopping this item's pipeline
+     Fractor::WorkResult.new(
+       error: "Stage #{work.stage} failed: #{e.message}",
+       work: work
+     )
+   end
+ end
+ ----
+
+ **Error strategies**:
+
+ * **Fail-fast**: Stop the item at the failed stage
+ * **Retry**: Attempt the stage again (add a retry count to the metadata)
+ * **Skip**: Continue to the next stage with an error flag (see the sketch below)
+ * **Rollback**: Undo previous stages (if possible)
+
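+ A sketch of the skip strategy, keeping the result shape used above
+ (`execute_stage`, `next_stage_for`, and `build_success_result` are
+ hypothetical helpers):
+
+ [source,ruby]
+ ----
+ def process(work)
+   processed = execute_stage(work) # dispatch on work.stage as shown earlier
+   build_success_result(work, processed)
+ rescue StandardError => e
+   # Skip: keep the item moving, but record the failure in metadata.
+   Fractor::WorkResult.new(
+     result: {
+       processed_data: work.input[:data], # pass the input through unchanged
+       current_stage: work.stage,
+       next_stage: next_stage_for(work.stage),
+       metadata: work.metadata.merge("#{work.stage}_error" => e.message)
+     },
+     work: work
+   )
+ end
+ ----
+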
490
+ === 6. Performance Characteristics
491
+
492
+ **Throughput analysis**:
493
+
494
+ [source]
495
+ ----
496
+ Total stages: 4
497
+ Items: 5
498
+ Workers: 4
499
+
500
+ Time per stage: 0.03s (average)
501
+ Sequential time: 5 items × 4 stages × 0.03s = 0.6s
502
+
503
+ With pipelining:
504
+ - Initial fill: 4 stages × 0.03s = 0.12s
505
+ - Steady state: 5 items × 0.03s = 0.15s
506
+ - Total: ~0.27s (2.2x speedup)
507
+
508
+ Actual speedup depends on stage duration variance and worker count.
509
+ ----
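+
+ The fill-plus-drain estimate generalizes to `(stages + items - 1) × time_per_stage`
+ when stage durations are roughly uniform, as in this sketch:
+
+ [source,ruby]
+ ----
+ # Idealized pipeline timing: uniform stage duration, no queueing overhead.
+ def pipelined_time(stages:, items:, per_stage:)
+   (stages + items - 1) * per_stage
+ end
+
+ pipelined_time(stages: 4, items: 5, per_stage: 0.03) # => 0.24
+ ----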
+
+ == Use Cases and Patterns
+
+ === ETL Pipeline
+
+ Extract, Transform, Load data processing:
+
+ [source,ruby]
+ ----
+ stages = [:extract, :validate, :transform, :enrich, :load]
+
+ def process(work)
+   result = case work.stage
+            when :extract then read_from_source(work)
+            when :validate then check_data_quality(work)
+            when :transform then apply_business_rules(work)
+            when :enrich then add_derived_fields(work)
+            when :load then write_to_destination(work)
+            end
+   # ... next stage logic
+ end
+ ----
+
+ === Media Processing Pipeline
+
+ Video/image processing workflow:
+
+ [source,ruby]
+ ----
+ stages = [:transcode, :watermark, :thumbnail, :upload]
+
+ def process(work)
+   result = case work.stage
+            when :transcode then convert_format(work)
+            when :watermark then apply_branding(work)
+            when :thumbnail then generate_previews(work)
+            when :upload then store_in_cdn(work)
+            end
+   # ... next stage logic
+ end
+ ----
+
+ === Data Validation Chain
+
+ Multi-stage data validation:
+
+ [source,ruby]
+ ----
+ stages = [:format_check, :schema_validation, :business_rules, :duplicate_check]
+
+ def process(work)
+   result = case work.stage
+            when :format_check then validate_file_format(work)
+            when :schema_validation then check_against_schema(work)
+            when :business_rules then apply_domain_rules(work)
+            when :duplicate_check then find_duplicates(work)
+            end
+   # ... next stage logic
+ end
+ ----
+
+ === Document Processing
+
+ Multi-step document transformation:
+
+ [source,ruby]
+ ----
+ stages = [:parse, :extract_text, :classify, :index]
+
+ def process(work)
+   result = case work.stage
+            when :parse then parse_pdf(work)
+            when :extract_text then ocr_if_needed(work)
+            when :classify then categorize_document(work)
+            when :index then add_to_search_engine(work)
+            end
+   # ... next stage logic
+ end
+ ----
+
+ == Advanced Patterns
+
+ === Conditional Stages
+
+ Add stage-skipping logic:
+
+ [source,ruby]
+ ----
+ def determine_next_stage(work, result)
+   case work.stage
+   when :validate
+     result[:valid] ? :transform : :error_queue
+   when :transform
+     result[:needs_enrichment] ? :enrich : :load
+   else
+     # Default sequential progression
+     stages[stages.index(work.stage) + 1]
+   end
+ end
+ ----
+
+ === Parallel Sub-Pipelines
+
+ Fork into parallel processing paths:
+
+ [source,ruby]
+ ----
+ @supervisor.results.on_new_result do |result|
+   if result.result[:current_stage] == :split
+     # Create multiple parallel work items
+     result.result[:chunks].each do |chunk|
+       @supervisor.add_work_item(
+         MediaWork.new(chunk, :process_chunk, result.result[:metadata])
+       )
+     end
+   elsif result.result[:current_stage] == :process_chunk
+     # Collect for merging
+     @completed_chunks << result
+     if all_chunks_complete?
+       @supervisor.add_work_item(
+         MediaWork.new(merged_data, :merge, metadata)
+       )
+     end
+   else
+     # Normal progression
+     # ...
+   end
+ end
+ ----
+
+ === Stage Retry Logic
+
+ Add automatic retry for failed stages:
+
+ [source,ruby]
+ ----
+ MAX_RETRIES = 3
+
+ def process(work)
+   retry_count = work.metadata["#{work.stage}_retries"] || 0
+
+   begin
+     result = execute_stage(work)
+     Fractor::WorkResult.new(result: { ... })
+   rescue StandardError => e
+     if retry_count < MAX_RETRIES
+       # Retry the same stage. (Sketch only: with Ractor-isolated workers,
+       # re-queue from the supervisor's result callback rather than here.)
+       @supervisor.add_work_item(
+         MediaWork.new(
+           work.data,
+           work.stage, # Same stage
+           work.metadata.merge("#{work.stage}_retries" => retry_count + 1)
+         )
+       )
+       Fractor::WorkResult.new(result: { retrying: true })
+     else
+       # Max retries exceeded
+       Fractor::WorkResult.new(error: "Failed after #{retry_count} retries: #{e}", work: work)
+     end
+   end
+ end
+ ----
+
+ === Pipeline Monitoring
+
+ Add detailed progress tracking:
+
+ [source,ruby]
+ ----
+ def initialize(worker_count = 4)
+   @stage_metrics = Hash.new { |h, k| h[k] = { count: 0, total_time: 0 } }
+
+   @supervisor.results.on_new_result do |result|
+     stage = result.result[:current_stage]
+     duration = calculate_duration(result)
+
+     @stage_metrics[stage][:count] += 1
+     @stage_metrics[stage][:total_time] += duration
+
+     log_progress(result)
+     create_next_stage_work(result)
+   end
+ end
+
+ def print_metrics
+   @stage_metrics.each do |stage, metrics|
+     avg_time = metrics[:total_time] / metrics[:count]
+     puts "#{stage}: #{metrics[:count]} items, avg #{avg_time}s"
+   end
+ end
+ ----
+
+ == Performance Tuning
+
+ === Worker Pool Sizing
+
+ [source,ruby]
+ ----
+ require "etc"
+
+ # For CPU-bound stages (encoding, compression)
+ worker_count = Etc.nprocessors
+
+ # For I/O-bound stages (reading, writing)
+ worker_count = Etc.nprocessors * 2
+
+ # For mixed workloads
+ worker_count = (Etc.nprocessors * 1.5).to_i
+ ----
+
+ === Batching for Efficiency
+
+ Process multiple items per work unit:
+
+ [source,ruby]
+ ----
+ def process_images(images)
+   batch_size = 10
+   batches = images.each_slice(batch_size).to_a
+
+   # Each work item now carries a whole batch (queue these as usual
+   # with @supervisor.add_work_items).
+   batches.map do |batch|
+     MediaWork.new(batch, :resize_batch, { batch_size: batch.size })
+   end
+ end
+ ----
+
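+ The worker then needs a matching batch stage. A sketch (method names are
+ illustrative; `resize_image` is hypothetical):
+
+ [source,ruby]
+ ----
+ def process_resize_batch(work)
+   # One worker round-trip resizes the whole batch.
+   work.input[:data].map { |image| resize_image(image) }
+ end
+ ----
+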
+ === Stage-Specific Optimization
+
+ Optimize each stage individually:
+
+ [source,ruby]
+ ----
+ def process_resize(work)
+   # Use a faster resize for small images
+   if work.data.size < 1_000_000
+     quick_resize(work)
+   else
+     high_quality_resize(work)
+   end
+ end
+ ----
+
+ == Next Steps
+
+ After understanding pipeline processing, explore:
+
+ * **link:../producer_subscriber/README.adoc[Producer-Subscriber]**: Streaming data patterns
+ * **link:../scatter_gather/README.adoc[Scatter-Gather]**: Dynamic distribution and collection
+ * **link:../workflow/README.adoc[Workflow System]**: Complex multi-path pipelines with branching
+ * **link:../hierarchical_hasher/README.adoc[Hierarchical Hasher]**: Map-reduce aggregation patterns