fractor 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +227 -102
- data/README.adoc +113 -1940
- data/docs/.lycheeignore +16 -0
- data/docs/Gemfile +24 -0
- data/docs/README.md +157 -0
- data/docs/_config.yml +151 -0
- data/docs/_features/error-handling.adoc +1192 -0
- data/docs/_features/index.adoc +80 -0
- data/docs/_features/monitoring.adoc +589 -0
- data/docs/_features/signal-handling.adoc +202 -0
- data/docs/_features/workflows.adoc +1235 -0
- data/docs/_guides/continuous-mode.adoc +736 -0
- data/docs/_guides/cookbook.adoc +1133 -0
- data/docs/_guides/index.adoc +55 -0
- data/docs/_guides/pipeline-mode.adoc +730 -0
- data/docs/_guides/troubleshooting.adoc +358 -0
- data/docs/_pages/architecture.adoc +1390 -0
- data/docs/_pages/core-concepts.adoc +1392 -0
- data/docs/_pages/design-principles.adoc +862 -0
- data/docs/_pages/getting-started.adoc +290 -0
- data/docs/_pages/installation.adoc +143 -0
- data/docs/_reference/api.adoc +1080 -0
- data/docs/_reference/error-reporting.adoc +670 -0
- data/docs/_reference/examples.adoc +181 -0
- data/docs/_reference/index.adoc +96 -0
- data/docs/_reference/troubleshooting.adoc +862 -0
- data/docs/_tutorials/complex-workflows.adoc +1022 -0
- data/docs/_tutorials/data-processing-pipeline.adoc +740 -0
- data/docs/_tutorials/first-application.adoc +384 -0
- data/docs/_tutorials/index.adoc +48 -0
- data/docs/_tutorials/long-running-services.adoc +931 -0
- data/docs/assets/images/favicon-16.png +0 -0
- data/docs/assets/images/favicon-32.png +0 -0
- data/docs/assets/images/favicon-48.png +0 -0
- data/docs/assets/images/favicon.ico +0 -0
- data/docs/assets/images/favicon.png +0 -0
- data/docs/assets/images/favicon.svg +45 -0
- data/docs/assets/images/fractor-icon.svg +49 -0
- data/docs/assets/images/fractor-logo.svg +61 -0
- data/docs/index.adoc +131 -0
- data/docs/lychee.toml +39 -0
- data/examples/api_aggregator/README.adoc +627 -0
- data/examples/api_aggregator/api_aggregator.rb +376 -0
- data/examples/auto_detection/README.adoc +407 -29
- data/examples/continuous_chat_common/message_protocol.rb +1 -1
- data/examples/error_reporting.rb +207 -0
- data/examples/file_processor/README.adoc +170 -0
- data/examples/file_processor/file_processor.rb +615 -0
- data/examples/file_processor/sample_files/invalid.csv +1 -0
- data/examples/file_processor/sample_files/orders.xml +24 -0
- data/examples/file_processor/sample_files/products.json +23 -0
- data/examples/file_processor/sample_files/users.csv +6 -0
- data/examples/hierarchical_hasher/README.adoc +629 -41
- data/examples/image_processor/README.adoc +610 -0
- data/examples/image_processor/image_processor.rb +349 -0
- data/examples/image_processor/processed_images/sample_10_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_1_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_2_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_3_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_4_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_5_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_6_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_7_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_8_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_9_processed.jpg.json +12 -0
- data/examples/image_processor/test_images/sample_1.png +1 -0
- data/examples/image_processor/test_images/sample_10.png +1 -0
- data/examples/image_processor/test_images/sample_2.png +1 -0
- data/examples/image_processor/test_images/sample_3.png +1 -0
- data/examples/image_processor/test_images/sample_4.png +1 -0
- data/examples/image_processor/test_images/sample_5.png +1 -0
- data/examples/image_processor/test_images/sample_6.png +1 -0
- data/examples/image_processor/test_images/sample_7.png +1 -0
- data/examples/image_processor/test_images/sample_8.png +1 -0
- data/examples/image_processor/test_images/sample_9.png +1 -0
- data/examples/log_analyzer/README.adoc +662 -0
- data/examples/log_analyzer/log_analyzer.rb +579 -0
- data/examples/log_analyzer/sample_logs/apache.log +20 -0
- data/examples/log_analyzer/sample_logs/json.log +15 -0
- data/examples/log_analyzer/sample_logs/nginx.log +15 -0
- data/examples/log_analyzer/sample_logs/rails.log +29 -0
- data/examples/multi_work_type/README.adoc +576 -26
- data/examples/performance_monitoring.rb +120 -0
- data/examples/pipeline_processing/README.adoc +740 -26
- data/examples/pipeline_processing/pipeline_processing.rb +2 -2
- data/examples/priority_work_example.rb +155 -0
- data/examples/producer_subscriber/README.adoc +889 -46
- data/examples/scatter_gather/README.adoc +829 -27
- data/examples/simple/README.adoc +347 -0
- data/examples/specialized_workers/README.adoc +622 -26
- data/examples/specialized_workers/specialized_workers.rb +44 -8
- data/examples/stream_processor/README.adoc +206 -0
- data/examples/stream_processor/stream_processor.rb +284 -0
- data/examples/web_scraper/README.adoc +625 -0
- data/examples/web_scraper/web_scraper.rb +285 -0
- data/examples/workflow/README.adoc +406 -0
- data/examples/workflow/circuit_breaker/README.adoc +360 -0
- data/examples/workflow/circuit_breaker/circuit_breaker_workflow.rb +225 -0
- data/examples/workflow/conditional/README.adoc +483 -0
- data/examples/workflow/conditional/conditional_workflow.rb +215 -0
- data/examples/workflow/dead_letter_queue/README.adoc +374 -0
- data/examples/workflow/dead_letter_queue/dead_letter_queue_workflow.rb +217 -0
- data/examples/workflow/fan_out/README.adoc +381 -0
- data/examples/workflow/fan_out/fan_out_workflow.rb +202 -0
- data/examples/workflow/retry/README.adoc +248 -0
- data/examples/workflow/retry/retry_workflow.rb +195 -0
- data/examples/workflow/simple_linear/README.adoc +267 -0
- data/examples/workflow/simple_linear/simple_linear_workflow.rb +175 -0
- data/examples/workflow/simplified/README.adoc +329 -0
- data/examples/workflow/simplified/simplified_workflow.rb +222 -0
- data/exe/fractor +10 -0
- data/lib/fractor/cli.rb +288 -0
- data/lib/fractor/configuration.rb +307 -0
- data/lib/fractor/continuous_server.rb +60 -65
- data/lib/fractor/error_formatter.rb +72 -0
- data/lib/fractor/error_report_generator.rb +152 -0
- data/lib/fractor/error_reporter.rb +244 -0
- data/lib/fractor/error_statistics.rb +147 -0
- data/lib/fractor/execution_tracer.rb +162 -0
- data/lib/fractor/logger.rb +230 -0
- data/lib/fractor/main_loop_handler.rb +406 -0
- data/lib/fractor/main_loop_handler3.rb +135 -0
- data/lib/fractor/main_loop_handler4.rb +299 -0
- data/lib/fractor/performance_metrics_collector.rb +181 -0
- data/lib/fractor/performance_monitor.rb +215 -0
- data/lib/fractor/performance_report_generator.rb +202 -0
- data/lib/fractor/priority_work.rb +93 -0
- data/lib/fractor/priority_work_queue.rb +189 -0
- data/lib/fractor/result_aggregator.rb +32 -0
- data/lib/fractor/shutdown_handler.rb +168 -0
- data/lib/fractor/signal_handler.rb +80 -0
- data/lib/fractor/supervisor.rb +382 -269
- data/lib/fractor/supervisor_logger.rb +88 -0
- data/lib/fractor/version.rb +1 -1
- data/lib/fractor/work.rb +12 -0
- data/lib/fractor/work_distribution_manager.rb +151 -0
- data/lib/fractor/work_queue.rb +20 -0
- data/lib/fractor/work_result.rb +181 -9
- data/lib/fractor/worker.rb +73 -0
- data/lib/fractor/workflow/builder.rb +210 -0
- data/lib/fractor/workflow/chain_builder.rb +169 -0
- data/lib/fractor/workflow/circuit_breaker.rb +183 -0
- data/lib/fractor/workflow/circuit_breaker_orchestrator.rb +208 -0
- data/lib/fractor/workflow/circuit_breaker_registry.rb +112 -0
- data/lib/fractor/workflow/dead_letter_queue.rb +334 -0
- data/lib/fractor/workflow/execution_hooks.rb +39 -0
- data/lib/fractor/workflow/execution_strategy.rb +225 -0
- data/lib/fractor/workflow/execution_trace.rb +134 -0
- data/lib/fractor/workflow/helpers.rb +191 -0
- data/lib/fractor/workflow/job.rb +290 -0
- data/lib/fractor/workflow/job_dependency_validator.rb +120 -0
- data/lib/fractor/workflow/logger.rb +110 -0
- data/lib/fractor/workflow/pre_execution_context.rb +193 -0
- data/lib/fractor/workflow/retry_config.rb +156 -0
- data/lib/fractor/workflow/retry_orchestrator.rb +184 -0
- data/lib/fractor/workflow/retry_strategy.rb +93 -0
- data/lib/fractor/workflow/structured_logger.rb +30 -0
- data/lib/fractor/workflow/type_compatibility_validator.rb +222 -0
- data/lib/fractor/workflow/visualizer.rb +211 -0
- data/lib/fractor/workflow/workflow_context.rb +132 -0
- data/lib/fractor/workflow/workflow_executor.rb +669 -0
- data/lib/fractor/workflow/workflow_result.rb +55 -0
- data/lib/fractor/workflow/workflow_validator.rb +295 -0
- data/lib/fractor/workflow.rb +333 -0
- data/lib/fractor/wrapped_ractor.rb +66 -101
- data/lib/fractor/wrapped_ractor3.rb +161 -0
- data/lib/fractor/wrapped_ractor4.rb +242 -0
- data/lib/fractor.rb +92 -4
- metadata +179 -6
- data/tests/sample.rb.bak +0 -309
- data/tests/sample_working.rb.bak +0 -209
data/examples/pipeline_processing/README.adoc
@@ -1,44 +1,758 @@

= Pipeline Processing Example
:toc: macro
:toc-title: Table of Contents
:toclevels: 3

toc::[]

== Purpose

The Pipeline Processing example demonstrates how to implement multi-stage processing pipelines using Fractor. It showcases automatic stage progression where work flows through sequential transformations, with each stage feeding its output to the next stage. This is a fundamental pattern for building ETL (Extract, Transform, Load) systems, media processing workflows, and data transformation pipelines.

== Focus

This example demonstrates:

* **Sequential stage processing** with automatic progression
* **Pipeline orchestration** using callbacks
* **Metadata propagation** through processing stages
* **Stage-specific transformations** within a unified worker
* **Automatic work generation** for downstream stages
* **Concurrent pipeline execution** for multiple items

== Architecture

=== Pipeline Flow Overview

[source]
----
┌──────────────────────────────────────────────────────────────┐
│                         Input Images                         │
│          [sunset.jpg, mountains.png, beach.jpg, ...]         │
└──────────────────────────────────────────────────────────────┘
                               │
                               │ Create initial work items
                               ▼
               ┌───────────────────────────────┐
               │        Stage 1: RESIZE        │
               │   MediaWork(data, :resize)    │
               └───────────────────────────────┘
                               │
                               │ PipelineWorker.process
                               ▼
               ┌───────────────────────────────┐
               │     Result: resized image     │
               │     next_stage = :filter      │
               └───────────────────────────────┘
                               │
                               │ on_new_result callback
                               │ auto-creates next work
                               ▼
               ┌───────────────────────────────┐
               │        Stage 2: FILTER        │
               │  MediaWork(resized, :filter)  │
               └───────────────────────────────┘
                               │
                               │ PipelineWorker.process
                               ▼
               ┌───────────────────────────────┐
               │    Result: filtered image     │
               │    next_stage = :compress     │
               └───────────────────────────────┘
                               │
                               │ on_new_result callback
                               ▼
               ┌───────────────────────────────┐
               │       Stage 3: COMPRESS       │
               │ MediaWork(filtered,:compress) │
               └───────────────────────────────┘
                               │
                               │ PipelineWorker.process
                               ▼
               ┌───────────────────────────────┐
               │   Result: compressed image    │
               │       next_stage = :tag       │
               └───────────────────────────────┘
                               │
                               │ on_new_result callback
                               ▼
               ┌───────────────────────────────┐
               │         Stage 4: TAG          │
               │  MediaWork(compressed, :tag)  │
               └───────────────────────────────┘
                               │
                               │ PipelineWorker.process
                               ▼
               ┌───────────────────────────────┐
               │     Result: tagged image      │
               │  next_stage = nil (complete)  │
               └───────────────────────────────┘
                               │
                               ▼
               ┌───────────────────────────────┐
               │   Final Results Collection    │
               └───────────────────────────────┘
----

=== Concurrent Pipeline Execution

[source]
----
Multiple images flowing through the pipeline concurrently:

Time →
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Image 1: [Resize]─[Filter]─[Compress]─[Tag]─[Done]
Image 2: [Resize]─[Filter]─[Compress]─[Tag]─[Done]
Image 3: [Resize]─[Filter]─[Compress]─[Tag]─[Done]
Image 4: [Resize]─[Filter]─[Compress]─[Tag]─[Done]
Image 5: [Resize]─[Filter]─[Compress]─[Tag]─[Done]

Each image moves through stages independently while workers
process different images at different stages concurrently.
----

=== Stage Progression Mechanism

[source]
----
┌─────────────────────────────────────────────────────────┐
│                     PipelineWorker                      │
│                                                         │
│  1. Receive work with current stage                     │
│  2. Process based on stage (:resize/:filter/etc)        │
│  3. Determine next stage from sequence                  │
│  4. Return result with next_stage information           │
└─────────────────────────────────────────────────────────┘
                             │
                             │ Result with next_stage
                             ▼
┌─────────────────────────────────────────────────────────┐
│                ResultAggregator Callback                │
│                                                         │
│  on_new_result { |result|                               │
│    if result.next_stage exists:                         │
│      - Create new MediaWork for next stage              │
│      - Copy processed_data as input                     │
│      - Preserve and enhance metadata                    │
│      - Add work back to supervisor                      │
│  }                                                      │
└─────────────────────────────────────────────────────────┘
                             │
                             │ New work created
                             ▼
┌─────────────────────────────────────────────────────────┐
│                 Work Queue (Next Stage)                 │
│       Newly created work is queued for processing       │
└─────────────────────────────────────────────────────────┘
----

== Key Components

=== MediaWork: Stage-Aware Work Unit

The `MediaWork` class carries both data and stage information:

[source,ruby]
----
class MediaWork < Fractor::Work
  def initialize(data, stage = :resize, metadata = {})
    super({
      data: data,          # <1>
      stage: stage,        # <2>
      metadata: metadata   # <3>
    })
  end

  def stage
    input[:stage]
  end

  # Readers for the remaining input fields, mirroring #stage
  # (the worker and pipeline below rely on them)
  def data
    input[:data]
  end

  def metadata
    input[:metadata]
  end
end
----
<1> The actual data being processed (image path, processed output, etc.)
<2> Current processing stage (`:resize`, `:filter`, `:compress`, `:tag`)
<3> Metadata tracking processing history and timing

Why stage information belongs in work items (see the sketch after this list):

* **Single worker type** can handle all stages
* **Dynamic routing** based on current stage
* **Clear state tracking** throughout the pipeline
* **Enables stage-specific processing** logic
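
The snippet below is an illustrative sketch (not taken from the example source) showing how the one class models an item at any point in the pipeline:

[source,ruby]
----
# One work class, any stage: routing is just data.
first  = MediaWork.new("sunset.jpg")                   # stage defaults to :resize
middle = MediaWork.new("Resized image: sunset.jpg", :filter,
                       { "resize_completed" => true })

first.stage  # => :resize
middle.stage # => :filter
----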

=== PipelineWorker: Polymorphic Stage Processor

The `PipelineWorker` handles all stages using a case statement:

[source,ruby]
----
class PipelineWorker < Fractor::Worker
  def process(work)
    result = case work.stage # <1>
             when :resize then process_resize(work)
             when :filter then process_filter(work)
             when :compress then process_compress(work)
             when :tag then process_tag(work)
             else
               return Fractor::WorkResult.new(
                 error: "Unknown stage: #{work.stage}",
                 work: work
               )
             end

    stages = [:resize, :filter, :compress, :tag] # <2>
    current_index = stages.index(work.stage)
    next_stage = current_index < stages.size - 1 ? stages[current_index + 1] : nil # <3>

    updated_metadata = work.metadata.merge( # <4>
      "#{work.stage}_completed" => true,
      "#{work.stage}_time" => Time.now.to_s
    )

    Fractor::WorkResult.new(
      result: {
        processed_data: result, # <5>
        current_stage: work.stage,
        next_stage: next_stage,
        metadata: updated_metadata
      },
      work: work
    )
  end
end
----
<1> Route to the appropriate processing method based on stage
<2> Define the complete pipeline stage sequence
<3> Calculate the next stage (or `nil` if at the end)
<4> Augment metadata with completion tracking
<5> Package the result with stage progression information

Design benefits:

* **Centralized stage logic**: All stages in one worker class
* **Sequential guarantee**: Explicit stage ordering
* **Metadata enrichment**: Each stage adds its completion info
* **Clear termination**: `nil` next_stage signals completion

=== MediaPipeline: Automatic Stage Progression

The `MediaPipeline` orchestrates automatic work flow:

[source,ruby]
----
class MediaPipeline
  def initialize(worker_count = 4)
    @results = { completed: [] } # collector for items that finish every stage
    @supervisor = Fractor::Supervisor.new(
      worker_pools: [
        { worker_class: PipelineWorker, num_workers: worker_count }
      ]
    )

    @supervisor.results.on_new_result do |result| # <1>
      next_stage = result.result[:next_stage]

      if next_stage # <2>
        new_work = MediaWork.new(
          result.result[:processed_data], # <3>
          next_stage, # <4>
          result.result[:metadata] # <5>
        )
        @supervisor.add_work_item(new_work) # <6>
      end
    end
  end

  def process_images(images)
    initial_work_items = images.map do |image| # <7>
      MediaWork.new(image, :resize, {
        original_filename: image,
        started_at: Time.now.to_s
      })
    end

    @supervisor.add_work_items(initial_work_items)
    @supervisor.run # <8>

    # Collect completed results (where next_stage is nil)
    @supervisor.results.results.each do |result|
      if result.result[:next_stage].nil? # <9>
        @results[:completed] << result.result
      end
    end

    # Summary consumed by the usage example below
    { completed: @results[:completed].size, results: @results[:completed] }
  end
end
----
<1> Register a callback for each completed work item
<2> Check whether there is another stage to process
<3> Use the processed output as input for the next stage
<4> Progress to the next stage in the pipeline
<5> Carry forward the metadata chain
<6> Dynamically add new work to the supervisor
<7> Create initial work items (all start at `:resize`)
<8> Execute the pipeline (processes all stages automatically)
<9> Identify completed items (those that have finished all stages)

Pipeline orchestration features:

* **Callback-driven**: Each result triggers next-stage creation
* **Dynamic work injection**: New work is added during execution
* **Automatic progression**: No manual stage management needed
* **Completion detection**: Tracks which items finished all stages

== Usage

.Basic usage
[example]
====
[source,bash]
----
# Run the pipeline processing example
ruby pipeline_processing.rb
----
====

.Programmatic usage
[example]
====
[source,ruby]
----
require_relative "pipeline_processing"

# Create pipeline with 8 workers
pipeline = PipelineProcessing::MediaPipeline.new(8)

# Process a batch of images
images = ["photo1.jpg", "photo2.jpg", "photo3.jpg"]
result = pipeline.process_images(images)

puts "Completed: #{result[:completed]} images"
result[:results].each do |image_result|
  puts "Processed: #{image_result[:processed_data]}"
  puts "Metadata: #{image_result[:metadata]}"
end
----
====

== Expected Output

[source,text]
----
Starting Pipeline Processing Example
=====================================
This example demonstrates a media processing pipeline with multiple stages:
1. Resize - Adjusts image dimensions
2. Filter - Applies visual filters
3. Compress - Optimizes file size
4. Tag - Analyzes and adds metadata tags

Processing 5 images with 4 workers...

Pipeline Results:
----------------
Total images: 5
Completed: 5
In progress: 0

Processed Images:
Image 1: Tagged image: Compressed image: Applied vibrance filter to: Resized image: sunset.jpg (1024x768) (reduced by 45%) (tags: landscape, nature)
  Processing path:
    resize_completed: true
    resize_time: 2025-10-18 07:30:15 +0800
    filter_completed: true
    filter_time: 2025-10-18 07:30:16 +0800
    compress_completed: true
    compress_time: 2025-10-18 07:30:17 +0800
    tag_completed: true
    tag_time: 2025-10-18 07:30:18 +0800

Image 2: Tagged image: Compressed image: Applied grayscale filter to: ...
...

Processing completed in 0.456789 seconds
----

== Learning Points

=== 1. Callback-Driven Pipeline Progression

The example uses callbacks to automatically create next-stage work:

[source,ruby]
----
@supervisor.results.on_new_result do |result|
  next_stage = result.result[:next_stage]

  if next_stage
    # Automatically create and queue next-stage work
    new_work = MediaWork.new(
      result.result[:processed_data],
      next_stage,
      result.result[:metadata]
    )
    @supervisor.add_work_item(new_work)
  end
end
----

**Key insight**: The supervisor continues running while the callback adds new work, enabling seamless stage transitions.

=== 2. Stage Sequencing

The pipeline defines a fixed stage sequence:

[source,ruby]
----
stages = [:resize, :filter, :compress, :tag]
current_index = stages.index(work.stage)
next_stage = current_index < stages.size - 1 ? stages[current_index + 1] : nil
----

**Alternatives**:

* **Hash-based routing**: `NEXT_STAGE = { resize: :filter, filter: :compress, ... }` (sketched after this list)
* **State machine**: Use a formal state transition table
* **Dynamic routing**: Determine next stage based on data content
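
As a sketch of the hash-based alternative (hypothetical code, not part of the example), the lookup table turns next-stage selection into a single hash access, and a missing key naturally yields `nil` to signal completion:

[source,ruby]
----
# Hash-based routing: no index arithmetic needed.
NEXT_STAGE = {
  resize: :filter,
  filter: :compress,
  compress: :tag
  # no entry for :tag, so NEXT_STAGE[:tag] => nil (pipeline complete)
}.freeze

next_stage = NEXT_STAGE[work.stage]
----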

=== 3. Metadata Propagation

Each stage enriches metadata:

[source,ruby]
----
updated_metadata = work.metadata.merge(
  "#{work.stage}_completed" => true,
  "#{work.stage}_time" => Time.now.to_s
)
----

**Benefits** (a sketch of putting the recorded timestamps to use follows this list):

* **Complete audit trail**: Track when each stage completed
* **Performance analysis**: Measure per-stage processing time
* **Debugging support**: Identify which stage caused issues
* **Lineage tracking**: Full data processing history
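
For instance, per-stage durations can be derived from the recorded timestamps. The helper below is a hypothetical sketch, assuming the `started_at` value set when the work was created and the string keys written by each stage:

[source,ruby]
----
require "time"

# Compute how long each stage took from the "<stage>_time" entries.
def stage_durations(metadata, stages = %i[resize filter compress tag])
  timestamps = [Time.parse(metadata[:started_at])] +
               stages.map { |s| Time.parse(metadata["#{s}_time"]) }
  stages.each_with_index.to_h do |stage, i|
    [stage, timestamps[i + 1] - timestamps[i]]
  end
end
----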

=== 4. Pipeline vs. Workflow

**Pipeline Pattern** (this example):

* Fixed stage sequence
* Automatic progression
* All items follow the same path
* Simple, linear flow

**Workflow Pattern** (see the workflow examples):

* Flexible stage dependencies
* Conditional branching
* Parallel execution paths
* Complex orchestration

Choose pipelines for:

* ETL processes
* Media transformations
* Data validation chains
* Sequential transformations

=== 5. Error Handling in Pipelines

The current implementation assumes every stage succeeds. For production:

[source,ruby]
----
def process(work)
  begin
    result = case work.stage
             when :resize then process_resize(work)
             # ... other stages
             end

    # Return success with next stage
    Fractor::WorkResult.new(result: { ... })

  rescue StandardError => e
    # Return error, stopping this item's pipeline
    Fractor::WorkResult.new(
      error: "Stage #{work.stage} failed: #{e.message}",
      work: work
    )
  end
end
----

**Error strategies** (the skip strategy is sketched after this list):

* **Fail-fast**: Stop the item at the failed stage
* **Retry**: Attempt the stage again (add a retry count to metadata)
* **Skip**: Continue to the next stage with an error flag
* **Rollback**: Undo previous stages (if possible)
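
A minimal sketch of the skip strategy, reusing the stage sequence from earlier (`execute_stage` is a hypothetical helper, not part of the example): the item keeps moving, but the failure is flagged in metadata for downstream stages or the final report:

[source,ruby]
----
STAGES = [:resize, :filter, :compress, :tag].freeze

def process(work)
  idx = STAGES.index(work.stage)
  next_stage = idx < STAGES.size - 1 ? STAGES[idx + 1] : nil
  processed = execute_stage(work) # hypothetical helper; may raise

  Fractor::WorkResult.new(
    result: { processed_data: processed, current_stage: work.stage,
              next_stage: next_stage, metadata: work.metadata },
    work: work
  )
rescue StandardError => e
  # Skip: pass the input through unchanged, record the error, keep going.
  Fractor::WorkResult.new(
    result: {
      processed_data: work.data,
      current_stage: work.stage,
      next_stage: next_stage,
      metadata: work.metadata.merge("#{work.stage}_error" => e.message)
    },
    work: work
  )
end
----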

=== 6. Performance Characteristics

**Throughput analysis**:

[source]
----
Total stages: 4
Items: 5
Workers: 4

Time per stage: 0.03s (average)
Sequential time: 5 items × 4 stages × 0.03s = 0.6s

With pipelining:
- Initial fill: 4 stages × 0.03s = 0.12s
- Steady state: 5 items × 0.03s = 0.15s
- Total: ~0.27s (2.2x speedup)

Actual speedup depends on stage duration variance and worker count.
----
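
The same accounting can be written down directly. This back-of-the-envelope model (a hypothetical helper, not part of the example) reproduces the numbers above:

[source,ruby]
----
# Fill the pipeline once, then complete roughly one item per stage-time.
def pipeline_time(items:, stages:, stage_time:)
  (stages * stage_time) + (items * stage_time)
end

sequential = 5 * 4 * 0.03                                         # => 0.6
pipelined  = pipeline_time(items: 5, stages: 4, stage_time: 0.03) # => 0.27
sequential / pipelined                                            # => ~2.2
----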

== Use Cases and Patterns

=== ETL Pipeline

Extract, Transform, Load data processing:

[source,ruby]
----
stages = [:extract, :validate, :transform, :enrich, :load]

def process(work)
  result = case work.stage
           when :extract then read_from_source(work)
           when :validate then check_data_quality(work)
           when :transform then apply_business_rules(work)
           when :enrich then add_derived_fields(work)
           when :load then write_to_destination(work)
           end
  # ... next stage logic
end
----

=== Media Processing Pipeline

Video/image processing workflow:

[source,ruby]
----
stages = [:transcode, :watermark, :thumbnail, :upload]

def process(work)
  result = case work.stage
           when :transcode then convert_format(work)
           when :watermark then apply_branding(work)
           when :thumbnail then generate_previews(work)
           when :upload then store_in_cdn(work)
           end
  # ... next stage logic
end
----

=== Data Validation Chain

Multi-stage data validation:

[source,ruby]
----
stages = [:format_check, :schema_validation, :business_rules, :duplicate_check]

def process(work)
  result = case work.stage
           when :format_check then validate_file_format(work)
           when :schema_validation then check_against_schema(work)
           when :business_rules then apply_domain_rules(work)
           when :duplicate_check then find_duplicates(work)
           end
  # ... next stage logic
end
----

=== Document Processing

Multi-step document transformation:

[source,ruby]
----
stages = [:parse, :extract_text, :classify, :index]

def process(work)
  result = case work.stage
           when :parse then parse_pdf(work)
           when :extract_text then ocr_if_needed(work)
           when :classify then categorize_document(work)
           when :index then add_to_search_engine(work)
           end
  # ... next stage logic
end
----

== Advanced Patterns

=== Conditional Stages

Add stage-skipping logic:

[source,ruby]
----
def determine_next_stage(work, result)
  case work.stage
  when :validate
    result[:valid] ? :transform : :error_queue
  when :transform
    result[:needs_enrichment] ? :enrich : :load
  else
    # Default sequential progression (assumes the stage list from
    # "Stage Sequencing" above)
    stages[stages.index(work.stage) + 1]
  end
end
----

=== Parallel Sub-Pipelines

Fork into parallel processing paths:

[source,ruby]
----
@supervisor.results.on_new_result do |result|
  if result.result[:current_stage] == :split
    # Create multiple parallel work items
    result.result[:chunks].each do |chunk|
      @supervisor.add_work_item(
        MediaWork.new(chunk, :process_chunk, result.result[:metadata])
      )
    end
  elsif result.result[:current_stage] == :process_chunk
    # Collect for merging
    @completed_chunks << result
    if all_chunks_complete?
      @supervisor.add_work_item(
        MediaWork.new(merged_data, :merge, metadata)
      )
    end
  else
    # Normal progression
    # ...
  end
end
----

=== Stage Retry Logic

Add automatic retry for failed stages. In this example's architecture, work is added through the supervisor's result callback, so the worker reports the retry request in its result and the callback re-queues the stage:

[source,ruby]
----
MAX_RETRIES = 3

# In the worker: report the failure along with the retry count.
def process(work)
  retry_count = work.metadata["#{work.stage}_retries"] || 0

  begin
    result = execute_stage(work)
    Fractor::WorkResult.new(result: { ... })

  rescue StandardError => e
    if retry_count < MAX_RETRIES
      Fractor::WorkResult.new(
        result: {
          retry_stage: work.stage, # retry the same stage
          processed_data: work.data,
          metadata: work.metadata.merge("#{work.stage}_retries" => retry_count + 1)
        },
        work: work
      )
    else
      # Max retries exceeded
      Fractor::WorkResult.new(error: "Failed after #{retry_count} retries: #{e}", work: work)
    end
  end
end

# In the pipeline: the callback owns @supervisor and re-queues the stage.
@supervisor.results.on_new_result do |result|
  if (stage = result.result[:retry_stage])
    @supervisor.add_work_item(
      MediaWork.new(result.result[:processed_data], stage, result.result[:metadata])
    )
  end
end
----

=== Pipeline Monitoring

Add detailed progress tracking:

[source,ruby]
----
def initialize(worker_count = 4)
  @stage_metrics = Hash.new { |h, k| h[k] = { count: 0, total_time: 0 } }
  # ... supervisor setup as before ...

  @supervisor.results.on_new_result do |result|
    stage = result.result[:current_stage]
    duration = calculate_duration(result) # e.g. from the stage timestamps in metadata

    @stage_metrics[stage][:count] += 1
    @stage_metrics[stage][:total_time] += duration

    log_progress(result)
    create_next_stage_work(result)
  end
end

def print_metrics
  @stage_metrics.each do |stage, metrics|
    avg_time = metrics[:total_time] / metrics[:count]
    puts "#{stage}: #{metrics[:count]} items, avg #{avg_time}s"
  end
end
----

== Performance Tuning

=== Worker Pool Sizing

[source,ruby]
----
require "etc" # for Etc.nprocessors

# For CPU-bound stages (encoding, compression)
worker_count = Etc.nprocessors

# For I/O-bound stages (reading, writing)
worker_count = Etc.nprocessors * 2

# For mixed workloads
worker_count = (Etc.nprocessors * 1.5).to_i
----

=== Batching for Efficiency

Process multiple items per work unit (a matching batch-aware stage handler is sketched after this block):

[source,ruby]
----
def process_images(images)
  batch_size = 10
  batches = images.each_slice(batch_size).to_a

  batches.map do |batch|
    MediaWork.new(batch, :resize_batch, { batch_size: batch.size })
  end
end
----
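
A batch-aware stage then maps over the array instead of a single item. The handler below is a hypothetical sketch (the `process_resize_batch` name and the string results are illustrative, not from the example):

[source,ruby]
----
# Each MediaWork created above carries an Array of images.
def process_resize_batch(work)
  work.data.map { |image| "Resized image: #{image}" }
end
----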

=== Stage-Specific Optimization

Optimize each stage individually:

[source,ruby]
----
def process_resize(work)
  # Use faster resize for small images
  if work.data.size < 1_000_000
    quick_resize(work)
  else
    high_quality_resize(work)
  end
end
----

== Next Steps

After understanding pipeline processing, explore:

* **link:../producer_subscriber/README.adoc[Producer-Subscriber]**: Streaming data patterns
* **link:../scatter_gather/README.adoc[Scatter-Gather]**: Dynamic distribution and collection
* **link:../workflow/README.adoc[Workflow System]**: Complex multi-path pipelines with branching
* **link:../hierarchical_hasher/README.adoc[Hierarchical Hasher]**: Map-reduce aggregation patterns