fractor 0.1.4 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop-https---raw-githubusercontent-com-riboseinc-oss-guides-main-ci-rubocop-yml +552 -0
- data/.rubocop.yml +14 -8
- data/.rubocop_todo.yml +284 -43
- data/README.adoc +111 -950
- data/docs/.lycheeignore +16 -0
- data/docs/Gemfile +24 -0
- data/docs/README.md +157 -0
- data/docs/_config.yml +151 -0
- data/docs/_features/error-handling.adoc +1192 -0
- data/docs/_features/index.adoc +80 -0
- data/docs/_features/monitoring.adoc +589 -0
- data/docs/_features/signal-handling.adoc +202 -0
- data/docs/_features/workflows.adoc +1235 -0
- data/docs/_guides/continuous-mode.adoc +736 -0
- data/docs/_guides/cookbook.adoc +1133 -0
- data/docs/_guides/index.adoc +55 -0
- data/docs/_guides/pipeline-mode.adoc +730 -0
- data/docs/_guides/troubleshooting.adoc +358 -0
- data/docs/_pages/architecture.adoc +1390 -0
- data/docs/_pages/core-concepts.adoc +1392 -0
- data/docs/_pages/design-principles.adoc +862 -0
- data/docs/_pages/getting-started.adoc +290 -0
- data/docs/_pages/installation.adoc +143 -0
- data/docs/_reference/api.adoc +1080 -0
- data/docs/_reference/error-reporting.adoc +670 -0
- data/docs/_reference/examples.adoc +181 -0
- data/docs/_reference/index.adoc +96 -0
- data/docs/_reference/troubleshooting.adoc +862 -0
- data/docs/_tutorials/complex-workflows.adoc +1022 -0
- data/docs/_tutorials/data-processing-pipeline.adoc +740 -0
- data/docs/_tutorials/first-application.adoc +384 -0
- data/docs/_tutorials/index.adoc +48 -0
- data/docs/_tutorials/long-running-services.adoc +931 -0
- data/docs/assets/images/favicon-16.png +0 -0
- data/docs/assets/images/favicon-32.png +0 -0
- data/docs/assets/images/favicon-48.png +0 -0
- data/docs/assets/images/favicon.ico +0 -0
- data/docs/assets/images/favicon.png +0 -0
- data/docs/assets/images/favicon.svg +45 -0
- data/docs/assets/images/fractor-icon.svg +49 -0
- data/docs/assets/images/fractor-logo.svg +61 -0
- data/docs/index.adoc +131 -0
- data/docs/lychee.toml +39 -0
- data/examples/api_aggregator/README.adoc +627 -0
- data/examples/api_aggregator/api_aggregator.rb +376 -0
- data/examples/auto_detection/README.adoc +407 -29
- data/examples/auto_detection/auto_detection.rb +9 -9
- data/examples/continuous_chat_common/message_protocol.rb +53 -0
- data/examples/continuous_chat_fractor/README.adoc +217 -0
- data/examples/continuous_chat_fractor/chat_client.rb +303 -0
- data/examples/continuous_chat_fractor/chat_common.rb +83 -0
- data/examples/continuous_chat_fractor/chat_server.rb +167 -0
- data/examples/continuous_chat_fractor/simulate.rb +345 -0
- data/examples/continuous_chat_server/README.adoc +135 -0
- data/examples/continuous_chat_server/chat_client.rb +303 -0
- data/examples/continuous_chat_server/chat_server.rb +359 -0
- data/examples/continuous_chat_server/simulate.rb +343 -0
- data/examples/error_reporting.rb +207 -0
- data/examples/file_processor/README.adoc +170 -0
- data/examples/file_processor/file_processor.rb +615 -0
- data/examples/file_processor/sample_files/invalid.csv +1 -0
- data/examples/file_processor/sample_files/orders.xml +24 -0
- data/examples/file_processor/sample_files/products.json +23 -0
- data/examples/file_processor/sample_files/users.csv +6 -0
- data/examples/hierarchical_hasher/README.adoc +629 -41
- data/examples/hierarchical_hasher/hierarchical_hasher.rb +12 -8
- data/examples/image_processor/README.adoc +610 -0
- data/examples/image_processor/image_processor.rb +349 -0
- data/examples/image_processor/processed_images/sample_10_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_1_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_2_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_3_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_4_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_5_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_6_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_7_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_8_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_9_processed.jpg.json +12 -0
- data/examples/image_processor/test_images/sample_1.png +1 -0
- data/examples/image_processor/test_images/sample_10.png +1 -0
- data/examples/image_processor/test_images/sample_2.png +1 -0
- data/examples/image_processor/test_images/sample_3.png +1 -0
- data/examples/image_processor/test_images/sample_4.png +1 -0
- data/examples/image_processor/test_images/sample_5.png +1 -0
- data/examples/image_processor/test_images/sample_6.png +1 -0
- data/examples/image_processor/test_images/sample_7.png +1 -0
- data/examples/image_processor/test_images/sample_8.png +1 -0
- data/examples/image_processor/test_images/sample_9.png +1 -0
- data/examples/log_analyzer/README.adoc +662 -0
- data/examples/log_analyzer/log_analyzer.rb +579 -0
- data/examples/log_analyzer/sample_logs/apache.log +20 -0
- data/examples/log_analyzer/sample_logs/json.log +15 -0
- data/examples/log_analyzer/sample_logs/nginx.log +15 -0
- data/examples/log_analyzer/sample_logs/rails.log +29 -0
- data/examples/multi_work_type/README.adoc +576 -26
- data/examples/multi_work_type/multi_work_type.rb +30 -29
- data/examples/performance_monitoring.rb +120 -0
- data/examples/pipeline_processing/README.adoc +740 -26
- data/examples/pipeline_processing/pipeline_processing.rb +16 -16
- data/examples/priority_work_example.rb +155 -0
- data/examples/producer_subscriber/README.adoc +889 -46
- data/examples/producer_subscriber/producer_subscriber.rb +20 -16
- data/examples/scatter_gather/README.adoc +829 -27
- data/examples/scatter_gather/scatter_gather.rb +29 -28
- data/examples/simple/README.adoc +347 -0
- data/examples/simple/sample.rb +5 -5
- data/examples/specialized_workers/README.adoc +622 -26
- data/examples/specialized_workers/specialized_workers.rb +88 -45
- data/examples/stream_processor/README.adoc +206 -0
- data/examples/stream_processor/stream_processor.rb +284 -0
- data/examples/web_scraper/README.adoc +625 -0
- data/examples/web_scraper/web_scraper.rb +285 -0
- data/examples/workflow/README.adoc +406 -0
- data/examples/workflow/circuit_breaker/README.adoc +360 -0
- data/examples/workflow/circuit_breaker/circuit_breaker_workflow.rb +225 -0
- data/examples/workflow/conditional/README.adoc +483 -0
- data/examples/workflow/conditional/conditional_workflow.rb +215 -0
- data/examples/workflow/dead_letter_queue/README.adoc +374 -0
- data/examples/workflow/dead_letter_queue/dead_letter_queue_workflow.rb +217 -0
- data/examples/workflow/fan_out/README.adoc +381 -0
- data/examples/workflow/fan_out/fan_out_workflow.rb +202 -0
- data/examples/workflow/retry/README.adoc +248 -0
- data/examples/workflow/retry/retry_workflow.rb +195 -0
- data/examples/workflow/simple_linear/README.adoc +267 -0
- data/examples/workflow/simple_linear/simple_linear_workflow.rb +175 -0
- data/examples/workflow/simplified/README.adoc +329 -0
- data/examples/workflow/simplified/simplified_workflow.rb +222 -0
- data/exe/fractor +10 -0
- data/lib/fractor/cli.rb +288 -0
- data/lib/fractor/configuration.rb +307 -0
- data/lib/fractor/continuous_server.rb +183 -0
- data/lib/fractor/error_formatter.rb +72 -0
- data/lib/fractor/error_report_generator.rb +152 -0
- data/lib/fractor/error_reporter.rb +244 -0
- data/lib/fractor/error_statistics.rb +147 -0
- data/lib/fractor/execution_tracer.rb +162 -0
- data/lib/fractor/logger.rb +230 -0
- data/lib/fractor/main_loop_handler.rb +406 -0
- data/lib/fractor/main_loop_handler3.rb +135 -0
- data/lib/fractor/main_loop_handler4.rb +299 -0
- data/lib/fractor/performance_metrics_collector.rb +181 -0
- data/lib/fractor/performance_monitor.rb +215 -0
- data/lib/fractor/performance_report_generator.rb +202 -0
- data/lib/fractor/priority_work.rb +93 -0
- data/lib/fractor/priority_work_queue.rb +189 -0
- data/lib/fractor/result_aggregator.rb +33 -1
- data/lib/fractor/shutdown_handler.rb +168 -0
- data/lib/fractor/signal_handler.rb +80 -0
- data/lib/fractor/supervisor.rb +430 -144
- data/lib/fractor/supervisor_logger.rb +88 -0
- data/lib/fractor/version.rb +1 -1
- data/lib/fractor/work.rb +12 -0
- data/lib/fractor/work_distribution_manager.rb +151 -0
- data/lib/fractor/work_queue.rb +88 -0
- data/lib/fractor/work_result.rb +181 -9
- data/lib/fractor/worker.rb +75 -1
- data/lib/fractor/workflow/builder.rb +210 -0
- data/lib/fractor/workflow/chain_builder.rb +169 -0
- data/lib/fractor/workflow/circuit_breaker.rb +183 -0
- data/lib/fractor/workflow/circuit_breaker_orchestrator.rb +208 -0
- data/lib/fractor/workflow/circuit_breaker_registry.rb +112 -0
- data/lib/fractor/workflow/dead_letter_queue.rb +334 -0
- data/lib/fractor/workflow/execution_hooks.rb +39 -0
- data/lib/fractor/workflow/execution_strategy.rb +225 -0
- data/lib/fractor/workflow/execution_trace.rb +134 -0
- data/lib/fractor/workflow/helpers.rb +191 -0
- data/lib/fractor/workflow/job.rb +290 -0
- data/lib/fractor/workflow/job_dependency_validator.rb +120 -0
- data/lib/fractor/workflow/logger.rb +110 -0
- data/lib/fractor/workflow/pre_execution_context.rb +193 -0
- data/lib/fractor/workflow/retry_config.rb +156 -0
- data/lib/fractor/workflow/retry_orchestrator.rb +184 -0
- data/lib/fractor/workflow/retry_strategy.rb +93 -0
- data/lib/fractor/workflow/structured_logger.rb +30 -0
- data/lib/fractor/workflow/type_compatibility_validator.rb +222 -0
- data/lib/fractor/workflow/visualizer.rb +211 -0
- data/lib/fractor/workflow/workflow_context.rb +132 -0
- data/lib/fractor/workflow/workflow_executor.rb +669 -0
- data/lib/fractor/workflow/workflow_result.rb +55 -0
- data/lib/fractor/workflow/workflow_validator.rb +295 -0
- data/lib/fractor/workflow.rb +333 -0
- data/lib/fractor/wrapped_ractor.rb +66 -91
- data/lib/fractor/wrapped_ractor3.rb +161 -0
- data/lib/fractor/wrapped_ractor4.rb +242 -0
- data/lib/fractor.rb +93 -3
- metadata +192 -6
- data/tests/sample.rb.bak +0 -309
- data/tests/sample_working.rb.bak +0 -209

data/docs/_features/workflows.adoc
@@ -0,0 +1,1235 @@
---
layout: default
title: Workflows
nav_order: 2
---
== Workflows

Fractor provides a declarative workflow system for defining complex data processing pipelines.

== Overview

Workflow features:

* GitHub Actions-style declarative DSL
* Type-safe data flow between jobs
* Dependency management and topological sorting
* Multiple execution patterns (linear, fan-out/fan-in, conditional)
* Simplified syntax with smart defaults
* Structured logging and execution tracing
* Workflow visualization (Mermaid, DOT, ASCII)

== Workflow definition approaches

Fractor supports three complementary ways to define workflows:

=== Ruby DSL (recommended)

Define workflows directly in Ruby code:

[source,ruby]
----
class MyWorkflow < Fractor::Workflow
  workflow "my-workflow" do
    input_type InputData
    output_type OutputData

    job "process" do
      runs_with ProcessWorker
      inputs_from_workflow
    end

    job "finalize" do
      needs "process"
      runs_with FinalizeWorker
      inputs_from_job "process"
      outputs_to_workflow
      terminates_workflow
    end
  end
end
----

=== Simplified syntax

Smart defaults cut the definition code by roughly 70%:

[source,ruby]
----
# Using Workflow.define
workflow = Fractor::Workflow.define("simple") do
  job :step1, Step1Worker
  job :step2, Step2Worker, needs: :step1
  job :step3, Step3Worker, needs: :step2
end

# Using the Chain API for linear workflows
workflow = Fractor::Workflow.chain("linear")
  .step(:step1, Step1Worker)
  .step(:step2, Step2Worker)
  .step(:step3, Step3Worker)
  .build
----

=== YAML workflows

Configuration-driven workflow definitions; a full YAML example appears in the Alternative workflow definition methods section below.

See link:../examples/workflow/README/[Workflow Examples] for complete documentation.

== Workflow features

=== Error handling and resilience

Fractor workflows support production-ready error handling with automatic retry logic, error handlers, and fallback strategies.

==== Retry with backoff strategies

Jobs can automatically retry on failure with configurable backoff strategies:

[source,ruby]
----
job "fetch_api_data" do
  runs_with ApiWorker
  inputs_from_workflow

  # Retry up to 3 times with exponential backoff
  retry_on_error max_attempts: 3,
                 backoff: :exponential,
                 initial_delay: 1,
                 max_delay: 60
end
----

Available backoff strategies:

* *Exponential* (default): Delays increase exponentially (1s → 2s → 4s → 8s)
* *Linear*: Delays increase linearly (1s → 2s → 3s → 4s)
* *Constant*: Fixed delay between retries (2s → 2s → 2s)
* *None*: No retry (fail immediately)
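
The per-attempt delay follows directly from the options in the table below. A minimal sketch of the arithmetic (illustrative only; Fractor's retry configuration computes this internally):

[source,ruby]
----
# Delay before retry attempt `n` (1-based), per backoff strategy (sketch).
def retry_delay(strategy, attempt, initial_delay: 1, multiplier: 2,
                increment: 1, delay: 1, max_delay: nil)
  base =
    case strategy
    when :exponential then initial_delay * (multiplier**(attempt - 1))
    when :linear      then initial_delay + (increment * (attempt - 1))
    when :constant    then delay
    when :none        then return nil # fail immediately, no retry
    end
  max_delay ? [base, max_delay].min : base
end

(1..4).map { |n| retry_delay(:exponential, n) } # => [1, 2, 4, 8]
(1..4).map { |n| retry_delay(:linear, n) }      # => [1, 2, 3, 4]
----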

Configuration options:

[cols="1,1,3"]
|===
|Option |Default |Description

|`max_attempts`
|3
|Maximum number of attempts (including the initial attempt)

|`backoff`
|`:exponential`
|Retry strategy (`:exponential`, `:linear`, `:constant`, `:none`)

|`initial_delay`
|1
|Initial delay in seconds

|`max_delay`
|nil
|Maximum delay cap in seconds

|`increment`
|1
|(Linear only) Delay increment per attempt

|`multiplier`
|2
|(Exponential only) Delay multiplier per attempt

|`delay`
|1
|(Constant only) Fixed delay in seconds

|`retryable_errors`
|`[StandardError]`
|List of error classes that trigger retry
|===

==== Error handlers

Add custom error handling logic to jobs:

[source,ruby]
----
job "process_payment" do
  runs_with PaymentWorker

  on_error do |error, context|
    # Log to monitoring service
    ErrorTracker.notify(error, context: context.to_h)

    # Send alert
    AlertService.send_alert("Payment failed: #{error.message}")

    # Update metrics
    Metrics.increment("payment_errors")
  end
end
----

Error handlers receive:

* `error`: The exception that occurred
* `context`: The workflow execution context

Multiple error handlers can be registered and will execute in order.
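
For example, registering separate handlers keeps logging and alerting concerns apart; they run in the order they were declared. A minimal sketch, reusing the hypothetical `ErrorTracker` and `AlertService` helpers from above:

[source,ruby]
----
job "process_payment" do
  runs_with PaymentWorker

  # Runs first: record the failure.
  on_error do |error, context|
    ErrorTracker.notify(error, context: context.to_h)
  end

  # Runs second: page someone.
  on_error do |error, _context|
    AlertService.send_alert("Payment failed: #{error.message}")
  end
end
----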

==== Fallback jobs

Provide alternative execution paths when retries are exhausted:

[source,ruby]
----
job "fetch_live_data" do
  runs_with LiveDataWorker
  retry_on_error max_attempts: 3, backoff: :exponential
  fallback_to "fetch_cached_data"
end

job "fetch_cached_data" do
  runs_with CachedDataWorker
  inputs_from_workflow
end
----

If `fetch_live_data` fails after all retry attempts, the workflow automatically executes `fetch_cached_data` instead.

==== Selective error retry

Only retry specific error types:

[source,ruby]
----
job "api_call" do
  runs_with ApiWorker
  retry_on_error max_attempts: 5,
                 retryable_errors: [Net::HTTPRetriableError, Timeout::Error]
end
----

Errors not in the `retryable_errors` list cause the job to fail immediately, without retry.

==== Complete error handling example

[source,ruby]
----
class ResilientWorkflow < Fractor::Workflow
  workflow "resilient-api-workflow" do
    job "fetch_data" do
      runs_with ExternalApiWorker
      inputs_from_workflow

      # Retry configuration
      retry_on_error max_attempts: 5,
                     backoff: :exponential,
                     initial_delay: 1,
                     max_delay: 30,
                     retryable_errors: [Net::HTTPRetriableError, Timeout::Error]

      # Error handler
      on_error do |error, context|
        ErrorLogger.log(
          job: "fetch_data",
          error: error.class.name,
          message: error.message,
          attempt: context.metadata[:attempt]
        )
      end

      # Fallback strategy
      fallback_to "use_cached_data"
    end

    job "use_cached_data" do
      runs_with CachedDataWorker
      inputs_from_workflow
    end

    job "process" do
      runs_with ProcessWorker
      needs "fetch_data"
      outputs_to_workflow
    end
  end
end
----

See link:../examples/workflow/retry/README/[Retry Workflow Example] for complete examples of retry patterns.

==== Circuit breaker

Protect workflows from cascading failures with the circuit breaker pattern:

[source,ruby]
----
job "external_api_call" do
  runs_with ExternalApiWorker
  inputs_from_workflow

  # Circuit breaker configuration
  circuit_breaker threshold: 5,      # Open after 5 failures
                  timeout: 60,       # Stay open for 60 seconds
                  half_open_calls: 3 # Test with 3 calls before closing

  # Use fallback when the circuit is open
  fallback_to "use_cached_data"

  on_error do |error, context|
    if error.is_a?(Fractor::Workflow::CircuitOpenError)
      Logger.warn("Circuit breaker open for #{context.job_id}")
    end
  end
end

job "use_cached_data" do
  runs_with CachedDataWorker
  inputs_from_workflow
  outputs_to_workflow
end
----

The circuit breaker has three states:

* *Closed*: Normal operation; requests pass through
* *Open*: Failure threshold exceeded; requests fail fast without calling the service
* *Half-Open*: Testing whether the service has recovered, using a limited number of test calls
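
The transitions between these states are mechanical: consecutive failures open the circuit, the timeout moves it to half-open, and enough successful probe calls close it again. A minimal state-machine sketch of that logic (illustrative only; Fractor's circuit breaker implements this internally):

[source,ruby]
----
# Minimal circuit-breaker state machine (illustrative sketch).
CircuitOpen = Class.new(StandardError)

class SketchBreaker
  def initialize(threshold:, timeout:, half_open_calls:)
    @threshold = threshold             # failures before opening
    @timeout = timeout                 # seconds to stay open
    @half_open_calls = half_open_calls # successful probes needed to close
    @state = :closed
    @failures = @successes = 0
    @opened_at = nil
  end

  def call
    if @state == :open
      raise CircuitOpen if Time.now - @opened_at < @timeout
      @state = :half_open # timeout elapsed: allow probe calls through
      @successes = 0
    end

    result = yield
    record_success
    result
  rescue CircuitOpen
    raise
  rescue StandardError
    record_failure
    raise
  end

  private

  def record_success
    if @state == :half_open
      @successes += 1
      @state, @failures = :closed, 0 if @successes >= @half_open_calls
    else
      @failures = 0 # a healthy call in the closed state resets the count
    end
  end

  def record_failure
    @failures += 1
    if @state == :half_open || @failures >= @threshold
      @state, @opened_at = :open, Time.now # trip (or re-trip) the circuit
    end
  end
end
----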

Configuration options:

[cols="1,1,3"]
|===
|Option |Default |Description

|`threshold`
|5
|Number of failures before opening the circuit

|`timeout`
|60
|Seconds to wait in the open state before testing recovery

|`half_open_calls`
|3
|Number of successful test calls needed to close the circuit

|`shared_key`
|`nil`
|Optional key for sharing a circuit breaker across jobs
|===

===== Shared circuit breakers

Multiple jobs can share a circuit breaker using `shared_key`:

[source,ruby]
----
job "fetch_user_data" do
  runs_with UserApiWorker
  circuit_breaker threshold: 5,
                  timeout: 60,
                  shared_key: "user_service" # Same key = shared breaker
end

job "fetch_profile_data" do
  runs_with ProfileApiWorker
  circuit_breaker threshold: 5,
                  timeout: 60,
                  shared_key: "user_service" # Same key = shared breaker
end
----

When jobs share a circuit breaker:

* Failures from any job contribute to the shared threshold
* When one job trips the circuit, all jobs using the same key are protected
* This prevents multiple jobs from hammering a failing service

===== Circuit breaker with retry

Combine a circuit breaker with retry for comprehensive protection:

[source,ruby]
----
job "resilient_api_call" do
  runs_with ApiWorker

  # First: retry transient failures
  retry_on_error max_attempts: 3,
                 backoff: :exponential,
                 initial_delay: 1

  # Second: circuit breaker for sustained failures
  circuit_breaker threshold: 10,
                  timeout: 60

  # Final: fallback when the circuit opens
  fallback_to "use_cache"
end
----

This layered approach:

1. *Retry* handles transient failures (network blips, temporary unavailability)
2. *Circuit breaker* protects against sustained failures (service down, persistent timeouts)
3. *Fallback* provides degraded service when all else fails

See link:../examples/workflow/circuit_breaker/README/[Circuit Breaker Example] for complete examples and best practices.

==== Dead letter queue

Capture permanently failed work for manual inspection and retry:

[source,ruby]
----
class WorkflowWithDLQ < Fractor::Workflow
  workflow "dlq-workflow" do
    # Configure the dead letter queue
    configure_dead_letter_queue max_size: 1000

    job "process_data" do
      runs_with DataProcessor
      inputs_from_workflow

      # Retry before adding to the DLQ
      retry_on_error max_attempts: 3,
                     backoff: :exponential,
                     initial_delay: 1
    end

    job "finalize" do
      needs "process_data"
      runs_with FinalizeWorker
      outputs_to_workflow
    end
  end
end

# Execute the workflow
workflow = WorkflowWithDLQ.new(input_data)
begin
  result = workflow.execute
rescue Fractor::Workflow::WorkflowExecutionError => e
  # Check the dead letter queue
  dlq = workflow.dead_letter_queue
  puts "DLQ has #{dlq.size} failed items"

  # Query failed entries
  dlq.all.each do |entry|
    puts "Failed: #{entry.error.message}"
    puts "Context: #{entry.context.inspect}"
    puts "Metadata: #{entry.metadata.inspect}"
  end
end
----

The dead letter queue (DLQ) automatically captures work that:

* Fails after retry attempts are exhausted
* Raises non-retryable errors (when using `retryable_errors`)
* Cannot be processed due to persistent failures

Configuration options:

[cols="1,1,3"]
|===
|Option |Default |Description

|`max_size`
|1000
|Maximum number of DLQ entries to retain

|`persister`
|`nil`
|Optional persistence strategy (file, Redis, database)

|`on_add`
|`nil`
|Callback invoked when an entry is added to the DLQ
|===

===== DLQ with custom notification

Get notified when work is added to the DLQ:

[source,ruby]
----
configure_dead_letter_queue(
  max_size: 500,
  on_add: lambda { |entry|
    # Send alert
    AlertService.send(
      "Work failed permanently",
      error: entry.error.message,
      job: entry.metadata[:job_name],
      workflow: entry.metadata[:workflow_name]
    )

    # Log to monitoring
    Logger.error(
      "DLQ entry added",
      error_class: entry.error.class.name,
      retry_attempts: entry.metadata[:retry_attempts],
      timestamp: entry.timestamp
    )
  }
)
----

===== DLQ with file persistence

Persist failed work to disk for durability:

[source,ruby]
----
require 'fractor/workflow/dead_letter_queue'

configure_dead_letter_queue(
  max_size: 10000,
  persister: Fractor::Workflow::DeadLetterQueue::FilePersister.new(
    directory: "tmp/dlq"
  )
)
----

Each failed work item is saved as a JSON file containing:

* Work payload and context
* Error details and stack trace
* Retry history and metadata
* Workflow state at the time of failure

===== Querying the DLQ

Filter and inspect failed work:

[source,ruby]
----
dlq = workflow.dead_letter_queue

# Get all entries
all_failures = dlq.all

# Filter by error class
network_errors = dlq.by_error_class(Net::HTTPError)
timeout_errors = dlq.by_error_class(Timeout::Error)

# Filter by time range
recent = dlq.by_time_range(Time.now - 3600, Time.now)
yesterday = dlq.by_time_range(Time.now - 172_800, Time.now - 86_400)

# Custom filtering
job_failures = dlq.filter do |entry|
  entry.metadata[:job_name] == "process_data"
end

# Get statistics
stats = dlq.stats
puts "Total: #{stats[:total]}"
puts "Error types: #{stats[:error_types]}"
puts "Jobs: #{stats[:jobs]}"
----

===== Retrying from the DLQ

Manually retry failed work after fixing the underlying issue:

[source,ruby]
----
dlq = workflow.dead_letter_queue

# Retry a single entry
entry = dlq.all.first
dlq.retry_entry(entry) do |work, error, context|
  # Custom retry logic
  MyWorker.perform(work)
end

# Retry all entries
success_count = 0
dlq.retry_all do |work, error, context|
  begin
    result = MyWorker.perform(work)
    success_count += 1
    result
  rescue StandardError => e
    Logger.warn("Retry failed: #{e.message}")
    nil # Don't fail the batch
  end
end

puts "Successfully retried #{success_count} items"
----

===== DLQ entry structure

Each DLQ entry contains:

[source,ruby]
----
entry.work      # Original Work object with payload
entry.error     # Exception that caused the failure
entry.timestamp # When the entry was added to the DLQ
entry.context   # Workflow context at the time of failure
entry.metadata  # Additional information:
                #   - job_name: Name of the failed job
                #   - worker_class: Worker class name
                #   - correlation_id: Workflow correlation ID
                #   - workflow_name: Workflow name
                #   - retry_attempts: Number of retry attempts made
                #   - total_retry_time: Total time spent retrying
                #   - all_errors: All errors encountered during retries
----

===== DLQ best practices

* *Set an appropriate max_size*: Base it on your error rate and retention needs
* *Monitor DLQ growth*: Alert when the size exceeds thresholds
* *Clean up regularly*: Review and remove old or resolved entries
* *Use persistence*: For production systems requiring durability
* *Implement retry logic*: Have a strategy for re-processing failed work
* *Integrate with monitoring*: Track DLQ metrics and error patterns

See link:../examples/workflow/dead_letter_queue/README/[Dead Letter Queue Example] for complete examples and best practices.

==== Performance monitoring

Monitor workflow execution metrics in real time with comprehensive performance tracking:

[source,ruby]
----
require 'fractor/performance_monitor'

# Create a supervisor
supervisor = Fractor::Supervisor.new(
  worker_class: DataProcessor,
  num_workers: 4,
  max_queue_size: 100
)

# Attach a performance monitor
monitor = Fractor::PerformanceMonitor.new(
  supervisor,
  sample_interval: 1.0 # Sample metrics every second
)

# Start monitoring
monitor.start

# Add work to the supervisor
100.times do |i|
  supervisor.add_work(Fractor::Work.new(payload: { id: i }))
end

# Wait for completion
sleep 5

# Get the current snapshot
snapshot = monitor.snapshot
puts "Jobs processed: #{snapshot[:jobs_processed]}"
puts "Average latency: #{snapshot[:average_latency]}ms"
puts "Worker utilization: #{snapshot[:worker_utilization]}%"

# Generate a human-readable report
puts monitor.report

# Stop monitoring
monitor.stop
----

The Performance Monitor provides comprehensive metrics collection and analysis for Fractor supervisors and workflows.

===== Available metrics

The monitor tracks the following metrics:

[cols="1,3"]
|===
|Metric |Description

|`jobs_processed`
|Total number of jobs completed

|`jobs_succeeded`
|Number of jobs that completed successfully

|`jobs_failed`
|Number of jobs that failed

|`average_latency`
|Mean job execution time in milliseconds

|`p50_latency`
|50th percentile (median) latency in milliseconds

|`p95_latency`
|95th percentile latency in milliseconds

|`p99_latency`
|99th percentile latency in milliseconds

|`throughput`
|Jobs processed per second

|`queue_depth`
|Current number of pending jobs in the queue

|`worker_count`
|Total number of workers

|`active_workers`
|Number of workers currently processing jobs

|`worker_utilization`
|Percentage of workers actively processing (0-100)

|`memory_mb`
|Current process memory usage in megabytes

|`uptime`
|Monitor uptime in seconds
|===
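
The percentile figures are order statistics over the recorded latency samples. A minimal sketch of how they can be derived (nearest-rank method; illustrative only, the monitor computes them internally and may use a different interpolation):

[source,ruby]
----
# Nearest-rank percentile over recorded latency samples (illustrative).
def percentile(samples, pct)
  return nil if samples.empty?
  sorted = samples.sort
  rank = (pct / 100.0 * sorted.length).ceil - 1
  sorted[rank.clamp(0, sorted.length - 1)]
end

latencies_ms = [12.0, 20.0, 18.5, 45.0, 22.1, 19.9, 67.0, 21.4]
percentile(latencies_ms, 50) # => 20.0 (median)
percentile(latencies_ms, 95) # => 67.0 (tail latency, dominated by outliers)
----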

===== Configuration options

[cols="1,1,3"]
|===
|Option |Default |Description

|`sample_interval`
|1.0
|How often to sample metrics, in seconds
|===

===== Export formats

The Performance Monitor supports multiple export formats for integration with monitoring systems:

====== Human-readable report

[source,ruby]
----
puts monitor.report

# Output:
# === Performance Report ===
# Uptime: 10.5s
#
# Jobs:
#   Total: 150
#   Succeeded: 145
#   Failed: 5
#   Success Rate: 96.67%
#
# Latency (ms):
#   Average: 23.5
#   p50: 20.0
#   p95: 45.0
#   p99: 67.0
#
# Throughput:
#   Current: 14.3 jobs/sec
#
# Queue:
#   Depth: 25 jobs
#
# Workers:
#   Total: 4
#   Active: 3
#   Utilization: 75.00%
#
# Memory:
#   Current: 127.5 MB
----

====== JSON export

Export metrics as structured JSON for programmatic consumption:

[source,ruby]
----
json_data = monitor.to_json
puts json_data

# Output:
# {
#   "jobs_processed": 150,
#   "jobs_succeeded": 145,
#   "jobs_failed": 5,
#   "average_latency": 23.5,
#   "p50_latency": 20.0,
#   "p95_latency": 45.0,
#   "p99_latency": 67.0,
#   "throughput": 14.3,
#   "queue_depth": 25,
#   "worker_count": 4,
#   "active_workers": 3,
#   "worker_utilization": 75.0,
#   "memory_mb": 127.5,
#   "uptime": 10.5
# }
----

====== Prometheus format

Export metrics in Prometheus text format for scraping:

[source,ruby]
----
puts monitor.to_prometheus

# Output:
# # HELP fractor_jobs_processed Total number of jobs processed
# # TYPE fractor_jobs_processed counter
# fractor_jobs_processed 150
#
# # HELP fractor_jobs_succeeded Number of jobs that succeeded
# # TYPE fractor_jobs_succeeded counter
# fractor_jobs_succeeded 145
#
# # HELP fractor_jobs_failed Number of jobs that failed
# # TYPE fractor_jobs_failed counter
# fractor_jobs_failed 5
#
# # HELP fractor_latency_average Average job latency in milliseconds
# # TYPE fractor_latency_average gauge
# fractor_latency_average 23.5
#
# # HELP fractor_latency_p50 50th percentile latency in milliseconds
# # TYPE fractor_latency_p50 gauge
# fractor_latency_p50 20.0
# ...
----

===== Integration with Prometheus

Set up an HTTP endpoint for Prometheus scraping:

[source,ruby]
----
require 'webrick'

# Create the monitor
monitor = Fractor::PerformanceMonitor.new(supervisor)
monitor.start

# Create the metrics endpoint
server = WEBrick::HTTPServer.new(Port: 9090)
server.mount_proc '/metrics' do |req, res|
  res['Content-Type'] = 'text/plain; version=0.0.4'
  res.body = monitor.to_prometheus
end

# Start the server
trap('INT') { server.shutdown }
server.start
----

Configure Prometheus to scrape the endpoint:

[source,yaml]
----
scrape_configs:
  - job_name: 'fractor'
    static_configs:
      - targets: ['localhost:9090']
    scrape_interval: 15s
----

===== Workflow integration

Monitor workflow execution performance:

[source,ruby]
----
class MonitoredWorkflow < Fractor::Workflow
  workflow "monitored-workflow" do
    input_type InputData
    output_type OutputData

    job "process" do
      runs_with ProcessWorker
      inputs_from_workflow
    end

    job "finalize" do
      needs "process"
      runs_with FinalizeWorker
      inputs_from_job "process"
      outputs_to_workflow
      terminates_workflow
    end
  end
end

# Create a workflow with monitoring
workflow = MonitoredWorkflow.new(input_data)
supervisor = workflow.supervisor # Get the internal supervisor

# Attach a monitor
monitor = Fractor::PerformanceMonitor.new(supervisor)
monitor.start

# Execute the workflow
result = workflow.execute

# Review metrics
puts monitor.report
monitor.stop
----

===== Custom metrics collection

Record job latency manually for fine-grained tracking:

[source,ruby]
----
monitor = Fractor::PerformanceMonitor.new(supervisor)
monitor.start

# Record a job execution
start_time = Time.now
begin
  result = perform_job(work)
  latency = ((Time.now - start_time) * 1000).round(2)
  monitor.record_job(latency, success: true)
rescue StandardError => e
  latency = ((Time.now - start_time) * 1000).round(2)
  monitor.record_job(latency, success: false)
  raise
end
----

===== Real-time monitoring

Display live metrics during execution:

[source,ruby]
----
monitor = Fractor::PerformanceMonitor.new(supervisor, sample_interval: 1.0)
monitor.start

# Background thread for live updates
Thread.new do
  loop do
    sleep 5
    snapshot = monitor.snapshot
    puts "\nLive Metrics:"
    puts "  Processed: #{snapshot[:jobs_processed]}"
    puts "  Throughput: #{snapshot[:throughput]} jobs/sec"
    puts "  Queue: #{snapshot[:queue_depth]} pending"
    puts "  Workers: #{snapshot[:active_workers]}/#{snapshot[:worker_count]} active"
    puts "  Latency (p95): #{snapshot[:p95_latency]}ms"
  end
end

# Execute work
supervisor.add_work(work_items)
supervisor.wait_for_completion
monitor.stop
----

===== Performance monitoring best practices

* *Choose an appropriate sample interval*: Balance accuracy against overhead (1-5 seconds recommended)
* *Monitor in production*: Track real workload performance to identify issues
* *Set up alerts*: Configure monitoring-system alerts for abnormal metrics
* *Track percentiles*: Use p95/p99 latency to identify outliers and tail latencies
* *Monitor worker utilization*: Low utilization may indicate queue starvation; high utilization may indicate overload
* *Export to a time-series DB*: Store historical metrics for trend analysis
* *Correlate with business metrics*: Link performance metrics to business outcomes
* *Monitor memory usage*: Detect memory leaks and resource exhaustion
* *Use for capacity planning*: Analyze metrics to determine optimal worker counts and queue sizes

See link:../examples/performance_monitoring.rb[Performance Monitoring Example] for complete examples and integration patterns.

=== Structured logging

Attach a logger to get correlation IDs for distributed tracing:

[source,ruby]
----
# Use one or the other:
workflow.logger = Fractor::Workflow::WorkflowLogger.new
workflow.logger = Fractor::Workflow::StructuredLogger.new # JSON output
----

=== Execution tracing

Detailed timing and status tracking:

[source,ruby]
----
trace = workflow.execution_trace
trace.jobs.each do |job_id, job_trace|
  puts "#{job_id}: #{job_trace.duration}s (#{job_trace.status})"
end
----

=== Workflow visualization

Generate visual representations:

[source,ruby]
----
visualizer = Fractor::Workflow::Visualizer.new(workflow)
puts visualizer.to_mermaid # Mermaid flowchart
puts visualizer.to_dot     # Graphviz DOT
puts visualizer.to_ascii   # ASCII art
----

== Workflow examples

The workflow examples (link:../examples/workflow/[examples/workflow/]) demonstrate how to define and execute complex data processing workflows using a declarative GitHub Actions-style DSL.

Key features:

* *Declarative workflow DSL*: Define workflows similar to GitHub Actions
* *Type-safe data flow*: Input/output types declared for each job
* *Dependency management*: Automatic topological sorting and execution ordering (see the sketch below)
* *Multiple execution patterns*: Linear pipelines, fan-out/fan-in, conditional execution
* *Workflow validation*: Cycle detection, reachability checks, type validation
* *Composable jobs*: Reusable worker definitions with clear interfaces
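
Execution order is derived from each job's `needs` declarations by topologically sorting the dependency graph; the same walk detects cycles. A minimal sketch of that derivation (Kahn's algorithm; purely illustrative, since Fractor's validator and executor handle this internally):

[source,ruby]
----
# Derive an execution order from job dependencies (illustrative sketch).
# `jobs` maps each job name to the list of jobs it needs.
def topological_order(jobs)
  indegree = jobs.keys.to_h { |name| [name, jobs[name].size] }
  ready = indegree.select { |_, degree| degree.zero? }.keys
  order = []

  until ready.empty?
    job = ready.shift
    order << job
    jobs.each do |other, needs|
      next unless needs.include?(job)
      indegree[other] -= 1
      ready << other if indegree[other].zero?
    end
  end

  raise "cycle detected" if order.size < jobs.size

  order
end

topological_order(
  "process" => [], "validate" => ["process"], "finalize" => ["validate"]
)
# => ["process", "validate", "finalize"]
----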

=== Available examples

==== Simple Linear Workflow

link:../examples/workflow/simple_linear/simple_linear_workflow.rb[Simple Linear Workflow]: Three-job sequential pipeline demonstrating basic workflow concepts.

==== Fan-Out Workflow

link:../examples/workflow/fan_out/fan_out_workflow.rb[Fan-Out Workflow]: One job feeding multiple parallel jobs, then aggregating results.

==== Conditional Workflow

link:../examples/workflow/conditional/conditional_workflow.rb[Conditional Workflow]: Jobs that execute based on runtime conditions.

==== Simplified Workflow

link:../examples/workflow/simplified/simplified_workflow.rb[Simplified Workflow]: Demonstrates the simplified syntax with roughly 70% less code.

==== Retry Workflow

link:../examples/workflow/retry/retry_workflow.rb[Retry Workflow]: Demonstrates automatic retry with exponential, linear, and constant backoff strategies, error handlers, and fallback jobs.

==== Circuit Breaker Workflow

link:../examples/workflow/circuit_breaker/circuit_breaker_workflow.rb[Circuit Breaker Workflow]: Demonstrates the circuit breaker pattern for protecting against cascading failures, with shared circuit breakers and integration with retry logic.

==== Dead Letter Queue Workflow

link:../examples/workflow/dead_letter_queue/dead_letter_queue_workflow.rb[Dead Letter Queue Workflow]: Demonstrates capturing permanently failed work, custom notification handlers, file persistence, querying and filtering, and manual retry strategies.

=== Alternative workflow definition methods

==== YAML workflows (declarative configuration)

Define workflows in YAML files using a syntax similar to GitHub Actions. Ideal for:

* Configuration-driven workflows
* Workflow definition by non-programmers
* CI/CD integration
* Version-controlled workflow definitions

[source,yaml]
----
name: my-workflow
input_type: SimpleLinearExample::TextData
output_type: SimpleLinearExample::FinalOutput

jobs:
  - id: uppercase
    worker: SimpleLinearExample::UppercaseWorker
    inputs: workflow
    outputs_to_workflow: false

  - id: reverse
    worker: SimpleLinearExample::ReverseWorker
    needs: uppercase
    inputs: uppercase
    outputs_to_workflow: false

  - id: finalize
    worker: SimpleLinearExample::FinalizeWorker
    needs: reverse
    inputs: reverse
    outputs_to_workflow: true
    terminates: true
----

Load and execute YAML workflows:

[source,ruby]
----
require 'fractor/workflow/yaml_loader'

# Define a worker registry for class name mapping
worker_registry = {
  'SimpleLinearExample::UppercaseWorker' => SimpleLinearExample::UppercaseWorker,
  'SimpleLinearExample::ReverseWorker' => SimpleLinearExample::ReverseWorker,
  'SimpleLinearExample::FinalizeWorker' => SimpleLinearExample::FinalizeWorker,
  'SimpleLinearExample::TextData' => SimpleLinearExample::TextData,
  'SimpleLinearExample::FinalOutput' => SimpleLinearExample::FinalOutput
}

# Load the workflow from a YAML file
workflow_class = Fractor::Workflow::YamlLoader.load_file(
  'path/to/workflow.yml',
  worker_registry: worker_registry
)

# Execute the workflow
result = workflow_class.new.execute(input_data)
----

==== Programmatic Builder API (dynamic construction)

Build workflows programmatically using a fluent API. Ideal for:

* Dynamic workflow generation
* Conditional workflow structures
* Programmatic workflow templates
* Runtime workflow modifications

[source,ruby]
----
require 'fractor/workflow/builder'

builder = Fractor::Workflow::Builder.new("dynamic-workflow")
  .input_type(InputData)
  .output_type(OutputData)
  .add_job("process", ProcessWorker, inputs: :workflow)
  .add_job("validate", ValidateWorker,
           needs: "process",
           inputs: "process")
  .add_job("finalize", FinalizeWorker,
           needs: "validate",
           inputs: "validate",
           outputs_to_workflow: true,
           terminates: true)

# Build and execute
workflow_class = builder.build!
result = workflow_class.new.execute(input_data)

# Or clone and modify for variants
dev_builder = builder.clone
dev_builder.add_job("debug", DebugWorker, needs: "validate")
dev_workflow = dev_builder.build!
----

==== Ruby DSL (embedded workflows)

Define workflows directly in Ruby code using the declarative DSL. Ideal for:

* In-code workflow definitions
* Strong typing and IDE support
* Complex workflow logic
* Integration with application code

[source,ruby]
----
class MyWorkflow < Fractor::Workflow
  workflow "my-workflow" do
    input_type InputData
    output_type OutputData

    start_with "process"
    end_with "finalize"

    job "process" do
      runs_with ProcessWorker
      inputs_from_workflow
    end

    job "finalize" do
      needs "process"
      runs_with FinalizeWorker
      inputs_from_job "process"
      outputs_to_workflow
      terminates_workflow
    end
  end
end

result = MyWorkflow.new.execute(input_data)
----

=== Helper worker base classes

Fractor provides helper base classes that reduce boilerplate for common worker patterns:

[source,ruby]
----
require 'fractor/workflow/helpers'

# Simple transformation pattern
class UppercaseWorker < Fractor::Workflow::SimpleWorker
  input_type TextData
  output_type TextResult

  def transform(input)
    TextResult.new(text: input.text.upcase)
  end
end

# Collection mapping pattern
class ProcessItemsWorker < Fractor::Workflow::MapWorker
  input_type ItemList
  output_type ProcessedList

  def map_item(item)
    # Transform each item
    ProcessedItem.new(data: item.data.upcase)
  end
end

# Collection filtering pattern
class FilterValidWorker < Fractor::Workflow::FilterWorker
  input_type ItemList
  output_type FilteredList

  def filter_item?(item)
    item.valid? && item.score > 0.5
  end
end

# Collection aggregation pattern
class SummarizeWorker < Fractor::Workflow::ReduceWorker
  input_type ItemList
  output_type Summary

  def reduce_items(items)
    total = items.sum(&:value)
    Summary.new(total: total, count: items.size)
  end
end

# Validation pattern with error collection
class ValidateDataWorker < Fractor::Workflow::ValidationWorker
  input_type InputData
  output_type ValidationResult

  def validate(input)
    errors = []
    errors << "Name is required" if input.name.nil?
    errors << "Age must be positive" if input.age <= 0

    ValidationResult.new(
      valid: errors.empty?,
      errors: errors,
      data: input
    )
  end
end
----

These helper classes handle the boilerplate of creating `WorkResult` objects and managing the worker lifecycle, letting you focus on the core transformation logic.

== Next steps

* Read the link:../examples/workflow/simplified/README/[Simplified Workflows] guide
* Explore the link:../examples/workflow/README/[Workflow Examples]
* Learn about link:../pages/core-concepts/[Core Concepts]
* See link:../guides/pipeline-mode/[Pipeline Mode] for batch processing workflows