fractor 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +227 -102
- data/README.adoc +113 -1940
- data/docs/.lycheeignore +16 -0
- data/docs/Gemfile +24 -0
- data/docs/README.md +157 -0
- data/docs/_config.yml +151 -0
- data/docs/_features/error-handling.adoc +1192 -0
- data/docs/_features/index.adoc +80 -0
- data/docs/_features/monitoring.adoc +589 -0
- data/docs/_features/signal-handling.adoc +202 -0
- data/docs/_features/workflows.adoc +1235 -0
- data/docs/_guides/continuous-mode.adoc +736 -0
- data/docs/_guides/cookbook.adoc +1133 -0
- data/docs/_guides/index.adoc +55 -0
- data/docs/_guides/pipeline-mode.adoc +730 -0
- data/docs/_guides/troubleshooting.adoc +358 -0
- data/docs/_pages/architecture.adoc +1390 -0
- data/docs/_pages/core-concepts.adoc +1392 -0
- data/docs/_pages/design-principles.adoc +862 -0
- data/docs/_pages/getting-started.adoc +290 -0
- data/docs/_pages/installation.adoc +143 -0
- data/docs/_reference/api.adoc +1080 -0
- data/docs/_reference/error-reporting.adoc +670 -0
- data/docs/_reference/examples.adoc +181 -0
- data/docs/_reference/index.adoc +96 -0
- data/docs/_reference/troubleshooting.adoc +862 -0
- data/docs/_tutorials/complex-workflows.adoc +1022 -0
- data/docs/_tutorials/data-processing-pipeline.adoc +740 -0
- data/docs/_tutorials/first-application.adoc +384 -0
- data/docs/_tutorials/index.adoc +48 -0
- data/docs/_tutorials/long-running-services.adoc +931 -0
- data/docs/assets/images/favicon-16.png +0 -0
- data/docs/assets/images/favicon-32.png +0 -0
- data/docs/assets/images/favicon-48.png +0 -0
- data/docs/assets/images/favicon.ico +0 -0
- data/docs/assets/images/favicon.png +0 -0
- data/docs/assets/images/favicon.svg +45 -0
- data/docs/assets/images/fractor-icon.svg +49 -0
- data/docs/assets/images/fractor-logo.svg +61 -0
- data/docs/index.adoc +131 -0
- data/docs/lychee.toml +39 -0
- data/examples/api_aggregator/README.adoc +627 -0
- data/examples/api_aggregator/api_aggregator.rb +376 -0
- data/examples/auto_detection/README.adoc +407 -29
- data/examples/continuous_chat_common/message_protocol.rb +1 -1
- data/examples/error_reporting.rb +207 -0
- data/examples/file_processor/README.adoc +170 -0
- data/examples/file_processor/file_processor.rb +615 -0
- data/examples/file_processor/sample_files/invalid.csv +1 -0
- data/examples/file_processor/sample_files/orders.xml +24 -0
- data/examples/file_processor/sample_files/products.json +23 -0
- data/examples/file_processor/sample_files/users.csv +6 -0
- data/examples/hierarchical_hasher/README.adoc +629 -41
- data/examples/image_processor/README.adoc +610 -0
- data/examples/image_processor/image_processor.rb +349 -0
- data/examples/image_processor/processed_images/sample_10_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_1_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_2_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_3_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_4_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_5_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_6_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_7_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_8_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_9_processed.jpg.json +12 -0
- data/examples/image_processor/test_images/sample_1.png +1 -0
- data/examples/image_processor/test_images/sample_10.png +1 -0
- data/examples/image_processor/test_images/sample_2.png +1 -0
- data/examples/image_processor/test_images/sample_3.png +1 -0
- data/examples/image_processor/test_images/sample_4.png +1 -0
- data/examples/image_processor/test_images/sample_5.png +1 -0
- data/examples/image_processor/test_images/sample_6.png +1 -0
- data/examples/image_processor/test_images/sample_7.png +1 -0
- data/examples/image_processor/test_images/sample_8.png +1 -0
- data/examples/image_processor/test_images/sample_9.png +1 -0
- data/examples/log_analyzer/README.adoc +662 -0
- data/examples/log_analyzer/log_analyzer.rb +579 -0
- data/examples/log_analyzer/sample_logs/apache.log +20 -0
- data/examples/log_analyzer/sample_logs/json.log +15 -0
- data/examples/log_analyzer/sample_logs/nginx.log +15 -0
- data/examples/log_analyzer/sample_logs/rails.log +29 -0
- data/examples/multi_work_type/README.adoc +576 -26
- data/examples/performance_monitoring.rb +120 -0
- data/examples/pipeline_processing/README.adoc +740 -26
- data/examples/pipeline_processing/pipeline_processing.rb +2 -2
- data/examples/priority_work_example.rb +155 -0
- data/examples/producer_subscriber/README.adoc +889 -46
- data/examples/scatter_gather/README.adoc +829 -27
- data/examples/simple/README.adoc +347 -0
- data/examples/specialized_workers/README.adoc +622 -26
- data/examples/specialized_workers/specialized_workers.rb +44 -8
- data/examples/stream_processor/README.adoc +206 -0
- data/examples/stream_processor/stream_processor.rb +284 -0
- data/examples/web_scraper/README.adoc +625 -0
- data/examples/web_scraper/web_scraper.rb +285 -0
- data/examples/workflow/README.adoc +406 -0
- data/examples/workflow/circuit_breaker/README.adoc +360 -0
- data/examples/workflow/circuit_breaker/circuit_breaker_workflow.rb +225 -0
- data/examples/workflow/conditional/README.adoc +483 -0
- data/examples/workflow/conditional/conditional_workflow.rb +215 -0
- data/examples/workflow/dead_letter_queue/README.adoc +374 -0
- data/examples/workflow/dead_letter_queue/dead_letter_queue_workflow.rb +217 -0
- data/examples/workflow/fan_out/README.adoc +381 -0
- data/examples/workflow/fan_out/fan_out_workflow.rb +202 -0
- data/examples/workflow/retry/README.adoc +248 -0
- data/examples/workflow/retry/retry_workflow.rb +195 -0
- data/examples/workflow/simple_linear/README.adoc +267 -0
- data/examples/workflow/simple_linear/simple_linear_workflow.rb +175 -0
- data/examples/workflow/simplified/README.adoc +329 -0
- data/examples/workflow/simplified/simplified_workflow.rb +222 -0
- data/exe/fractor +10 -0
- data/lib/fractor/cli.rb +288 -0
- data/lib/fractor/configuration.rb +307 -0
- data/lib/fractor/continuous_server.rb +60 -65
- data/lib/fractor/error_formatter.rb +72 -0
- data/lib/fractor/error_report_generator.rb +152 -0
- data/lib/fractor/error_reporter.rb +244 -0
- data/lib/fractor/error_statistics.rb +147 -0
- data/lib/fractor/execution_tracer.rb +162 -0
- data/lib/fractor/logger.rb +230 -0
- data/lib/fractor/main_loop_handler.rb +406 -0
- data/lib/fractor/main_loop_handler3.rb +135 -0
- data/lib/fractor/main_loop_handler4.rb +299 -0
- data/lib/fractor/performance_metrics_collector.rb +181 -0
- data/lib/fractor/performance_monitor.rb +215 -0
- data/lib/fractor/performance_report_generator.rb +202 -0
- data/lib/fractor/priority_work.rb +93 -0
- data/lib/fractor/priority_work_queue.rb +189 -0
- data/lib/fractor/result_aggregator.rb +32 -0
- data/lib/fractor/shutdown_handler.rb +168 -0
- data/lib/fractor/signal_handler.rb +80 -0
- data/lib/fractor/supervisor.rb +382 -269
- data/lib/fractor/supervisor_logger.rb +88 -0
- data/lib/fractor/version.rb +1 -1
- data/lib/fractor/work.rb +12 -0
- data/lib/fractor/work_distribution_manager.rb +151 -0
- data/lib/fractor/work_queue.rb +20 -0
- data/lib/fractor/work_result.rb +181 -9
- data/lib/fractor/worker.rb +73 -0
- data/lib/fractor/workflow/builder.rb +210 -0
- data/lib/fractor/workflow/chain_builder.rb +169 -0
- data/lib/fractor/workflow/circuit_breaker.rb +183 -0
- data/lib/fractor/workflow/circuit_breaker_orchestrator.rb +208 -0
- data/lib/fractor/workflow/circuit_breaker_registry.rb +112 -0
- data/lib/fractor/workflow/dead_letter_queue.rb +334 -0
- data/lib/fractor/workflow/execution_hooks.rb +39 -0
- data/lib/fractor/workflow/execution_strategy.rb +225 -0
- data/lib/fractor/workflow/execution_trace.rb +134 -0
- data/lib/fractor/workflow/helpers.rb +191 -0
- data/lib/fractor/workflow/job.rb +290 -0
- data/lib/fractor/workflow/job_dependency_validator.rb +120 -0
- data/lib/fractor/workflow/logger.rb +110 -0
- data/lib/fractor/workflow/pre_execution_context.rb +193 -0
- data/lib/fractor/workflow/retry_config.rb +156 -0
- data/lib/fractor/workflow/retry_orchestrator.rb +184 -0
- data/lib/fractor/workflow/retry_strategy.rb +93 -0
- data/lib/fractor/workflow/structured_logger.rb +30 -0
- data/lib/fractor/workflow/type_compatibility_validator.rb +222 -0
- data/lib/fractor/workflow/visualizer.rb +211 -0
- data/lib/fractor/workflow/workflow_context.rb +132 -0
- data/lib/fractor/workflow/workflow_executor.rb +669 -0
- data/lib/fractor/workflow/workflow_result.rb +55 -0
- data/lib/fractor/workflow/workflow_validator.rb +295 -0
- data/lib/fractor/workflow.rb +333 -0
- data/lib/fractor/wrapped_ractor.rb +66 -101
- data/lib/fractor/wrapped_ractor3.rb +161 -0
- data/lib/fractor/wrapped_ractor4.rb +242 -0
- data/lib/fractor.rb +92 -4
- metadata +179 -6
- data/tests/sample.rb.bak +0 -309
- data/tests/sample_working.rb.bak +0 -209
|
@@ -0,0 +1,669 @@
|
|
|
1
|
+
# frozen_string_literal: true

require "set"
require_relative "retry_config"
require_relative "circuit_breaker"
require_relative "circuit_breaker_registry"
require_relative "circuit_breaker_orchestrator"
require_relative "retry_orchestrator"
require_relative "pre_execution_context"
require_relative "execution_hooks"
require_relative "workflow_result"

module Fractor
  class Workflow
    # Orchestrates workflow execution by managing job execution order and
    # data flow.
    #
    # Responsibilities:
    # - Topologically sorts jobs by their declared dependencies.
    # - Executes each job (optionally with retry and/or circuit-breaker
    #   protection) through a single-job Supervisor.
    # - Propagates job outputs through the shared WorkflowContext.
    # - Records an optional ExecutionTrace, fires lifecycle hooks, and routes
    #   retry-exhausted work to an optional Dead Letter Queue.
    class WorkflowExecutor
      attr_reader :workflow, :context, :completed_jobs, :failed_jobs,
                  :trace, :hooks, :pre_execution_context

      # @param workflow [Workflow] the workflow instance to execute
      # @param input [Object] the workflow-level input value
      # @param correlation_id [String, nil] id propagated into logs and trace
      # @param logger [#info, #warn, #error, nil] structured logger
      # @param trace [Boolean] when true, record an ExecutionTrace
      # @param dead_letter_queue [Object, nil] sink (responding to #add and
      #   #size) for work whose retries were exhausted
      def initialize(workflow, input, correlation_id: nil, logger: nil,
                     trace: false, dead_letter_queue: nil)
        @workflow = workflow
        @correlation_id = correlation_id
        @logger = logger
        @context = WorkflowContext.new(
          input,
          correlation_id: correlation_id,
          logger: logger,
        )
        @completed_jobs = Set.new
        @failed_jobs = Set.new
        @hooks = ExecutionHooks.new
        @trace = trace ? create_trace : nil
        @circuit_breakers = Workflow::CircuitBreakerRegistry.new
        @dead_letter_queue = dead_letter_queue
        @pre_execution_context = PreExecutionContext.new(workflow, input)
      end

      # Execute the workflow and return the result.
      #
      # Jobs run group by group in dependency order; execution stops early
      # when a terminating job completes (see #workflow_terminated?).
      #
      # @return [WorkflowResult] The execution result
      # @raise [WorkflowExecutionError] when a job (and its fallback, if any)
      #   fails, or when the dependency graph cannot be scheduled
      def execute
        # Run pre-execution validation before any job starts.
        @pre_execution_context.validate!

        log_workflow_start
        @hooks.trigger(:workflow_start, workflow)
        @trace&.start_job(
          job_name: "workflow",
          worker_class: workflow.class.name,
        )

        execution_order = compute_execution_order
        start_time = Time.now

        execution_order.each do |job_group|
          execute_job_group(job_group)
          break if workflow_terminated?
        end

        end_time = Time.now
        @trace&.complete!

        log_workflow_complete(end_time - start_time)
        result = build_result(start_time, end_time)
        @hooks.trigger(:workflow_complete, result)
        result
      end

      # Register a hook for workflow/job lifecycle events
      # (:workflow_start, :workflow_complete, :job_start, :job_complete,
      # :job_error — the events triggered by this executor).
      #
      # @param event [Symbol] The event to hook into
      # @param block [Proc] The callback to execute
      def on(event, &)
        @hooks.register(event, &)
      end

      # Register a custom pre-execution validation hook.
      # The hook receives the PreExecutionContext and can add errors/warnings.
      #
      # @param name [String, Symbol] Optional name for the validation
      # @yield [context] Block that receives the pre-execution context
      #
      # @example Add custom validation
      #   executor.validate_before_execution(:check_api_key) do |ctx|
      #     unless ctx.input.api_key
      #       ctx.add_error("API key is required")
      #     end
      #   end
      def validate_before_execution(name = nil, &)
        @pre_execution_context.add_validation_hook(name, &)
      end

      private

      # Topological sort of the workflow's jobs.
      #
      # @return [Array<Array<Symbol>>] groups of job names; every job in a
      #   group has all of its dependencies satisfied by earlier groups
      # @raise [WorkflowExecutionError] if no progress can be made (a
      #   dependency cycle that slipped past validation)
      def compute_execution_order
        jobs = workflow.class.jobs
        order = []
        remaining = jobs.keys.to_set
        processed = Set.new

        until remaining.empty?
          # Find jobs whose dependencies are all satisfied.
          ready = remaining.select do |job_name|
            job = jobs[job_name]
            job.dependencies.all? { |dep| processed.include?(dep) }
          end

          if ready.empty?
            # This should not happen if validation was done correctly.
            raise WorkflowExecutionError,
                  "Cannot find next jobs to execute. Remaining: #{remaining.to_a.join(', ')}"
          end

          order << ready
          ready.each do |job_name|
            processed.add(job_name)
            remaining.delete(job_name)
          end
        end

        puts "Execution order: #{order.inspect}" if ENV["FRACTOR_DEBUG"]
        order
      end

      # Run one group of parallelizable jobs. Jobs whose condition rejects
      # the current context are marked :skipped; the rest run sequentially
      # (not in parallel, to avoid Ractor issues).
      def execute_job_group(job_names)
        puts "Executing job group: #{job_names.inspect}" if ENV["FRACTOR_DEBUG"]
        jobs = job_names.map { |name| workflow.class.jobs[name] }

        # Filter jobs based on conditions.
        executable_jobs = jobs.select { |job| job.should_execute?(@context) }

        # Mark skipped jobs.
        (jobs - executable_jobs).each do |job|
          job.state(:skipped)
          puts "Job '#{job.name}' skipped due to condition" if ENV["FRACTOR_DEBUG"]
        end

        return if executable_jobs.empty?

        if executable_jobs.size == 1
          # Single job - execute directly.
          execute_job(executable_jobs.first)
        else
          # Multiple jobs - execute sequentially (not parallel to avoid Ractor issues).
          puts "Executing #{executable_jobs.size} jobs sequentially" if ENV["FRACTOR_DEBUG"]
          executable_jobs.each do |job|
            execute_job(job)
          end
        end
      end

      # Execute a single job: trace it, run it (with retry if configured),
      # store its output, and fire hooks. On failure, try the fallback job
      # if one is configured; otherwise raise WorkflowExecutionError.
      def execute_job(job)
        puts "Executing job: #{job.name}" if ENV["FRACTOR_DEBUG"]
        job.state(:running)

        # Start job trace.
        job_trace = @trace&.start_job(
          job_name: job.name,
          worker_class: job.worker_class.name,
        )

        # Log and trigger hook.
        log_job_start(job)
        @hooks.trigger(:job_start, job, @context)

        start_time = Time.now

        begin
          # Execute with retry logic if configured.
          output = if job.retry_enabled?
                     execute_job_with_retry(job, job_trace)
                   else
                     execute_job_once(job, job_trace)
                   end

          duration = Time.now - start_time

          # Store output in context.
          @context.store_job_output(job.name, output)
          @completed_jobs.add(job.name)
          job.state(:completed)

          # Update trace.
          job_trace&.complete!(output: output)

          # Log and trigger hook.
          log_job_complete(job, duration)
          @hooks.trigger(:job_complete, job, output, duration)

          puts "Job '#{job.name}' completed successfully" if ENV["FRACTOR_DEBUG"]
        rescue StandardError => e
          @failed_jobs.add(job.name)
          job.state(:failed)

          # Update trace.
          job_trace&.fail!(error: e)

          # Execute error handlers.
          job.handle_error(e, @context)

          # Log and trigger hook.
          log_job_error(job, e, has_fallback: !!job.fallback_job)
          @hooks.trigger(:job_error, job, e, @context)

          puts "Job '#{job.name}' failed: #{e.message}" if ENV["FRACTOR_DEBUG"]

          # Try fallback job if configured.
          if job.fallback_job
            execute_fallback_job(job, e, start_time, job_trace)
          else
            raise WorkflowExecutionError,
                  "Job '#{job.name}' failed: #{e.message}\n#{e.backtrace.join("\n")}"
          end
        end
      end

      # Single attempt at a job: build its input, wrap it as Work, and run
      # it through a supervisor (with a circuit breaker when configured).
      #
      # @return [Object] the job's output value
      def execute_job_once(job, job_trace)
        # Build input for this job.
        job_input = @context.build_job_input(job)
        job_trace&.set_input(job_input)

        # Create work item - if job_input is already a Work object, use it directly
        # to avoid double-wrapping (e.g., when using custom Work subclasses).
        work = if job_input.is_a?(Work)
                 job_input
               else
                 Work.new(job_input)
               end

        # Execute with circuit breaker if configured.
        if job.circuit_breaker_enabled?
          execute_with_circuit_breaker(job, work)
        else
          execute_job_with_supervisor(job, work)
        end
      end

      # Run a job under its RetryConfig. On final failure the work is routed
      # to the Dead Letter Queue (with retry metadata) and the error re-raised.
      def execute_job_with_retry(job, job_trace)
        retry_config = job.retry_config

        # Create retry orchestrator with the job's retry configuration.
        orchestrator = RetryOrchestrator.new(retry_config,
                                             debug: ENV["FRACTOR_DEBUG"] == "1")

        # Execute with retry logic.
        orchestrator.execute_with_retry(job) do |j|
          execute_job_once(j, job_trace)
        end
      rescue StandardError => e
        # `orchestrator` is nil here if RetryOrchestrator.new itself raised;
        # safe-navigate so we don't mask the original error with NoMethodError.
        add_to_dead_letter_queue(job, e, orchestrator&.state)
        raise e
      end

      # Run the configured fallback job after +job+ failed with +error+.
      # On success the fallback's output is also recorded under the original
      # job's name so downstream jobs see it.
      def execute_fallback_job(job, error, start_time, job_trace)
        fallback_job_name = job.fallback_job
        fallback_job = workflow.class.jobs[fallback_job_name]

        unless fallback_job
          raise WorkflowExecutionError,
                "Fallback job '#{fallback_job_name}' not found for job '#{job.name}'"
        end

        log_fallback_execution(job, fallback_job, error)

        begin
          # Execute fallback job.
          execute_job(fallback_job)

          # Use fallback job's output.
          output = @context.job_output(fallback_job_name)
          duration = Time.now - start_time

          # Store output under original job name as well.
          @context.store_job_output(job.name, output)
          @completed_jobs.add(job.name)
          job.state(:completed)

          # Update trace.
          job_trace&.complete!(output: output)

          log_job_complete(job, duration)
          @hooks.trigger(:job_complete, job, output, duration)
        rescue StandardError => e
          log_fallback_failed(job, fallback_job, e)
          # Include the fallback's failure cause so it isn't lost.
          raise WorkflowExecutionError,
                "Job '#{job.name}' and fallback '#{fallback_job_name}' both failed: #{e.message}"
        end
      end

      # NOTE(review): not called by this executor (job groups run
      # sequentially in #execute_job_group); kept for compatibility.
      # Runs each job in its own supervisor on a separate thread and joins
      # them, storing outputs or raising on the first failure.
      def execute_jobs_parallel(jobs)
        puts "Executing #{jobs.size} jobs in parallel: #{jobs.map(&:name).join(', ')}" if ENV["FRACTOR_DEBUG"]

        # Create supervisors for each job.
        supervisors = jobs.map do |job|
          job.state(:running)
          job_input = @context.build_job_input(job)
          work = Work.new(job_input)

          supervisor = build_single_job_supervisor(job)
          supervisor.add_work_item(work)

          { job: job, supervisor: supervisor }
        end

        # Run all supervisors in parallel using threads.
        threads = supervisors.map do |spec|
          Thread.new do
            spec[:supervisor].run
            { job: spec[:job], success: true, supervisor: spec[:supervisor] }
          rescue StandardError => e
            { job: spec[:job], success: false, error: e }
          end
        end

        # Wait for all to complete and process results.
        threads.each do |thread|
          result = thread.value
          job = result[:job]

          if result[:success]
            # Extract output from supervisor results.
            job_results = result[:supervisor].results.results
            if job_results.empty?
              raise WorkflowExecutionError,
                    "Job '#{job.name}' produced no results"
            end

            output = job_results.first.result
            @context.store_job_output(job.name, output)
            @completed_jobs.add(job.name)
            job.state(:completed)

            puts "Job '#{job.name}' completed successfully" if ENV["FRACTOR_DEBUG"]
          else
            @failed_jobs.add(job.name)
            job.state(:failed)
            error = result[:error]
            puts "Job '#{job.name}' failed: #{error.message}" if ENV["FRACTOR_DEBUG"]
            raise WorkflowExecutionError,
                  "Job '#{job.name}' failed: #{error.message}"
          end
        end
      end

      # Run one work item through a fresh single-pool supervisor and return
      # the first result value.
      #
      # @raise [WorkflowExecutionError] if the supervisor collected errors or
      #   produced no results
      def execute_job_with_supervisor(job, work)
        supervisor = build_single_job_supervisor(job)

        supervisor.add_work_item(work)
        supervisor.run

        # Check for errors first (before checking results).
        unless supervisor.results.errors.empty?
          error = supervisor.results.errors.first
          raise WorkflowExecutionError,
                "Job '#{job.name}' encountered error: #{error.error}"
        end

        # Get the result.
        results = supervisor.results.results
        if results.empty?
          raise WorkflowExecutionError, "Job '#{job.name}' produced no results"
        end

        results.first.result
      end

      # Build a Supervisor with a single worker pool for +job+.
      # Shared by the sequential and parallel execution paths.
      def build_single_job_supervisor(job)
        Supervisor.new(
          worker_pools: [
            {
              worker_class: job.worker_class,
              num_workers: job.num_workers || 1,
            },
          ],
        )
      end

      # @return [Boolean] true when any terminating job has completed,
      #   which stops further job groups from running
      def workflow_terminated?
        workflow.class.jobs.any? do |name, job|
          job.terminates && @completed_jobs.include?(name)
        end
      end

      # Build a fresh ExecutionTrace with a random execution id.
      def create_trace
        require "securerandom"
        execution_id = "exec-#{SecureRandom.hex(8)}"
        ExecutionTrace.new(
          workflow_name: workflow.class.workflow_name,
          execution_id: execution_id,
          correlation_id: @context.correlation_id,
        )
      end

      def log_workflow_start
        return unless @context.logger

        @context.logger.info(
          "Workflow starting",
          workflow: workflow.class.workflow_name,
          correlation_id: @context.correlation_id,
        )
      end

      def log_workflow_complete(duration)
        return unless @context.logger

        @context.logger.info(
          "Workflow complete",
          workflow: workflow.class.workflow_name,
          duration_ms: (duration * 1000).round(2),
          jobs_completed: @completed_jobs.size,
          jobs_failed: @failed_jobs.size,
        )
      end

      def log_job_start(job)
        return unless @context.logger

        @context.logger.info(
          "Job starting",
          job: job.name,
          worker: job.worker_class.name,
        )
      end

      def log_job_complete(job, duration)
        return unless @context.logger

        @context.logger.info(
          "Job complete",
          job: job.name,
          duration_ms: (duration * 1000).round(2),
        )
      end

      def log_job_error(job, error, has_fallback: false)
        return unless @context.logger

        # Log at WARN level if fallback is available (error is handled),
        # otherwise log at ERROR level (error causes workflow failure).
        severity = has_fallback ? :warn : :error

        @context.logger.public_send(
          severity,
          "Job '#{job.name}' encountered error: #{error}",
          job: job.name,
          error: error.class.name,
        )
      end

      # NOTE(review): the three log_retry_* helpers are not invoked from this
      # file — presumably reserved for retry-orchestrator callbacks; verify
      # against RetryOrchestrator before removing.
      def log_retry_attempt(job, retry_state, delay)
        return unless @context.logger

        @context.logger.warn(
          "Job retry attempt",
          job: job.name,
          attempt: retry_state.attempt,
          max_attempts: job.retry_config.max_attempts,
          delay_seconds: delay,
          last_error: retry_state.last_error&.message,
        )
      end

      def log_retry_success(job, retry_state)
        return unless @context.logger

        @context.logger.info(
          "Job retry succeeded",
          job: job.name,
          successful_attempt: retry_state.attempt,
          total_attempts: retry_state.attempt,
          total_time: retry_state.total_time,
        )
      end

      def log_retry_exhausted(job, retry_state)
        return unless @context.logger

        @context.logger.error(
          "Job retry attempts exhausted",
          job: job.name,
          total_attempts: retry_state.attempt - 1,
          total_time: retry_state.total_time,
          errors: retry_state.summary[:errors],
        )
      end

      def log_fallback_execution(job, fallback_job, error)
        return unless @context.logger

        @context.logger.warn(
          "Executing fallback job",
          job: job.name,
          fallback_job: fallback_job.name,
          original_error: error.message,
        )
      end

      def log_fallback_failed(job, fallback_job, error)
        return unless @context.logger

        @context.logger.error(
          "Fallback job failed",
          job: job.name,
          fallback_job: fallback_job.name,
          error: error.message,
        )
      end

      # Execute +work+ for +job+ through that job's circuit breaker.
      # An open circuit is surfaced as a WorkflowExecutionError.
      def execute_with_circuit_breaker(job, work)
        breaker_key = job.circuit_breaker_key

        # Get or create circuit breaker orchestrator for this job.
        orchestrator = @circuit_breakers.get_or_create_orchestrator(
          breaker_key,
          **job.circuit_breaker_config.slice(:threshold, :timeout,
                                             :half_open_calls),
          job_name: job.name,
          debug: ENV["FRACTOR_DEBUG"] == "1",
        )

        # Log circuit state before execution.
        log_circuit_breaker_state(job, orchestrator)

        begin
          orchestrator.execute_with_breaker(job) do
            execute_job_with_supervisor(job, work)
          end
        rescue Workflow::CircuitOpenError => e
          log_circuit_breaker_open(job, orchestrator)
          raise WorkflowExecutionError,
                "Circuit breaker open for job '#{job.name}': #{e.message}"
        end
      end

      def log_circuit_breaker_state(job, breaker)
        return unless @context.logger
        return if breaker.closed?

        @context.logger.warn(
          "Circuit breaker state",
          job: job.name,
          state: breaker.state,
          failure_count: breaker.failure_count,
          threshold: breaker.threshold,
        )
      end

      def log_circuit_breaker_open(job, breaker)
        return unless @context.logger

        @context.logger.error(
          "Circuit breaker open",
          job: job.name,
          failure_count: breaker.failure_count,
          threshold: breaker.threshold,
          last_failure: breaker.last_failure_time,
        )
      end

      # Record a failed work item in the Dead Letter Queue (no-op when no
      # DLQ was configured), attaching job/workflow metadata and, when
      # available, the retry history.
      #
      # @param retry_state [Hash, Object, nil] either the Hash from
      #   RetryOrchestrator#state or a RetryState object
      def add_to_dead_letter_queue(job, error, retry_state = nil)
        return unless @dead_letter_queue

        # Build job input for DLQ entry.
        job_input = @context.build_job_input(job)
        work = Work.new(job_input)

        # Add metadata about the failure.
        metadata = {
          job_name: job.name,
          worker_class: job.worker_class.name,
          correlation_id: @context.correlation_id,
          workflow_name: @workflow.class.workflow_name,
        }

        # Add retry information if available.
        if retry_state
          # Handle both RetryState object and Hash from orchestrator.
          if retry_state.is_a?(Hash)
            # From RetryOrchestrator.state
            metadata[:retry_attempts] = retry_state[:attempts] - 1
            metadata[:max_attempts] = retry_state[:max_attempts]
            metadata[:last_error] = retry_state[:last_error]
            metadata[:total_retry_time] = retry_state[:total_time]
            metadata[:all_errors] = retry_state[:all_errors]
          else
            # From RetryState object.
            metadata[:retry_attempts] = retry_state.attempt - 1
            metadata[:total_retry_time] = retry_state.total_time
            metadata[:all_errors] = retry_state.summary[:errors]
          end
        end

        # Add context from workflow.
        context = {
          workflow_input: @context.workflow_input,
          completed_jobs: @completed_jobs.to_a,
          failed_jobs: @failed_jobs.to_a,
        }

        @dead_letter_queue.add(work, error, context: context,
                                            metadata: metadata)

        log_added_to_dlq(job, error) if @context.logger
      end

      def log_added_to_dlq(job, error)
        @context.logger.warn(
          "Work added to Dead Letter Queue",
          job: job.name,
          error: error.class.name,
          message: error.message,
          dlq_size: @dead_letter_queue.size,
        )
      end

      # Assemble the WorkflowResult from accumulated state. success is true
      # only when no job ended up in @failed_jobs.
      def build_result(start_time, end_time)
        # Find the output from the end job.
        output = find_workflow_output

        WorkflowResult.new(
          workflow_name: workflow.class.workflow_name,
          output: output,
          completed_jobs: @completed_jobs.to_a,
          failed_jobs: @failed_jobs.to_a,
          execution_time: end_time - start_time,
          success: @failed_jobs.empty?,
          trace: @trace,
          correlation_id: @context.correlation_id,
        )
      end

      # Locate the workflow's output value: first a completed job that maps
      # to workflow output, then any completed declared end job, else nil.
      def find_workflow_output
        # Look for jobs that map to workflow output.
        workflow.class.jobs.each do |name, job|
          if job.outputs_to_workflow? && @completed_jobs.include?(name)
            output = @context.job_output(name)
            puts "Found workflow output from job '#{name}': #{output.class}" if ENV["FRACTOR_DEBUG"]
            return output
          end
        end

        # Fallback: return output from the first end job that completed.
        workflow.class.end_job_names.each do |end_job_spec|
          job_name = end_job_spec[:name]
          if @completed_jobs.include?(job_name)
            output = @context.job_output(job_name)
            puts "Using end job '#{job_name}' output: #{output.class}" if ENV["FRACTOR_DEBUG"]
            return output
          end
        end

        puts "Warning: No workflow output found!" if ENV["FRACTOR_DEBUG"]
        nil
      end
    end
  end
end
|