fractor 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +227 -102
  3. data/README.adoc +113 -1940
  4. data/docs/.lycheeignore +16 -0
  5. data/docs/Gemfile +24 -0
  6. data/docs/README.md +157 -0
  7. data/docs/_config.yml +151 -0
  8. data/docs/_features/error-handling.adoc +1192 -0
  9. data/docs/_features/index.adoc +80 -0
  10. data/docs/_features/monitoring.adoc +589 -0
  11. data/docs/_features/signal-handling.adoc +202 -0
  12. data/docs/_features/workflows.adoc +1235 -0
  13. data/docs/_guides/continuous-mode.adoc +736 -0
  14. data/docs/_guides/cookbook.adoc +1133 -0
  15. data/docs/_guides/index.adoc +55 -0
  16. data/docs/_guides/pipeline-mode.adoc +730 -0
  17. data/docs/_guides/troubleshooting.adoc +358 -0
  18. data/docs/_pages/architecture.adoc +1390 -0
  19. data/docs/_pages/core-concepts.adoc +1392 -0
  20. data/docs/_pages/design-principles.adoc +862 -0
  21. data/docs/_pages/getting-started.adoc +290 -0
  22. data/docs/_pages/installation.adoc +143 -0
  23. data/docs/_reference/api.adoc +1080 -0
  24. data/docs/_reference/error-reporting.adoc +670 -0
  25. data/docs/_reference/examples.adoc +181 -0
  26. data/docs/_reference/index.adoc +96 -0
  27. data/docs/_reference/troubleshooting.adoc +862 -0
  28. data/docs/_tutorials/complex-workflows.adoc +1022 -0
  29. data/docs/_tutorials/data-processing-pipeline.adoc +740 -0
  30. data/docs/_tutorials/first-application.adoc +384 -0
  31. data/docs/_tutorials/index.adoc +48 -0
  32. data/docs/_tutorials/long-running-services.adoc +931 -0
  33. data/docs/assets/images/favicon-16.png +0 -0
  34. data/docs/assets/images/favicon-32.png +0 -0
  35. data/docs/assets/images/favicon-48.png +0 -0
  36. data/docs/assets/images/favicon.ico +0 -0
  37. data/docs/assets/images/favicon.png +0 -0
  38. data/docs/assets/images/favicon.svg +45 -0
  39. data/docs/assets/images/fractor-icon.svg +49 -0
  40. data/docs/assets/images/fractor-logo.svg +61 -0
  41. data/docs/index.adoc +131 -0
  42. data/docs/lychee.toml +39 -0
  43. data/examples/api_aggregator/README.adoc +627 -0
  44. data/examples/api_aggregator/api_aggregator.rb +376 -0
  45. data/examples/auto_detection/README.adoc +407 -29
  46. data/examples/continuous_chat_common/message_protocol.rb +1 -1
  47. data/examples/error_reporting.rb +207 -0
  48. data/examples/file_processor/README.adoc +170 -0
  49. data/examples/file_processor/file_processor.rb +615 -0
  50. data/examples/file_processor/sample_files/invalid.csv +1 -0
  51. data/examples/file_processor/sample_files/orders.xml +24 -0
  52. data/examples/file_processor/sample_files/products.json +23 -0
  53. data/examples/file_processor/sample_files/users.csv +6 -0
  54. data/examples/hierarchical_hasher/README.adoc +629 -41
  55. data/examples/image_processor/README.adoc +610 -0
  56. data/examples/image_processor/image_processor.rb +349 -0
  57. data/examples/image_processor/processed_images/sample_10_processed.jpg.json +12 -0
  58. data/examples/image_processor/processed_images/sample_1_processed.jpg.json +12 -0
  59. data/examples/image_processor/processed_images/sample_2_processed.jpg.json +12 -0
  60. data/examples/image_processor/processed_images/sample_3_processed.jpg.json +12 -0
  61. data/examples/image_processor/processed_images/sample_4_processed.jpg.json +12 -0
  62. data/examples/image_processor/processed_images/sample_5_processed.jpg.json +12 -0
  63. data/examples/image_processor/processed_images/sample_6_processed.jpg.json +12 -0
  64. data/examples/image_processor/processed_images/sample_7_processed.jpg.json +12 -0
  65. data/examples/image_processor/processed_images/sample_8_processed.jpg.json +12 -0
  66. data/examples/image_processor/processed_images/sample_9_processed.jpg.json +12 -0
  67. data/examples/image_processor/test_images/sample_1.png +1 -0
  68. data/examples/image_processor/test_images/sample_10.png +1 -0
  69. data/examples/image_processor/test_images/sample_2.png +1 -0
  70. data/examples/image_processor/test_images/sample_3.png +1 -0
  71. data/examples/image_processor/test_images/sample_4.png +1 -0
  72. data/examples/image_processor/test_images/sample_5.png +1 -0
  73. data/examples/image_processor/test_images/sample_6.png +1 -0
  74. data/examples/image_processor/test_images/sample_7.png +1 -0
  75. data/examples/image_processor/test_images/sample_8.png +1 -0
  76. data/examples/image_processor/test_images/sample_9.png +1 -0
  77. data/examples/log_analyzer/README.adoc +662 -0
  78. data/examples/log_analyzer/log_analyzer.rb +579 -0
  79. data/examples/log_analyzer/sample_logs/apache.log +20 -0
  80. data/examples/log_analyzer/sample_logs/json.log +15 -0
  81. data/examples/log_analyzer/sample_logs/nginx.log +15 -0
  82. data/examples/log_analyzer/sample_logs/rails.log +29 -0
  83. data/examples/multi_work_type/README.adoc +576 -26
  84. data/examples/performance_monitoring.rb +120 -0
  85. data/examples/pipeline_processing/README.adoc +740 -26
  86. data/examples/pipeline_processing/pipeline_processing.rb +2 -2
  87. data/examples/priority_work_example.rb +155 -0
  88. data/examples/producer_subscriber/README.adoc +889 -46
  89. data/examples/scatter_gather/README.adoc +829 -27
  90. data/examples/simple/README.adoc +347 -0
  91. data/examples/specialized_workers/README.adoc +622 -26
  92. data/examples/specialized_workers/specialized_workers.rb +44 -8
  93. data/examples/stream_processor/README.adoc +206 -0
  94. data/examples/stream_processor/stream_processor.rb +284 -0
  95. data/examples/web_scraper/README.adoc +625 -0
  96. data/examples/web_scraper/web_scraper.rb +285 -0
  97. data/examples/workflow/README.adoc +406 -0
  98. data/examples/workflow/circuit_breaker/README.adoc +360 -0
  99. data/examples/workflow/circuit_breaker/circuit_breaker_workflow.rb +225 -0
  100. data/examples/workflow/conditional/README.adoc +483 -0
  101. data/examples/workflow/conditional/conditional_workflow.rb +215 -0
  102. data/examples/workflow/dead_letter_queue/README.adoc +374 -0
  103. data/examples/workflow/dead_letter_queue/dead_letter_queue_workflow.rb +217 -0
  104. data/examples/workflow/fan_out/README.adoc +381 -0
  105. data/examples/workflow/fan_out/fan_out_workflow.rb +202 -0
  106. data/examples/workflow/retry/README.adoc +248 -0
  107. data/examples/workflow/retry/retry_workflow.rb +195 -0
  108. data/examples/workflow/simple_linear/README.adoc +267 -0
  109. data/examples/workflow/simple_linear/simple_linear_workflow.rb +175 -0
  110. data/examples/workflow/simplified/README.adoc +329 -0
  111. data/examples/workflow/simplified/simplified_workflow.rb +222 -0
  112. data/exe/fractor +10 -0
  113. data/lib/fractor/cli.rb +288 -0
  114. data/lib/fractor/configuration.rb +307 -0
  115. data/lib/fractor/continuous_server.rb +60 -65
  116. data/lib/fractor/error_formatter.rb +72 -0
  117. data/lib/fractor/error_report_generator.rb +152 -0
  118. data/lib/fractor/error_reporter.rb +244 -0
  119. data/lib/fractor/error_statistics.rb +147 -0
  120. data/lib/fractor/execution_tracer.rb +162 -0
  121. data/lib/fractor/logger.rb +230 -0
  122. data/lib/fractor/main_loop_handler.rb +406 -0
  123. data/lib/fractor/main_loop_handler3.rb +135 -0
  124. data/lib/fractor/main_loop_handler4.rb +299 -0
  125. data/lib/fractor/performance_metrics_collector.rb +181 -0
  126. data/lib/fractor/performance_monitor.rb +215 -0
  127. data/lib/fractor/performance_report_generator.rb +202 -0
  128. data/lib/fractor/priority_work.rb +93 -0
  129. data/lib/fractor/priority_work_queue.rb +189 -0
  130. data/lib/fractor/result_aggregator.rb +32 -0
  131. data/lib/fractor/shutdown_handler.rb +168 -0
  132. data/lib/fractor/signal_handler.rb +80 -0
  133. data/lib/fractor/supervisor.rb +382 -269
  134. data/lib/fractor/supervisor_logger.rb +88 -0
  135. data/lib/fractor/version.rb +1 -1
  136. data/lib/fractor/work.rb +12 -0
  137. data/lib/fractor/work_distribution_manager.rb +151 -0
  138. data/lib/fractor/work_queue.rb +20 -0
  139. data/lib/fractor/work_result.rb +181 -9
  140. data/lib/fractor/worker.rb +73 -0
  141. data/lib/fractor/workflow/builder.rb +210 -0
  142. data/lib/fractor/workflow/chain_builder.rb +169 -0
  143. data/lib/fractor/workflow/circuit_breaker.rb +183 -0
  144. data/lib/fractor/workflow/circuit_breaker_orchestrator.rb +208 -0
  145. data/lib/fractor/workflow/circuit_breaker_registry.rb +112 -0
  146. data/lib/fractor/workflow/dead_letter_queue.rb +334 -0
  147. data/lib/fractor/workflow/execution_hooks.rb +39 -0
  148. data/lib/fractor/workflow/execution_strategy.rb +225 -0
  149. data/lib/fractor/workflow/execution_trace.rb +134 -0
  150. data/lib/fractor/workflow/helpers.rb +191 -0
  151. data/lib/fractor/workflow/job.rb +290 -0
  152. data/lib/fractor/workflow/job_dependency_validator.rb +120 -0
  153. data/lib/fractor/workflow/logger.rb +110 -0
  154. data/lib/fractor/workflow/pre_execution_context.rb +193 -0
  155. data/lib/fractor/workflow/retry_config.rb +156 -0
  156. data/lib/fractor/workflow/retry_orchestrator.rb +184 -0
  157. data/lib/fractor/workflow/retry_strategy.rb +93 -0
  158. data/lib/fractor/workflow/structured_logger.rb +30 -0
  159. data/lib/fractor/workflow/type_compatibility_validator.rb +222 -0
  160. data/lib/fractor/workflow/visualizer.rb +211 -0
  161. data/lib/fractor/workflow/workflow_context.rb +132 -0
  162. data/lib/fractor/workflow/workflow_executor.rb +669 -0
  163. data/lib/fractor/workflow/workflow_result.rb +55 -0
  164. data/lib/fractor/workflow/workflow_validator.rb +295 -0
  165. data/lib/fractor/workflow.rb +333 -0
  166. data/lib/fractor/wrapped_ractor.rb +66 -101
  167. data/lib/fractor/wrapped_ractor3.rb +161 -0
  168. data/lib/fractor/wrapped_ractor4.rb +242 -0
  169. data/lib/fractor.rb +92 -4
  170. metadata +179 -6
  171. data/tests/sample.rb.bak +0 -309
  172. data/tests/sample_working.rb.bak +0 -209
@@ -0,0 +1,669 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+ require_relative "retry_config"
5
+ require_relative "circuit_breaker"
6
+ require_relative "circuit_breaker_registry"
7
+ require_relative "circuit_breaker_orchestrator"
8
+ require_relative "retry_orchestrator"
9
+ require_relative "pre_execution_context"
10
+ require_relative "execution_hooks"
11
+ require_relative "workflow_result"
12
+
13
+ module Fractor
14
+ class Workflow
15
+ # Orchestrates workflow execution by managing job execution order and data flow.
16
+ class WorkflowExecutor
17
+ attr_reader :workflow, :context, :completed_jobs, :failed_jobs,
18
+ :trace, :hooks, :pre_execution_context
19
+
20
# Set up a new executor for a single workflow run.
#
# @param workflow [Workflow] The workflow instance to execute
# @param input [Object] The workflow's initial input
# @param correlation_id [String, nil] Identifier propagated into logs/trace
# @param logger [Object, nil] Structured logger (nil disables logging)
# @param trace [Boolean] Whether to record an execution trace
# @param dead_letter_queue [Object, nil] Optional DLQ for exhausted retries
def initialize(workflow, input, correlation_id: nil, logger: nil,
               trace: false, dead_letter_queue: nil)
  @workflow = workflow
  @correlation_id = correlation_id
  @logger = logger
  @dead_letter_queue = dead_letter_queue

  # Shared context holding the workflow input and per-job outputs.
  @context = WorkflowContext.new(
    input,
    correlation_id: correlation_id,
    logger: logger,
  )

  # Run-state bookkeeping.
  @completed_jobs = Set.new
  @failed_jobs = Set.new
  @hooks = ExecutionHooks.new
  @circuit_breakers = Workflow::CircuitBreakerRegistry.new

  # create_trace reads @context, so the trace is built after the context.
  @trace = trace ? create_trace : nil
  @pre_execution_context = PreExecutionContext.new(workflow, input)
end
38
+
39
# Execute every job group in dependency order and build the final result.
#
# @return [WorkflowResult] The execution result
def execute
  # Fail fast on invalid input/configuration before any job runs.
  @pre_execution_context.validate!

  log_workflow_start
  @hooks.trigger(:workflow_start, workflow)
  @trace&.start_job(
    job_name: "workflow",
    worker_class: workflow.class.name,
  )

  groups = compute_execution_order
  started_at = Time.now

  groups.each do |group|
    execute_job_group(group)
    # A terminating job short-circuits the remaining groups.
    break if workflow_terminated?
  end

  finished_at = Time.now
  @trace&.complete!

  log_workflow_complete(finished_at - started_at)
  result = build_result(started_at, finished_at)
  @hooks.trigger(:workflow_complete, result)
  result
end
69
+
70
# Register a hook for workflow/job lifecycle events.
#
# @param event [Symbol] The event to hook into
# @param block [Proc] The callback to execute
def on(event, &)
  @hooks.register(event, &)
end
77
+
78
# Register a custom pre-execution validation hook.
# The hook receives the PreExecutionContext and can add errors/warnings.
#
# @param name [String, Symbol] Optional name for the validation
# @yield [context] Block that receives the pre-execution context
#
# @example Add custom validation
#   executor.validate_before_execution(:check_api_key) do |ctx|
#     unless ctx.input.api_key
#       ctx.add_error("API key is required")
#     end
#   end
def validate_before_execution(name = nil, &)
  @pre_execution_context.add_validation_hook(name, &)
end
93
+
94
+ private
95
+
96
# Topologically sort the workflow's jobs into execution "waves".
#
# @return [Array<Array<Symbol>>] Groups of jobs whose dependencies are all
#   satisfied by earlier groups; members of one group are independent.
# @raise [WorkflowExecutionError] if no progress can be made (a dependency
#   cycle that slipped past validation)
def compute_execution_order
  jobs = workflow.class.jobs
  pending = jobs.keys.to_set
  done = Set.new
  order = []

  until pending.empty?
    # A job is ready once every one of its dependencies has been scheduled.
    ready = pending.select do |name|
      jobs[name].dependencies.all? { |dep| done.include?(dep) }
    end

    if ready.empty?
      raise WorkflowExecutionError,
            "Cannot find next jobs to execute. Remaining: #{pending.to_a.join(', ')}"
    end

    order << ready
    ready.each do |name|
      done.add(name)
      pending.delete(name)
    end
  end

  puts "Execution order: #{order.inspect}" if ENV["FRACTOR_DEBUG"]
  order
end
127
+
128
# Execute one wave of jobs, skipping any whose condition evaluates false.
def execute_job_group(job_names)
  puts "Executing job group: #{job_names.inspect}" if ENV["FRACTOR_DEBUG"]
  jobs = job_names.map { |name| workflow.class.jobs[name] }

  # Split the wave into jobs to run and jobs whose condition says skip.
  executable, skipped = jobs.partition { |job| job.should_execute?(@context) }

  skipped.each do |job|
    job.state(:skipped)
    puts "Job '#{job.name}' skipped due to condition" if ENV["FRACTOR_DEBUG"]
  end

  return if executable.empty?

  if executable.size == 1
    execute_job(executable.first)
  else
    # Deliberately sequential: parallel execution is avoided here to
    # sidestep Ractor-related issues.
    puts "Executing #{executable.size} jobs sequentially" if ENV["FRACTOR_DEBUG"]
    executable.each { |job| execute_job(job) }
  end
end
154
+
155
# Execute a single job, honouring its retry configuration and fallback.
#
# On success the output is stored in the context under the job's name; on
# failure the configured fallback job is attempted, otherwise a
# WorkflowExecutionError is raised.
#
# Fix: the rescue branch previously computed `Time.now - start_time` and
# discarded the value; the dead expression has been removed.
def execute_job(job)
  puts "Executing job: #{job.name}" if ENV["FRACTOR_DEBUG"]
  job.state(:running)

  # Start job trace
  job_trace = @trace&.start_job(
    job_name: job.name,
    worker_class: job.worker_class.name,
  )

  # Log and trigger hook
  log_job_start(job)
  @hooks.trigger(:job_start, job, @context)

  start_time = Time.now

  begin
    # Execute with retry logic if configured
    output = if job.retry_enabled?
               execute_job_with_retry(job, job_trace)
             else
               execute_job_once(job, job_trace)
             end

    duration = Time.now - start_time

    # Store output in context
    @context.store_job_output(job.name, output)
    @completed_jobs.add(job.name)
    job.state(:completed)

    job_trace&.complete!(output: output)

    log_job_complete(job, duration)
    @hooks.trigger(:job_complete, job, output, duration)

    puts "Job '#{job.name}' completed successfully" if ENV["FRACTOR_DEBUG"]
  rescue StandardError => e
    @failed_jobs.add(job.name)
    job.state(:failed)

    job_trace&.fail!(error: e)

    # Give the job's own error handlers a chance to react.
    job.handle_error(e, @context)

    log_job_error(job, e, has_fallback: !!job.fallback_job)
    @hooks.trigger(:job_error, job, e, @context)

    puts "Job '#{job.name}' failed: #{e.message}" if ENV["FRACTOR_DEBUG"]

    # Try fallback job if configured; otherwise the failure is fatal.
    if job.fallback_job
      execute_fallback_job(job, e, start_time, job_trace)
    else
      raise WorkflowExecutionError,
            "Job '#{job.name}' failed: #{e.message}\n#{e.backtrace.join("\n")}"
    end
  end
end
221
+
222
# Run a job exactly once (no retry), wrapping its input in a Work item.
#
# @return [Object] The job's raw output
def execute_job_once(job, job_trace)
  job_input = @context.build_job_input(job)
  job_trace&.set_input(job_input)

  # Avoid double-wrapping when the input is already a Work object
  # (e.g. a custom Work subclass built by the context).
  work = job_input.is_a?(Work) ? job_input : Work.new(job_input)

  # Route through the circuit breaker when the job configures one.
  if job.circuit_breaker_enabled?
    execute_with_circuit_breaker(job, work)
  else
    execute_job_with_supervisor(job, work)
  end
end
242
+
243
# Run a job under its retry policy; when every attempt fails, the work is
# pushed to the dead letter queue (if configured) before the error
# propagates to the caller.
#
# @return [Object] The job's output from the successful attempt
def execute_job_with_retry(job, job_trace)
  retry_config = job.retry_config

  # Create retry orchestrator with the job's retry configuration
  orchestrator = RetryOrchestrator.new(retry_config,
                                       debug: ENV["FRACTOR_DEBUG"] == "1")

  orchestrator.execute_with_retry(job) do |j|
    execute_job_once(j, job_trace)
  end
rescue StandardError => e
  # Capture the retry history for the DLQ entry.
  add_to_dead_letter_queue(job, e, orchestrator.state)
  # Idiomatic bare `raise` re-raises the current exception (was `raise e`).
  raise
end
260
+
261
# Run a job's configured fallback after the primary job failed. On
# success the fallback's output is also stored under the original job's
# name and the original job is marked completed.
#
# @raise [WorkflowExecutionError] when the fallback is missing or fails too
def execute_fallback_job(job, error, start_time, job_trace)
  fallback_name = job.fallback_job
  fallback = workflow.class.jobs[fallback_name]

  unless fallback
    raise WorkflowExecutionError,
          "Fallback job '#{fallback_name}' not found for job '#{job.name}'"
  end

  log_fallback_execution(job, fallback, error)

  begin
    execute_job(fallback)

    # Adopt the fallback's output as the original job's output.
    output = @context.job_output(fallback_name)
    duration = Time.now - start_time

    @context.store_job_output(job.name, output)
    @completed_jobs.add(job.name)
    job.state(:completed)

    job_trace&.complete!(output: output)

    log_job_complete(job, duration)
    @hooks.trigger(:job_complete, job, output, duration)
  rescue StandardError => e
    log_fallback_failed(job, fallback, e)
    raise WorkflowExecutionError,
          "Job '#{job.name}' and fallback '#{fallback_name}' both failed"
  end
end
296
+
297
# Run several independent jobs concurrently, one Supervisor per job on
# its own thread. The first failure aborts the whole group.
# NOTE(review): not invoked by execute_job_group in this file, which runs
# jobs sequentially — confirm whether callers elsewhere still use this.
def execute_jobs_parallel(jobs)
  puts "Executing #{jobs.size} jobs in parallel: #{jobs.map(&:name).join(', ')}" if ENV["FRACTOR_DEBUG"]

  # Prepare one supervisor (with its work item) per job.
  specs = jobs.map do |job|
    job.state(:running)
    work = Work.new(@context.build_job_input(job))

    supervisor = Supervisor.new(
      worker_pools: [
        {
          worker_class: job.worker_class,
          num_workers: job.num_workers || 1,
        },
      ],
    )
    supervisor.add_work_item(work)

    { job: job, supervisor: supervisor }
  end

  # Launch every supervisor on its own thread.
  threads = specs.map do |spec|
    Thread.new do
      spec[:supervisor].run
      { job: spec[:job], success: true, supervisor: spec[:supervisor] }
    rescue StandardError => e
      { job: spec[:job], success: false, error: e }
    end
  end

  # Join each thread and fold its outcome into the workflow state.
  threads.each do |thread|
    outcome = thread.value
    job = outcome[:job]

    if outcome[:success]
      job_results = outcome[:supervisor].results.results
      if job_results.empty?
        raise WorkflowExecutionError,
              "Job '#{job.name}' produced no results"
      end

      @context.store_job_output(job.name, job_results.first.result)
      @completed_jobs.add(job.name)
      job.state(:completed)

      puts "Job '#{job.name}' completed successfully" if ENV["FRACTOR_DEBUG"]
    else
      @failed_jobs.add(job.name)
      job.state(:failed)
      error = outcome[:error]
      puts "Job '#{job.name}' failed: #{error.message}" if ENV["FRACTOR_DEBUG"]
      raise WorkflowExecutionError,
            "Job '#{job.name}' failed: #{error.message}"
    end
  end
end
358
+
359
# Run one work item through a dedicated Supervisor and return the result.
#
# @return [Object] The first result's payload
# @raise [WorkflowExecutionError] if the supervisor reports errors or
#   produces no results
def execute_job_with_supervisor(job, work)
  supervisor = Supervisor.new(
    worker_pools: [
      {
        worker_class: job.worker_class,
        num_workers: job.num_workers || 1,
      },
    ],
  )

  supervisor.add_work_item(work)
  supervisor.run

  # Errors take precedence over any partial results.
  errors = supervisor.results.errors
  unless errors.empty?
    raise WorkflowExecutionError,
          "Job '#{job.name}' encountered error: #{errors.first.error}"
  end

  results = supervisor.results.results
  raise WorkflowExecutionError, "Job '#{job.name}' produced no results" if results.empty?

  results.first.result
end
387
+
388
# Whether a terminating job has completed, ending the workflow early.
# (Idiom: `any?` replaces the manual each/return-true loop.)
#
# @return [Boolean]
def workflow_terminated?
  workflow.class.jobs.any? do |name, job|
    job.terminates && @completed_jobs.include?(name)
  end
end
395
+
396
# Build an ExecutionTrace with a fresh random execution id.
def create_trace
  require "securerandom" # lazy-loaded: only needed when tracing is enabled

  ExecutionTrace.new(
    workflow_name: workflow.class.workflow_name,
    execution_id: "exec-#{SecureRandom.hex(8)}",
    correlation_id: @context.correlation_id,
  )
end
405
+
406
# Emit the workflow-start log entry (no-op without a logger).
def log_workflow_start
  return unless @context.logger

  @context.logger.info(
    "Workflow starting",
    workflow: workflow.class.workflow_name,
    correlation_id: @context.correlation_id,
  )
end
415
+
416
# Emit the workflow-complete log entry with duration and job counts.
def log_workflow_complete(duration)
  return unless @context.logger

  @context.logger.info(
    "Workflow complete",
    workflow: workflow.class.workflow_name,
    duration_ms: (duration * 1000).round(2),
    jobs_completed: @completed_jobs.size,
    jobs_failed: @failed_jobs.size,
  )
end
427
+
428
# Emit the job-start log entry (no-op without a logger).
def log_job_start(job)
  return unless @context.logger

  @context.logger.info(
    "Job starting",
    job: job.name,
    worker: job.worker_class.name,
  )
end
437
+
438
# Emit the job-complete log entry with the duration in milliseconds.
def log_job_complete(job, duration)
  return unless @context.logger

  @context.logger.info(
    "Job complete",
    job: job.name,
    duration_ms: (duration * 1000).round(2),
  )
end
447
+
448
# Log a job failure: WARN when a fallback will handle it (the error is
# recoverable), ERROR when it will fail the workflow.
# (Idiom: a single `public_send` replaces building two Method objects.)
def log_job_error(job, error, has_fallback: false)
  return unless @context.logger

  severity = has_fallback ? :warn : :error

  @context.logger.public_send(
    severity,
    "Job '#{job.name}' encountered error: #{error}",
    job: job.name,
    error: error.class.name,
  )
end
461
+
462
# Warn that a job is about to be retried, with attempt counts and delay.
def log_retry_attempt(job, retry_state, delay)
  return unless @context.logger

  @context.logger.warn(
    "Job retry attempt",
    job: job.name,
    attempt: retry_state.attempt,
    max_attempts: job.retry_config.max_attempts,
    delay_seconds: delay,
    last_error: retry_state.last_error&.message,
  )
end
474
+
475
# Record that a retried job eventually succeeded.
def log_retry_success(job, retry_state)
  return unless @context.logger

  @context.logger.info(
    "Job retry succeeded",
    job: job.name,
    successful_attempt: retry_state.attempt,
    total_attempts: retry_state.attempt,
    total_time: retry_state.total_time,
  )
end
486
+
487
# Record that every retry attempt for a job failed.
def log_retry_exhausted(job, retry_state)
  return unless @context.logger

  @context.logger.error(
    "Job retry attempts exhausted",
    job: job.name,
    total_attempts: retry_state.attempt - 1,
    total_time: retry_state.total_time,
    errors: retry_state.summary[:errors],
  )
end
498
+
499
# Warn that the fallback job is taking over after a failure.
def log_fallback_execution(job, fallback_job, error)
  return unless @context.logger

  @context.logger.warn(
    "Executing fallback job",
    job: job.name,
    fallback_job: fallback_job.name,
    original_error: error.message,
  )
end
509
+
510
# Record that both the job and its fallback failed.
def log_fallback_failed(job, fallback_job, error)
  return unless @context.logger

  @context.logger.error(
    "Fallback job failed",
    job: job.name,
    fallback_job: fallback_job.name,
    error: error.message,
  )
end
520
+
521
# Run a job through its named circuit breaker; an open circuit is
# surfaced to the caller as a WorkflowExecutionError.
def execute_with_circuit_breaker(job, work)
  # Get or create circuit breaker orchestrator for this job's key.
  orchestrator = @circuit_breakers.get_or_create_orchestrator(
    job.circuit_breaker_key,
    **job.circuit_breaker_config.slice(:threshold, :timeout,
                                       :half_open_calls),
    job_name: job.name,
    debug: ENV["FRACTOR_DEBUG"] == "1",
  )

  # Log circuit state before execution (only when not closed).
  log_circuit_breaker_state(job, orchestrator)

  orchestrator.execute_with_breaker(job) do
    execute_job_with_supervisor(job, work)
  end
rescue Workflow::CircuitOpenError => e
  log_circuit_breaker_open(job, orchestrator)
  raise WorkflowExecutionError,
        "Circuit breaker open for job '#{job.name}': #{e.message}"
end
546
+
547
# Warn about a non-closed breaker before executing through it.
def log_circuit_breaker_state(job, breaker)
  return unless @context.logger
  return if breaker.closed?

  @context.logger.warn(
    "Circuit breaker state",
    job: job.name,
    state: breaker.state,
    failure_count: breaker.failure_count,
    threshold: breaker.threshold,
  )
end
559
+
560
# Record an execution rejected by an open circuit breaker.
def log_circuit_breaker_open(job, breaker)
  return unless @context.logger

  @context.logger.error(
    "Circuit breaker open",
    job: job.name,
    failure_count: breaker.failure_count,
    threshold: breaker.threshold,
    last_failure: breaker.last_failure_time,
  )
end
571
+
572
# Record a permanently failed job in the dead letter queue, attaching
# retry history and workflow context. No-op when no DLQ is configured.
#
# Fix: the input built by the context is no longer wrapped unconditionally
# in Work.new — when it is already a Work (e.g. a custom Work subclass),
# wrapping again would double-wrap it; this now mirrors the guard in
# execute_job_once.
#
# @param retry_state [Hash, Object, nil] Retry bookkeeping — either the
#   Hash from RetryOrchestrator#state or a RetryState object
def add_to_dead_letter_queue(job, error, retry_state = nil)
  return unless @dead_letter_queue

  job_input = @context.build_job_input(job)
  work = job_input.is_a?(Work) ? job_input : Work.new(job_input)

  # Add metadata about the failure
  metadata = {
    job_name: job.name,
    worker_class: job.worker_class.name,
    correlation_id: @context.correlation_id,
    workflow_name: @workflow.class.workflow_name,
  }

  # Add retry information if available
  if retry_state
    # Handle both RetryState object and Hash from orchestrator
    if retry_state.is_a?(Hash)
      # From RetryOrchestrator.state
      metadata[:retry_attempts] = retry_state[:attempts] - 1
      metadata[:max_attempts] = retry_state[:max_attempts]
      metadata[:last_error] = retry_state[:last_error]
      metadata[:total_retry_time] = retry_state[:total_time]
      metadata[:all_errors] = retry_state[:all_errors]
    else
      # From RetryState object
      metadata[:retry_attempts] = retry_state.attempt - 1
      metadata[:total_retry_time] = retry_state.total_time
      metadata[:all_errors] = retry_state.summary[:errors]
    end
  end

  # Add context from workflow
  context = {
    workflow_input: @context.workflow_input,
    completed_jobs: @completed_jobs.to_a,
    failed_jobs: @failed_jobs.to_a,
  }

  @dead_letter_queue.add(work, error, context: context,
                                      metadata: metadata)

  log_added_to_dlq(job, error) if @context.logger
end
617
+
618
# Warn that failed work landed in the dead letter queue.
# Callers guard on @context.logger before invoking this.
def log_added_to_dlq(job, error)
  @context.logger.warn(
    "Work added to Dead Letter Queue",
    job: job.name,
    error: error.class.name,
    message: error.message,
    dlq_size: @dead_letter_queue.size,
  )
end
627
+
628
# Assemble the WorkflowResult for this run from the accumulated state.
def build_result(start_time, end_time)
  WorkflowResult.new(
    workflow_name: workflow.class.workflow_name,
    output: find_workflow_output,
    completed_jobs: @completed_jobs.to_a,
    failed_jobs: @failed_jobs.to_a,
    execution_time: end_time - start_time,
    success: @failed_jobs.empty?,
    trace: @trace,
    correlation_id: @context.correlation_id,
  )
end
643
+
644
# Locate the workflow's final output: first any completed job explicitly
# mapped to the workflow output, then any completed end job, else nil.
def find_workflow_output
  workflow.class.jobs.each do |name, job|
    next unless job.outputs_to_workflow? && @completed_jobs.include?(name)

    output = @context.job_output(name)
    puts "Found workflow output from job '#{name}': #{output.class}" if ENV["FRACTOR_DEBUG"]
    return output
  end

  # Fallback: return output from the first end job that completed
  workflow.class.end_job_names.each do |end_job_spec|
    job_name = end_job_spec[:name]
    next unless @completed_jobs.include?(job_name)

    output = @context.job_output(job_name)
    puts "Using end job '#{job_name}' output: #{output.class}" if ENV["FRACTOR_DEBUG"]
    return output
  end

  puts "Warning: No workflow output found!" if ENV["FRACTOR_DEBUG"]
  nil
end
667
+ end
668
+ end
669
+ end