pyworkflow_engine-0.1.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196)
  1. dashboard/backend/app/__init__.py +1 -0
  2. dashboard/backend/app/config.py +32 -0
  3. dashboard/backend/app/controllers/__init__.py +6 -0
  4. dashboard/backend/app/controllers/run_controller.py +86 -0
  5. dashboard/backend/app/controllers/workflow_controller.py +33 -0
  6. dashboard/backend/app/dependencies/__init__.py +5 -0
  7. dashboard/backend/app/dependencies/storage.py +50 -0
  8. dashboard/backend/app/repositories/__init__.py +6 -0
  9. dashboard/backend/app/repositories/run_repository.py +80 -0
  10. dashboard/backend/app/repositories/workflow_repository.py +27 -0
  11. dashboard/backend/app/rest/__init__.py +8 -0
  12. dashboard/backend/app/rest/v1/__init__.py +12 -0
  13. dashboard/backend/app/rest/v1/health.py +33 -0
  14. dashboard/backend/app/rest/v1/runs.py +133 -0
  15. dashboard/backend/app/rest/v1/workflows.py +41 -0
  16. dashboard/backend/app/schemas/__init__.py +23 -0
  17. dashboard/backend/app/schemas/common.py +16 -0
  18. dashboard/backend/app/schemas/event.py +24 -0
  19. dashboard/backend/app/schemas/hook.py +25 -0
  20. dashboard/backend/app/schemas/run.py +54 -0
  21. dashboard/backend/app/schemas/step.py +28 -0
  22. dashboard/backend/app/schemas/workflow.py +31 -0
  23. dashboard/backend/app/server.py +87 -0
  24. dashboard/backend/app/services/__init__.py +6 -0
  25. dashboard/backend/app/services/run_service.py +240 -0
  26. dashboard/backend/app/services/workflow_service.py +155 -0
  27. dashboard/backend/main.py +18 -0
  28. docs/concepts/cancellation.mdx +362 -0
  29. docs/concepts/continue-as-new.mdx +434 -0
  30. docs/concepts/events.mdx +266 -0
  31. docs/concepts/fault-tolerance.mdx +370 -0
  32. docs/concepts/hooks.mdx +552 -0
  33. docs/concepts/limitations.mdx +167 -0
  34. docs/concepts/schedules.mdx +775 -0
  35. docs/concepts/sleep.mdx +312 -0
  36. docs/concepts/steps.mdx +301 -0
  37. docs/concepts/workflows.mdx +255 -0
  38. docs/guides/cli.mdx +942 -0
  39. docs/guides/configuration.mdx +560 -0
  40. docs/introduction.mdx +155 -0
  41. docs/quickstart.mdx +279 -0
  42. examples/__init__.py +1 -0
  43. examples/celery/__init__.py +1 -0
  44. examples/celery/durable/docker-compose.yml +55 -0
  45. examples/celery/durable/pyworkflow.config.yaml +12 -0
  46. examples/celery/durable/workflows/__init__.py +122 -0
  47. examples/celery/durable/workflows/basic.py +87 -0
  48. examples/celery/durable/workflows/batch_processing.py +102 -0
  49. examples/celery/durable/workflows/cancellation.py +273 -0
  50. examples/celery/durable/workflows/child_workflow_patterns.py +240 -0
  51. examples/celery/durable/workflows/child_workflows.py +202 -0
  52. examples/celery/durable/workflows/continue_as_new.py +260 -0
  53. examples/celery/durable/workflows/fault_tolerance.py +210 -0
  54. examples/celery/durable/workflows/hooks.py +211 -0
  55. examples/celery/durable/workflows/idempotency.py +112 -0
  56. examples/celery/durable/workflows/long_running.py +99 -0
  57. examples/celery/durable/workflows/retries.py +101 -0
  58. examples/celery/durable/workflows/schedules.py +209 -0
  59. examples/celery/transient/01_basic_workflow.py +91 -0
  60. examples/celery/transient/02_fault_tolerance.py +257 -0
  61. examples/celery/transient/__init__.py +20 -0
  62. examples/celery/transient/pyworkflow.config.yaml +25 -0
  63. examples/local/__init__.py +1 -0
  64. examples/local/durable/01_basic_workflow.py +94 -0
  65. examples/local/durable/02_file_storage.py +132 -0
  66. examples/local/durable/03_retries.py +169 -0
  67. examples/local/durable/04_long_running.py +119 -0
  68. examples/local/durable/05_event_log.py +145 -0
  69. examples/local/durable/06_idempotency.py +148 -0
  70. examples/local/durable/07_hooks.py +334 -0
  71. examples/local/durable/08_cancellation.py +233 -0
  72. examples/local/durable/09_child_workflows.py +198 -0
  73. examples/local/durable/10_child_workflow_patterns.py +265 -0
  74. examples/local/durable/11_continue_as_new.py +249 -0
  75. examples/local/durable/12_schedules.py +198 -0
  76. examples/local/durable/__init__.py +1 -0
  77. examples/local/transient/01_quick_tasks.py +87 -0
  78. examples/local/transient/02_retries.py +130 -0
  79. examples/local/transient/03_sleep.py +141 -0
  80. examples/local/transient/__init__.py +1 -0
  81. pyworkflow/__init__.py +256 -0
  82. pyworkflow/aws/__init__.py +68 -0
  83. pyworkflow/aws/context.py +234 -0
  84. pyworkflow/aws/handler.py +184 -0
  85. pyworkflow/aws/testing.py +310 -0
  86. pyworkflow/celery/__init__.py +41 -0
  87. pyworkflow/celery/app.py +198 -0
  88. pyworkflow/celery/scheduler.py +315 -0
  89. pyworkflow/celery/tasks.py +1746 -0
  90. pyworkflow/cli/__init__.py +132 -0
  91. pyworkflow/cli/__main__.py +6 -0
  92. pyworkflow/cli/commands/__init__.py +1 -0
  93. pyworkflow/cli/commands/hooks.py +640 -0
  94. pyworkflow/cli/commands/quickstart.py +495 -0
  95. pyworkflow/cli/commands/runs.py +773 -0
  96. pyworkflow/cli/commands/scheduler.py +130 -0
  97. pyworkflow/cli/commands/schedules.py +794 -0
  98. pyworkflow/cli/commands/setup.py +703 -0
  99. pyworkflow/cli/commands/worker.py +413 -0
  100. pyworkflow/cli/commands/workflows.py +1257 -0
  101. pyworkflow/cli/output/__init__.py +1 -0
  102. pyworkflow/cli/output/formatters.py +321 -0
  103. pyworkflow/cli/output/styles.py +121 -0
  104. pyworkflow/cli/utils/__init__.py +1 -0
  105. pyworkflow/cli/utils/async_helpers.py +30 -0
  106. pyworkflow/cli/utils/config.py +130 -0
  107. pyworkflow/cli/utils/config_generator.py +344 -0
  108. pyworkflow/cli/utils/discovery.py +53 -0
  109. pyworkflow/cli/utils/docker_manager.py +651 -0
  110. pyworkflow/cli/utils/interactive.py +364 -0
  111. pyworkflow/cli/utils/storage.py +115 -0
  112. pyworkflow/config.py +329 -0
  113. pyworkflow/context/__init__.py +63 -0
  114. pyworkflow/context/aws.py +230 -0
  115. pyworkflow/context/base.py +416 -0
  116. pyworkflow/context/local.py +930 -0
  117. pyworkflow/context/mock.py +381 -0
  118. pyworkflow/core/__init__.py +0 -0
  119. pyworkflow/core/exceptions.py +353 -0
  120. pyworkflow/core/registry.py +313 -0
  121. pyworkflow/core/scheduled.py +328 -0
  122. pyworkflow/core/step.py +494 -0
  123. pyworkflow/core/workflow.py +294 -0
  124. pyworkflow/discovery.py +248 -0
  125. pyworkflow/engine/__init__.py +0 -0
  126. pyworkflow/engine/events.py +879 -0
  127. pyworkflow/engine/executor.py +682 -0
  128. pyworkflow/engine/replay.py +273 -0
  129. pyworkflow/observability/__init__.py +19 -0
  130. pyworkflow/observability/logging.py +234 -0
  131. pyworkflow/primitives/__init__.py +33 -0
  132. pyworkflow/primitives/child_handle.py +174 -0
  133. pyworkflow/primitives/child_workflow.py +372 -0
  134. pyworkflow/primitives/continue_as_new.py +101 -0
  135. pyworkflow/primitives/define_hook.py +150 -0
  136. pyworkflow/primitives/hooks.py +97 -0
  137. pyworkflow/primitives/resume_hook.py +210 -0
  138. pyworkflow/primitives/schedule.py +545 -0
  139. pyworkflow/primitives/shield.py +96 -0
  140. pyworkflow/primitives/sleep.py +100 -0
  141. pyworkflow/runtime/__init__.py +21 -0
  142. pyworkflow/runtime/base.py +179 -0
  143. pyworkflow/runtime/celery.py +310 -0
  144. pyworkflow/runtime/factory.py +101 -0
  145. pyworkflow/runtime/local.py +706 -0
  146. pyworkflow/scheduler/__init__.py +9 -0
  147. pyworkflow/scheduler/local.py +248 -0
  148. pyworkflow/serialization/__init__.py +0 -0
  149. pyworkflow/serialization/decoder.py +146 -0
  150. pyworkflow/serialization/encoder.py +162 -0
  151. pyworkflow/storage/__init__.py +54 -0
  152. pyworkflow/storage/base.py +612 -0
  153. pyworkflow/storage/config.py +185 -0
  154. pyworkflow/storage/dynamodb.py +1315 -0
  155. pyworkflow/storage/file.py +827 -0
  156. pyworkflow/storage/memory.py +549 -0
  157. pyworkflow/storage/postgres.py +1161 -0
  158. pyworkflow/storage/schemas.py +486 -0
  159. pyworkflow/storage/sqlite.py +1136 -0
  160. pyworkflow/utils/__init__.py +0 -0
  161. pyworkflow/utils/duration.py +177 -0
  162. pyworkflow/utils/schedule.py +391 -0
  163. pyworkflow_engine-0.1.7.dist-info/METADATA +687 -0
  164. pyworkflow_engine-0.1.7.dist-info/RECORD +196 -0
  165. pyworkflow_engine-0.1.7.dist-info/WHEEL +5 -0
  166. pyworkflow_engine-0.1.7.dist-info/entry_points.txt +2 -0
  167. pyworkflow_engine-0.1.7.dist-info/licenses/LICENSE +21 -0
  168. pyworkflow_engine-0.1.7.dist-info/top_level.txt +5 -0
  169. tests/examples/__init__.py +0 -0
  170. tests/integration/__init__.py +0 -0
  171. tests/integration/test_cancellation.py +330 -0
  172. tests/integration/test_child_workflows.py +439 -0
  173. tests/integration/test_continue_as_new.py +428 -0
  174. tests/integration/test_dynamodb_storage.py +1146 -0
  175. tests/integration/test_fault_tolerance.py +369 -0
  176. tests/integration/test_schedule_storage.py +484 -0
  177. tests/unit/__init__.py +0 -0
  178. tests/unit/backends/__init__.py +1 -0
  179. tests/unit/backends/test_dynamodb_storage.py +1554 -0
  180. tests/unit/backends/test_postgres_storage.py +1281 -0
  181. tests/unit/backends/test_sqlite_storage.py +1460 -0
  182. tests/unit/conftest.py +41 -0
  183. tests/unit/test_cancellation.py +364 -0
  184. tests/unit/test_child_workflows.py +680 -0
  185. tests/unit/test_continue_as_new.py +441 -0
  186. tests/unit/test_event_limits.py +316 -0
  187. tests/unit/test_executor.py +320 -0
  188. tests/unit/test_fault_tolerance.py +334 -0
  189. tests/unit/test_hooks.py +495 -0
  190. tests/unit/test_registry.py +261 -0
  191. tests/unit/test_replay.py +420 -0
  192. tests/unit/test_schedule_schemas.py +285 -0
  193. tests/unit/test_schedule_utils.py +286 -0
  194. tests/unit/test_scheduled_workflow.py +274 -0
  195. tests/unit/test_step.py +353 -0
  196. tests/unit/test_workflow.py +243 -0
@@ -0,0 +1,1746 @@
+ """
+ Celery tasks for distributed workflow and step execution.
+
+ These tasks enable:
+ - Distributed step execution across workers
+ - Automatic retry with exponential backoff
+ - Scheduled sleep resumption
+ - Workflow orchestration
+ - Fault tolerance with automatic recovery on worker failures
+ """
+
+ import asyncio
+ import uuid
+ from collections.abc import Callable
+ from datetime import UTC, datetime
+ from typing import Any
+
+ from celery import Task
+ from celery.exceptions import WorkerLostError
+ from loguru import logger
+
+ from pyworkflow.celery.app import celery_app
+ from pyworkflow.core.exceptions import (
+     CancellationError,
+     ContinueAsNewSignal,
+     FatalError,
+     RetryableError,
+     SuspensionSignal,
+ )
+ from pyworkflow.core.registry import WorkflowMetadata, get_workflow
+ from pyworkflow.core.workflow import execute_workflow_with_context
+ from pyworkflow.engine.events import (
+     EventType,
+     create_child_workflow_cancelled_event,
+     create_workflow_cancelled_event,
+     create_workflow_continued_as_new_event,
+     create_workflow_interrupted_event,
+     create_workflow_started_event,
+ )
+ from pyworkflow.serialization.decoder import deserialize_args, deserialize_kwargs
+ from pyworkflow.serialization.encoder import serialize_args, serialize_kwargs
+ from pyworkflow.storage.base import StorageBackend
+ from pyworkflow.storage.schemas import RunStatus, WorkflowRun
+
+
+ class WorkflowTask(Task):
+     """Base task class for workflow execution with custom error handling."""
+
+     autoretry_for = (RetryableError,)
+     retry_kwargs = {"max_retries": 3}
+     retry_backoff = True
+     retry_backoff_max = 600
+     retry_jitter = True
+
+     def on_failure(self, exc, task_id, args, kwargs, einfo):
+         """
+         Handle task failure.
+
+         Detects worker loss and handles recovery appropriately:
+         - WorkerLostError: Infrastructure failure, may trigger recovery
+         - Other exceptions: Application failure
+         """
+         is_worker_loss = isinstance(exc, WorkerLostError)
+
+         if is_worker_loss:
+             logger.warning(
+                 f"Task {self.name} interrupted due to worker loss",
+                 task_id=task_id,
+                 error=str(exc),
+             )
+             # Note: Recovery is handled when the task is requeued and picked up
+             # by another worker. See _handle_workflow_recovery() for logic.
+         else:
+             logger.error(
+                 f"Task {self.name} failed",
+                 task_id=task_id,
+                 error=str(exc),
+                 traceback=einfo.traceback if einfo else None,
+             )
+
+     def on_retry(self, exc, task_id, args, kwargs, einfo):
+         """Handle task retry."""
+         logger.warning(
+             f"Task {self.name} retrying",
+             task_id=task_id,
+             error=str(exc),
+             retry_count=self.request.retries,
+         )
+
+
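For context, the five class attributes above are standard Celery retry options: any `RetryableError` raised by a task using this base class is retried up to three times, with an exponentially growing countdown (roughly 1s, 2s, 4s, ...) capped at 600 seconds and randomized by jitter. A minimal standalone sketch of the same pattern — the app and task here are hypothetical, not part of this package:

```python
from celery import Celery

app = Celery("demo")  # hypothetical app, for illustration only

@app.task(
    autoretry_for=(ConnectionError,),  # retry only this exception type
    retry_kwargs={"max_retries": 3},   # then give up for good
    retry_backoff=True,                # ~1s, 2s, 4s between attempts
    retry_backoff_max=600,             # never wait more than 10 minutes
    retry_jitter=True,                 # randomize each delay
)
def flaky_fetch(url: str) -> bytes:
    import urllib.request

    return urllib.request.urlopen(url).read()
```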
+ @celery_app.task(
+     name="pyworkflow.execute_step",
+     base=WorkflowTask,
+     bind=True,
+     queue="pyworkflow.steps",
+ )
+ def execute_step_task(
+     self: WorkflowTask,
+     step_name: str,
+     args_json: str,
+     kwargs_json: str,
+     run_id: str,
+     step_id: str,
+     max_retries: int = 3,
+     storage_config: dict[str, Any] | None = None,
+ ) -> Any:
+     """
+     Execute a workflow step in a Celery worker.
+
+     This task runs a single step and handles retries automatically.
+
+     Args:
+         step_name: Name of the step function
+         args_json: Serialized positional arguments
+         kwargs_json: Serialized keyword arguments
+         run_id: Workflow run ID
+         step_id: Step execution ID
+         max_retries: Maximum retry attempts
+         storage_config: Storage backend configuration
+
+     Returns:
+         Step result (serialized)
+
+     Raises:
+         FatalError: For non-retriable errors
+         RetryableError: For retriable errors (triggers automatic retry)
+     """
+     from pyworkflow.core.registry import _registry
+
+     logger.info(
+         f"Executing step: {step_name}",
+         run_id=run_id,
+         step_id=step_id,
+         attempt=self.request.retries + 1,
+     )
+
+     # Get step metadata
+     step_meta = _registry.get_step(step_name)
+     if not step_meta:
+         raise FatalError(f"Step '{step_name}' not found in registry")
+
+     # Deserialize arguments
+     args = deserialize_args(args_json)
+     kwargs = deserialize_kwargs(kwargs_json)
+
+     # Execute step function
+     try:
+         # Get the original function (unwrapped from decorator)
+         step_func = step_meta.original_func
+
+         # Execute the step
+         if asyncio.iscoroutinefunction(step_func):
+             result = asyncio.run(step_func(*args, **kwargs))
+         else:
+             result = step_func(*args, **kwargs)
+
+         logger.info(
+             f"Step completed: {step_name}",
+             run_id=run_id,
+             step_id=step_id,
+         )
+
+         return result
+
+     except FatalError:
+         logger.error(f"Step failed (fatal): {step_name}", run_id=run_id, step_id=step_id)
+         raise
+
+     except RetryableError as e:
+         logger.warning(
+             f"Step failed (retriable): {step_name}",
+             run_id=run_id,
+             step_id=step_id,
+             retry_after=e.retry_after,
+         )
+         # Let Celery handle the retry
+         raise self.retry(exc=e, countdown=e.get_retry_delay_seconds() or 60)
+
+     except Exception as e:
+         logger.error(
+             f"Step failed (unexpected): {step_name}",
+             run_id=run_id,
+             step_id=step_id,
+             error=str(e),
+             exc_info=True,
+         )
+         # Treat unexpected errors as retriable
+         raise self.retry(exc=RetryableError(str(e)), countdown=60)
+
+
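A sketch of how this task could be enqueued from caller code. Only the task signature and queue name come from the code above; the step name and identifiers are hypothetical, and in practice the workflow executor generates them:

```python
from pyworkflow.celery.tasks import execute_step_task
from pyworkflow.serialization.encoder import serialize_args, serialize_kwargs

# Hypothetical identifiers; a real caller would use the executor's IDs
# for a step registered via the package's step decorator.
async_result = execute_step_task.apply_async(
    kwargs={
        "step_name": "charge_card",
        "args_json": serialize_args("order_123"),
        "kwargs_json": serialize_kwargs(amount_cents=4999),
        "run_id": "run_abc123",
        "step_id": "step_1",
    },
    queue="pyworkflow.steps",
)
print(async_result.id)  # Celery task ID, not the workflow run ID
```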
+ @celery_app.task(
+     name="pyworkflow.start_workflow",
+     queue="pyworkflow.workflows",
+ )
+ def start_workflow_task(
+     workflow_name: str,
+     args_json: str,
+     kwargs_json: str,
+     run_id: str,
+     storage_config: dict[str, Any] | None = None,
+     idempotency_key: str | None = None,
+ ) -> str:
+     """
+     Start a workflow execution.
+
+     This task executes on Celery workers and runs the workflow directly.
+
+     Args:
+         workflow_name: Name of the workflow
+         args_json: Serialized positional arguments
+         kwargs_json: Serialized keyword arguments
+         run_id: Workflow run ID (generated by the caller)
+         storage_config: Storage backend configuration
+         idempotency_key: Optional idempotency key
+
+     Returns:
+         Workflow run ID
+     """
+     logger.info(f"Starting workflow on worker: {workflow_name}", run_id=run_id)
+
+     # Get workflow metadata
+     workflow_meta = get_workflow(workflow_name)
+     if not workflow_meta:
+         raise ValueError(f"Workflow '{workflow_name}' not found in registry")
+
+     # Deserialize arguments
+     args = deserialize_args(args_json)
+     kwargs = deserialize_kwargs(kwargs_json)
+
+     # Get storage backend
+     storage = _get_storage_backend(storage_config)
+
+     # Execute workflow directly on worker
+     result_run_id = asyncio.run(
+         _start_workflow_on_worker(
+             workflow_meta=workflow_meta,
+             args=args,
+             kwargs=kwargs,
+             storage=storage,
+             storage_config=storage_config,
+             idempotency_key=idempotency_key,
+             run_id=run_id,
+         )
+     )
+
+     logger.info(f"Workflow execution initiated: {workflow_name}", run_id=result_run_id)
+     return result_run_id
+
+
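A client-side dispatch sketch. The workflow name, arguments, and idempotency key are hypothetical; the signature is taken from the task above, and the run-ID scheme matches the fallback used by `_start_workflow_on_worker` further down:

```python
import uuid

from pyworkflow.celery.tasks import start_workflow_task
from pyworkflow.serialization.encoder import serialize_args, serialize_kwargs

# Same scheme _start_workflow_on_worker uses when no run_id is supplied.
run_id = f"run_{uuid.uuid4().hex[:16]}"

start_workflow_task.delay(
    workflow_name="order_fulfillment",    # hypothetical registered workflow
    args_json=serialize_args("order_123"),
    kwargs_json=serialize_kwargs(priority="high"),
    run_id=run_id,
    storage_config=None,                  # let the worker use its default backend
    idempotency_key="fulfill-order_123",  # dedupes repeated submissions
)
```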
+ @celery_app.task(
+     name="pyworkflow.start_child_workflow",
+     queue="pyworkflow.workflows",
+ )
+ def start_child_workflow_task(
+     workflow_name: str,
+     args_json: str,
+     kwargs_json: str,
+     child_run_id: str,
+     storage_config: dict[str, Any] | None,
+     parent_run_id: str,
+     child_id: str,
+     wait_for_completion: bool,
+ ) -> str:
+     """
+     Start a child workflow execution on a Celery worker.
+
+     This task executes child workflows and handles parent notification
+     when the child completes or fails.
+
+     Args:
+         workflow_name: Name of the child workflow
+         args_json: Serialized positional arguments
+         kwargs_json: Serialized keyword arguments
+         child_run_id: Child workflow run ID (already created by parent)
+         storage_config: Storage backend configuration
+         parent_run_id: Parent workflow run ID
+         child_id: Deterministic child ID for replay
+         wait_for_completion: Whether parent is waiting for child
+
+     Returns:
+         Child workflow run ID
+     """
+     logger.info(
+         f"Starting child workflow on worker: {workflow_name}",
+         child_run_id=child_run_id,
+         parent_run_id=parent_run_id,
+     )
+
+     # Get workflow metadata
+     workflow_meta = get_workflow(workflow_name)
+     if not workflow_meta:
+         raise ValueError(f"Workflow '{workflow_name}' not found in registry")
+
+     # Deserialize arguments
+     args = deserialize_args(args_json)
+     kwargs = deserialize_kwargs(kwargs_json)
+
+     # Get storage backend
+     storage = _get_storage_backend(storage_config)
+
+     # Execute child workflow on worker
+     asyncio.run(
+         _execute_child_workflow_on_worker(
+             workflow_func=workflow_meta.func,
+             workflow_name=workflow_name,
+             args=args,
+             kwargs=kwargs,
+             child_run_id=child_run_id,
+             storage=storage,
+             storage_config=storage_config,
+             parent_run_id=parent_run_id,
+             child_id=child_id,
+             wait_for_completion=wait_for_completion,
+         )
+     )
+
+     logger.info(
+         f"Child workflow execution completed: {workflow_name}",
+         child_run_id=child_run_id,
+     )
+     return child_run_id
+
+
+ async def _execute_child_workflow_on_worker(
+     workflow_func: Callable[..., Any],
+     workflow_name: str,
+     args: tuple[Any, ...],
+     kwargs: dict[str, Any],
+     child_run_id: str,
+     storage: StorageBackend,
+     storage_config: dict[str, Any] | None,
+     parent_run_id: str,
+     child_id: str,
+     wait_for_completion: bool,
+ ) -> None:
+     """
+     Execute a child workflow on a Celery worker and notify the parent on completion.
+
+     This handles:
+     1. Executing the child workflow
+     2. Recording completion/failure events in the parent's log
+     3. Triggering parent resumption if waiting
+     """
+     # Ensure storage is connected (some backends like SQLite require this)
+     if hasattr(storage, "connect"):
+         await storage.connect()
+
+     from pyworkflow.engine.events import (
+         create_child_workflow_completed_event,
+         create_child_workflow_failed_event,
+     )
+     from pyworkflow.serialization.encoder import serialize
+
+     try:
+         # Update status to RUNNING
+         await storage.update_run_status(child_run_id, RunStatus.RUNNING)
+
+         # Execute the child workflow
+         result = await execute_workflow_with_context(
+             run_id=child_run_id,
+             workflow_func=workflow_func,
+             workflow_name=workflow_name,
+             args=args,
+             kwargs=kwargs,
+             storage=storage,
+             durable=True,
+             event_log=None,  # Fresh execution
+         )
+
+         # Update status to COMPLETED
+         serialized_result = serialize(result)
+         await storage.update_run_status(child_run_id, RunStatus.COMPLETED, result=serialized_result)
+
+         # Record completion in the parent's log
+         completion_event = create_child_workflow_completed_event(
+             run_id=parent_run_id,
+             child_id=child_id,
+             child_run_id=child_run_id,
+             result=serialized_result,
+         )
+         await storage.record_event(completion_event)
+
+         logger.info(
+             f"Child workflow completed: {workflow_name}",
+             parent_run_id=parent_run_id,
+             child_run_id=child_run_id,
+         )
+
+         # If the parent is waiting, trigger resumption
+         if wait_for_completion:
+             await _trigger_parent_resumption_celery(parent_run_id, storage, storage_config)
+
+     except SuspensionSignal as e:
+         # Child workflow suspended (e.g., sleep, hook).
+         # Update status and don't notify the parent yet - handled on child resumption.
+         await storage.update_run_status(child_run_id, RunStatus.SUSPENDED)
+         logger.debug(
+             f"Child workflow suspended: {workflow_name}",
+             parent_run_id=parent_run_id,
+             child_run_id=child_run_id,
+         )
+
+         # Schedule automatic resumption if we have a resume_at time
+         resume_at = e.data.get("resume_at") if e.data else None
+         if resume_at:
+             schedule_workflow_resumption(child_run_id, resume_at, storage_config)
+
+     except ContinueAsNewSignal as e:
+         # Child workflow continuing as a new execution
+         from pyworkflow.core.registry import get_workflow
+
+         child_workflow_meta = get_workflow(workflow_name)
+         if not child_workflow_meta:
+             raise ValueError(f"Workflow '{workflow_name}' not found in registry")
+
+         new_run_id = await _handle_continue_as_new_celery(
+             current_run_id=child_run_id,
+             workflow_meta=child_workflow_meta,
+             storage=storage,
+             storage_config=storage_config,
+             new_args=e.workflow_args,
+             new_kwargs=e.workflow_kwargs,
+             parent_run_id=parent_run_id,
+         )
+
+         logger.info(
+             f"Child workflow continued as new: {workflow_name}",
+             old_run_id=child_run_id,
+             new_run_id=new_run_id,
+             parent_run_id=parent_run_id,
+         )
+
+     except Exception as e:
+         # Child workflow failed
+         error_msg = str(e)
+         error_type = type(e).__name__
+
+         await storage.update_run_status(child_run_id, RunStatus.FAILED, error=error_msg)
+
+         # Record failure in the parent's log
+         failure_event = create_child_workflow_failed_event(
+             run_id=parent_run_id,
+             child_id=child_id,
+             child_run_id=child_run_id,
+             error=error_msg,
+             error_type=error_type,
+         )
+         await storage.record_event(failure_event)
+
+         logger.error(
+             f"Child workflow failed: {workflow_name}",
+             parent_run_id=parent_run_id,
+             child_run_id=child_run_id,
+             error=error_msg,
+         )
+
+         # If the parent is waiting, trigger resumption (will raise the error on replay)
+         if wait_for_completion:
+             await _trigger_parent_resumption_celery(parent_run_id, storage, storage_config)
+
+
+ async def _trigger_parent_resumption_celery(
+     parent_run_id: str,
+     storage: StorageBackend,
+     storage_config: dict[str, Any] | None,
+ ) -> None:
+     """
+     Trigger parent workflow resumption after a child completes.
+
+     Checks if the parent is suspended and schedules resumption via Celery.
+     """
+     parent_run = await storage.get_run(parent_run_id)
+     if parent_run and parent_run.status == RunStatus.SUSPENDED:
+         logger.debug(
+             "Triggering parent resumption via Celery",
+             parent_run_id=parent_run_id,
+         )
+         # Schedule immediate resumption via Celery
+         schedule_workflow_resumption(parent_run_id, datetime.now(UTC), storage_config)
+
+
+ async def _notify_parent_of_child_completion(
+     run: WorkflowRun,
+     storage: StorageBackend,
+     storage_config: dict[str, Any] | None,
+     status: RunStatus,
+     result: str | None = None,
+     error: str | None = None,
+     error_type: str | None = None,
+ ) -> None:
+     """
+     Notify the parent workflow that a child has completed, failed, or been cancelled.
+
+     This is called when a child workflow reaches a terminal state during resume.
+     It records the appropriate event in the parent's log and triggers resumption
+     if the parent was waiting.
+
+     Args:
+         run: The child workflow run
+         storage: Storage backend
+         storage_config: Storage configuration for Celery tasks
+         status: Terminal status (COMPLETED, FAILED, CANCELLED)
+         result: Serialized result (for COMPLETED)
+         error: Error message (for FAILED/CANCELLED)
+         error_type: Error type name (for FAILED)
+     """
+     from pyworkflow.engine.events import (
+         create_child_workflow_cancelled_event,
+         create_child_workflow_completed_event,
+         create_child_workflow_failed_event,
+     )
+
+     if not run.parent_run_id:
+         return  # Not a child workflow
+
+     parent_run_id = run.parent_run_id
+     child_run_id = run.run_id
+
+     # Find child_id and wait_for_completion from the parent's events
+     parent_events = await storage.get_events(parent_run_id)
+     child_id = None
+     wait_for_completion = False
+
+     for event in parent_events:
+         if (
+             event.type == EventType.CHILD_WORKFLOW_STARTED
+             and event.data.get("child_run_id") == child_run_id
+         ):
+             child_id = event.data.get("child_id")
+             wait_for_completion = event.data.get("wait_for_completion", False)
+             break
+
+     if not child_id:
+         logger.warning(
+             "Could not find child_id in parent events for resumed child workflow",
+             parent_run_id=parent_run_id,
+             child_run_id=child_run_id,
+         )
+         return
+
+     # Record the appropriate event in the parent's log
+     if status == RunStatus.COMPLETED:
+         event = create_child_workflow_completed_event(
+             run_id=parent_run_id,
+             child_id=child_id,
+             child_run_id=child_run_id,
+             result=result,
+         )
+     elif status == RunStatus.FAILED:
+         event = create_child_workflow_failed_event(
+             run_id=parent_run_id,
+             child_id=child_id,
+             child_run_id=child_run_id,
+             error=error or "Unknown error",
+             error_type=error_type or "Exception",
+         )
+     elif status == RunStatus.CANCELLED:
+         event = create_child_workflow_cancelled_event(
+             run_id=parent_run_id,
+             child_id=child_id,
+             child_run_id=child_run_id,
+             reason=error,
+         )
+     else:
+         return  # Not a terminal state we handle
+
+     await storage.record_event(event)
+
+     logger.info(
+         f"Notified parent of child workflow {status.value}",
+         parent_run_id=parent_run_id,
+         child_run_id=child_run_id,
+         child_id=child_id,
+     )
+
+     # Trigger parent resumption if waiting
+     if wait_for_completion:
+         await _trigger_parent_resumption_celery(parent_run_id, storage, storage_config)
+
+
+ async def _handle_workflow_recovery(
+     run: WorkflowRun,
+     storage: StorageBackend,
+     worker_id: str | None = None,
+ ) -> bool:
+     """
+     Handle workflow recovery from worker failure.
+
+     Called when a workflow is found in RUNNING status but we're starting fresh.
+     This indicates a previous worker crashed.
+
+     Args:
+         run: Existing workflow run record
+         storage: Storage backend
+         worker_id: ID of the current worker
+
+     Returns:
+         True if recovery should proceed, False if max attempts exceeded
+     """
+     # Check if recovery is enabled for this workflow
+     if not run.recover_on_worker_loss:
+         logger.warning(
+             "Workflow recovery disabled, marking as failed",
+             run_id=run.run_id,
+             workflow_name=run.workflow_name,
+         )
+         await storage.update_run_status(
+             run_id=run.run_id,
+             status=RunStatus.FAILED,
+             error="Worker lost and recovery is disabled",
+         )
+         return False
+
+     # Check the recovery attempt limit
+     new_attempts = run.recovery_attempts + 1
+     if new_attempts > run.max_recovery_attempts:
+         logger.error(
+             "Workflow exceeded max recovery attempts",
+             run_id=run.run_id,
+             workflow_name=run.workflow_name,
+             recovery_attempts=run.recovery_attempts,
+             max_recovery_attempts=run.max_recovery_attempts,
+         )
+         await storage.update_run_status(
+             run_id=run.run_id,
+             status=RunStatus.FAILED,
+             error=f"Exceeded max recovery attempts ({run.max_recovery_attempts})",
+         )
+         return False
+
+     # Get the last event sequence
+     events = await storage.get_events(run.run_id)
+     last_event_sequence = max((e.sequence or 0 for e in events), default=0) if events else None
+
+     # Record the interruption event
+     interrupted_event = create_workflow_interrupted_event(
+         run_id=run.run_id,
+         reason="worker_lost",
+         worker_id=worker_id,
+         last_event_sequence=last_event_sequence,
+         error="Worker process terminated unexpectedly",
+         recovery_attempt=new_attempts,
+         recoverable=True,
+     )
+     await storage.record_event(interrupted_event)
+
+     # Update the recovery attempts counter
+     # Note: We need to update the run record with the new recovery_attempts count
+     run.recovery_attempts = new_attempts
+     await storage.update_run_recovery_attempts(run.run_id, new_attempts)
+
+     logger.info(
+         "Workflow recovery initiated",
+         run_id=run.run_id,
+         workflow_name=run.workflow_name,
+         recovery_attempt=new_attempts,
+         max_recovery_attempts=run.max_recovery_attempts,
+     )
+
+     return True
+
+
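To make the attempt accounting concrete, here is a toy rendition of the gate above (illustrative only; the real checks run against the stored `WorkflowRun` record and persist the counter via `update_run_recovery_attempts`):

```python
from dataclasses import dataclass

@dataclass
class FakeRun:  # stand-in for WorkflowRun, for illustration only
    recover_on_worker_loss: bool = True
    recovery_attempts: int = 0
    max_recovery_attempts: int = 2

def should_recover(run: FakeRun) -> bool:
    if not run.recover_on_worker_loss:
        return False  # run would be marked FAILED instead
    if run.recovery_attempts + 1 > run.max_recovery_attempts:
        return False  # attempt budget exhausted -> FAILED
    run.recovery_attempts += 1  # persisted by the real implementation
    return True

run = FakeRun()
print([should_recover(run) for _ in range(3)])  # [True, True, False]
```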
+ async def _recover_workflow_on_worker(
+     run: WorkflowRun,
+     workflow_meta: WorkflowMetadata,
+     storage: StorageBackend,
+     storage_config: dict[str, Any] | None = None,
+ ) -> str:
+     """
+     Recover a workflow after a worker failure.
+
+     This is similar to resuming a suspended workflow, but specifically handles
+     the recovery scenario after a worker crash.
+
+     Args:
+         run: Existing workflow run record
+         workflow_meta: Workflow metadata
+         storage: Storage backend
+         storage_config: Storage configuration for child tasks
+
+     Returns:
+         Workflow run ID
+     """
+     run_id = run.run_id
+     workflow_name = run.workflow_name
+
+     logger.info(
+         f"Recovering workflow execution: {workflow_name}",
+         run_id=run_id,
+         workflow_name=workflow_name,
+         recovery_attempt=run.recovery_attempts,
+     )
+
+     # Update status to RUNNING (from RUNNING or INTERRUPTED)
+     await storage.update_run_status(run_id=run_id, status=RunStatus.RUNNING)
+
+     # Load the event log for replay
+     events = await storage.get_events(run_id)
+
+     # Complete any pending sleeps (mark them as done before resuming)
+     events = await _complete_pending_sleeps(run_id, events, storage)
+
+     # Deserialize arguments
+     args = deserialize_args(run.input_args)
+     kwargs = deserialize_kwargs(run.input_kwargs)
+
+     # Execute the workflow with event replay
+     try:
+         result = await execute_workflow_with_context(
+             workflow_func=workflow_meta.func,
+             run_id=run_id,
+             workflow_name=workflow_name,
+             storage=storage,
+             args=args,
+             kwargs=kwargs,
+             event_log=events,
+         )
+
+         # Update run status to completed
+         await storage.update_run_status(
+             run_id=run_id, status=RunStatus.COMPLETED, result=serialize_args(result)
+         )
+
+         # Cancel all running children (TERMINATE policy)
+         await _handle_parent_completion(run_id, RunStatus.COMPLETED, storage)
+
+         logger.info(
+             f"Workflow recovered and completed: {workflow_name}",
+             run_id=run_id,
+             workflow_name=workflow_name,
+             recovery_attempt=run.recovery_attempts,
+         )
+
+         return run_id
+
+     except SuspensionSignal as e:
+         # Workflow suspended again
+         await storage.update_run_status(run_id=run_id, status=RunStatus.SUSPENDED)
+
+         logger.info(
+             f"Recovered workflow suspended: {e.reason}",
+             run_id=run_id,
+             workflow_name=workflow_name,
+             reason=e.reason,
+         )
+
+         # Schedule automatic resumption if we have a resume_at time
+         resume_at = e.data.get("resume_at") if e.data else None
+         if resume_at:
+             schedule_workflow_resumption(run_id, resume_at, storage_config=storage_config)
+             logger.info(
+                 "Scheduled automatic workflow resumption",
+                 run_id=run_id,
+                 resume_at=resume_at.isoformat(),
+             )
+
+         return run_id
+
+     except ContinueAsNewSignal as e:
+         # Workflow continuing as a new execution
+         new_run_id = await _handle_continue_as_new_celery(
+             current_run_id=run_id,
+             workflow_meta=workflow_meta,
+             storage=storage,
+             storage_config=storage_config,
+             new_args=e.workflow_args,
+             new_kwargs=e.workflow_kwargs,
+         )
+
+         # Cancel all running children (TERMINATE policy)
+         await _handle_parent_completion(run_id, RunStatus.CONTINUED_AS_NEW, storage)
+
+         logger.info(
+             f"Recovered workflow continued as new: {workflow_name}",
+             old_run_id=run_id,
+             new_run_id=new_run_id,
+         )
+
+         return run_id
+
+     except Exception as e:
+         # Workflow failed during recovery
+         await storage.update_run_status(run_id=run_id, status=RunStatus.FAILED, error=str(e))
+
+         # Cancel all running children (TERMINATE policy)
+         await _handle_parent_completion(run_id, RunStatus.FAILED, storage)
+
+         logger.error(
+             f"Workflow failed during recovery: {workflow_name}",
+             run_id=run_id,
+             workflow_name=workflow_name,
+             error=str(e),
+             exc_info=True,
+         )
+
+         raise
+
+
+ async def _start_workflow_on_worker(
+     workflow_meta: WorkflowMetadata,
+     args: tuple,
+     kwargs: dict,
+     storage: StorageBackend,
+     storage_config: dict[str, Any] | None = None,
+     idempotency_key: str | None = None,
+     run_id: str | None = None,
+ ) -> str:
+     """
+     Internal function to start a workflow on a Celery worker.
+
+     This mirrors the logic from testing.py but runs on workers.
+     Handles recovery scenarios when picking up a task from a crashed worker.
+
+     Args:
+         workflow_meta: Workflow metadata
+         args: Workflow positional arguments
+         kwargs: Workflow keyword arguments
+         storage: Storage backend
+         storage_config: Storage configuration for child tasks
+         idempotency_key: Optional idempotency key
+         run_id: Pre-generated run ID (if None, generates a new one)
+     """
+     from pyworkflow.config import get_config
+
+     # Ensure storage is connected (some backends like SQLite require this)
+     if hasattr(storage, "connect"):
+         await storage.connect()
+
+     workflow_name = workflow_meta.name
+     config = get_config()
+
+     # Check the idempotency key
+     if idempotency_key:
+         existing_run = await storage.get_run_by_idempotency_key(idempotency_key)
+         if existing_run:
+             # Check if this is a recovery scenario (workflow was RUNNING but the worker crashed)
+             if existing_run.status == RunStatus.RUNNING:
+                 # Check if this is truly a crashed worker or just a duplicate task execution
+                 from datetime import timedelta
+
+                 run_age = datetime.now(UTC) - existing_run.created_at
+                 if run_age < timedelta(seconds=30):
+                     logger.info(
+                         f"Run with idempotency key '{idempotency_key}' already exists and was created recently. "
+                         "Likely duplicate task execution, skipping.",
+                         run_id=existing_run.run_id,
+                     )
+                     return existing_run.run_id
+
+                 # This is a recovery scenario - the worker crashed while running
+                 can_recover = await _handle_workflow_recovery(
+                     run=existing_run,
+                     storage=storage,
+                     worker_id=None,  # TODO: Get actual worker ID from Celery
+                 )
+                 if can_recover:
+                     # Continue with recovery - resume the workflow from the last checkpoint
+                     return await _recover_workflow_on_worker(
+                         run=existing_run,
+                         workflow_meta=workflow_meta,
+                         storage=storage,
+                         storage_config=storage_config,
+                     )
+                 else:
+                     # Recovery disabled or max attempts exceeded
+                     return existing_run.run_id
+             elif existing_run.status == RunStatus.INTERRUPTED:
+                 # The previous recovery attempt also failed, try again
+                 can_recover = await _handle_workflow_recovery(
+                     run=existing_run,
+                     storage=storage,
+                     worker_id=None,
+                 )
+                 if can_recover:
+                     return await _recover_workflow_on_worker(
+                         run=existing_run,
+                         workflow_meta=workflow_meta,
+                         storage=storage,
+                         storage_config=storage_config,
+                     )
+                 else:
+                     return existing_run.run_id
+             else:
+                 # Workflow already completed/failed/etc.
+                 logger.info(
+                     f"Workflow with idempotency key '{idempotency_key}' already exists",
+                     run_id=existing_run.run_id,
+                     status=existing_run.status.value,
+                 )
+                 return existing_run.run_id
+
+     # Use the provided run_id or generate a new one
+     if run_id is None:
+         run_id = f"run_{uuid.uuid4().hex[:16]}"
+
+     # Check if the run already exists (recovery scenario without an idempotency key)
+     existing_run = await storage.get_run(run_id)
+     if existing_run and existing_run.status == RunStatus.RUNNING:
+         # This is a recovery scenario
+         can_recover = await _handle_workflow_recovery(
+             run=existing_run,
+             storage=storage,
+             worker_id=None,
+         )
+         if can_recover:
+             return await _recover_workflow_on_worker(
+                 run=existing_run,
+                 workflow_meta=workflow_meta,
+                 storage=storage,
+                 storage_config=storage_config,
+             )
+         else:
+             return existing_run.run_id
+
+     logger.info(
+         f"Starting workflow execution on worker: {workflow_name}",
+         run_id=run_id,
+         workflow_name=workflow_name,
+     )
+
+     # Determine recovery settings.
+     # Priority: workflow decorator > global config > defaults based on durable mode
+     recover_on_worker_loss = getattr(
+         workflow_meta.func, "__workflow_recover_on_worker_loss__", None
+     )
+     max_recovery_attempts = getattr(workflow_meta.func, "__workflow_max_recovery_attempts__", None)
+     is_durable = getattr(workflow_meta.func, "__workflow_durable__", True)
+
+     if recover_on_worker_loss is None:
+         recover_on_worker_loss = config.default_recover_on_worker_loss
+     if recover_on_worker_loss is None:
+         # Default: True for durable, False for transient
+         recover_on_worker_loss = is_durable if is_durable is not None else True
+
+     if max_recovery_attempts is None:
+         max_recovery_attempts = config.default_max_recovery_attempts
+
+     # Create the workflow run record
+     run = WorkflowRun(
+         run_id=run_id,
+         workflow_name=workflow_name,
+         status=RunStatus.RUNNING,
+         created_at=datetime.now(UTC),
+         started_at=datetime.now(UTC),
+         input_args=serialize_args(*args),
+         input_kwargs=serialize_kwargs(**kwargs),
+         idempotency_key=idempotency_key,
+         max_duration=workflow_meta.max_duration,
+         metadata={},  # Run-level metadata (not from decorator)
+         recovery_attempts=0,
+         max_recovery_attempts=max_recovery_attempts,
+         recover_on_worker_loss=recover_on_worker_loss,
+     )
+
+     await storage.create_run(run)
+
+     # Record the workflow started event
+     start_event = create_workflow_started_event(
+         run_id=run_id,
+         workflow_name=workflow_name,
+         args=serialize_args(*args),
+         kwargs=serialize_kwargs(**kwargs),
+         metadata={},  # Run-level metadata
+     )
+
+     await storage.record_event(start_event)
+
+     # Execute the workflow
+     try:
+         result = await execute_workflow_with_context(
+             workflow_func=workflow_meta.func,
+             run_id=run_id,
+             workflow_name=workflow_name,
+             storage=storage,
+             args=args,
+             kwargs=kwargs,
+         )
+
+         # Update run status to completed
+         await storage.update_run_status(
+             run_id=run_id, status=RunStatus.COMPLETED, result=serialize_args(result)
+         )
+
+         # Cancel all running children (TERMINATE policy)
+         await _handle_parent_completion(run_id, RunStatus.COMPLETED, storage)
+
+         logger.info(
+             f"Workflow completed successfully on worker: {workflow_name}",
+             run_id=run_id,
+             workflow_name=workflow_name,
+         )
+
+         return run_id
+
+     except CancellationError as e:
+         # Workflow was cancelled
+         cancelled_event = create_workflow_cancelled_event(
+             run_id=run_id,
+             reason=e.reason,
+             cleanup_completed=True,  # If we got here, cleanup has completed
+         )
+         await storage.record_event(cancelled_event)
+         await storage.update_run_status(run_id=run_id, status=RunStatus.CANCELLED)
+         await storage.clear_cancellation_flag(run_id)
+
+         # Cancel all running children (TERMINATE policy)
+         await _handle_parent_completion(run_id, RunStatus.CANCELLED, storage)
+
+         logger.info(
+             f"Workflow cancelled on worker: {workflow_name}",
+             run_id=run_id,
+             workflow_name=workflow_name,
+             reason=e.reason,
+         )
+
+         return run_id
+
+     except SuspensionSignal as e:
+         # Workflow suspended (sleep or hook)
+         await storage.update_run_status(run_id=run_id, status=RunStatus.SUSPENDED)
+
+         logger.info(
+             f"Workflow suspended on worker: {e.reason}",
+             run_id=run_id,
+             workflow_name=workflow_name,
+             reason=e.reason,
+         )
+
+         # Schedule automatic resumption if we have a resume_at time
+         resume_at = e.data.get("resume_at") if e.data else None
+         if resume_at:
+             schedule_workflow_resumption(run_id, resume_at, storage_config=storage_config)
+             logger.info(
+                 "Scheduled automatic workflow resumption",
+                 run_id=run_id,
+                 resume_at=resume_at.isoformat(),
+             )
+
+         return run_id
+
+     except ContinueAsNewSignal as e:
+         # Workflow continuing as a new execution
+         new_run_id = await _handle_continue_as_new_celery(
+             current_run_id=run_id,
+             workflow_meta=workflow_meta,
+             storage=storage,
+             storage_config=storage_config,
+             new_args=e.workflow_args,
+             new_kwargs=e.workflow_kwargs,
+         )
+
+         # Cancel all running children (TERMINATE policy)
+         await _handle_parent_completion(run_id, RunStatus.CONTINUED_AS_NEW, storage)
+
+         logger.info(
+             f"Workflow continued as new on worker: {workflow_name}",
+             old_run_id=run_id,
+             new_run_id=new_run_id,
+         )
+
+         return run_id
+
+     except Exception as e:
+         # Workflow failed
+         await storage.update_run_status(run_id=run_id, status=RunStatus.FAILED, error=str(e))
+
+         # Cancel all running children (TERMINATE policy)
+         await _handle_parent_completion(run_id, RunStatus.FAILED, storage)
+
+         logger.error(
+             f"Workflow failed on worker: {workflow_name}",
+             run_id=run_id,
+             workflow_name=workflow_name,
+             error=str(e),
+             exc_info=True,
+         )
+
+         raise
+
+
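The recovery settings resolved above come from dunder attributes on the workflow function. A small sketch of the precedence; the attribute names are taken from the `getattr` calls above, while how the package's workflow decorator sets them is not shown in this diff and is assumed here:

```python
def my_workflow():  # stand-in for a decorated workflow function
    ...

# Attributes a decorator such as @workflow(...) would plausibly set;
# only the attribute names appear in the code above.
my_workflow.__workflow_durable__ = True
my_workflow.__workflow_recover_on_worker_loss__ = False

# Resolution order used by _start_workflow_on_worker:
# decorator attribute -> config.default_recover_on_worker_loss -> durable default
recover = getattr(my_workflow, "__workflow_recover_on_worker_loss__", None)
assert recover is False  # the per-workflow setting wins over both defaults
```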
+ @celery_app.task(
+     name="pyworkflow.resume_workflow",
+     queue="pyworkflow.schedules",
+ )
+ def resume_workflow_task(
+     run_id: str,
+     storage_config: dict[str, Any] | None = None,
+ ) -> Any | None:
+     """
+     Resume a suspended workflow.
+
+     This task is scheduled automatically when a workflow suspends (e.g., for sleep).
+     It executes on Celery workers and runs the workflow directly.
+
+     Args:
+         run_id: Workflow run ID to resume
+         storage_config: Storage backend configuration
+
+     Returns:
+         Workflow result if completed, None if suspended again
+     """
+     logger.info(f"Resuming workflow on worker: {run_id}")
+
+     # Get storage backend
+     storage = _get_storage_backend(storage_config)
+
+     # Resume the workflow directly on the worker
+     result = asyncio.run(_resume_workflow_on_worker(run_id, storage, storage_config))
+
+     if result is not None:
+         logger.info(f"Workflow completed on worker: {run_id}")
+     else:
+         logger.info(f"Workflow suspended again on worker: {run_id}")
+
+     return result
+
+
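Resumption is normally scheduled by `schedule_workflow_resumption()` at the bottom of this module, but the task can also be enqueued by hand, for example from an operator shell (the run ID below is hypothetical):

```python
from pyworkflow.celery.tasks import resume_workflow_task

# Hypothetical ID of a run currently in SUSPENDED status.
resume_workflow_task.delay("run_abc123", storage_config=None)
```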
+ @celery_app.task(
+     name="pyworkflow.execute_scheduled_workflow",
+     queue="pyworkflow.schedules",
+ )
+ def execute_scheduled_workflow_task(
+     schedule_id: str,
+     scheduled_time: str,
+     storage_config: dict[str, Any] | None = None,
+ ) -> str | None:
+     """
+     Execute a workflow from a schedule.
+
+     This task is triggered by the PyWorkflow scheduler when a schedule is due.
+     It starts a new workflow run and tracks it against the schedule.
+
+     Args:
+         schedule_id: Schedule identifier
+         scheduled_time: ISO format scheduled execution time
+         storage_config: Storage backend configuration
+
+     Returns:
+         Workflow run ID if started, None if skipped
+     """
+     logger.info("Executing scheduled workflow", schedule_id=schedule_id)
+
+     storage = _get_storage_backend(storage_config)
+
+     return asyncio.run(
+         _execute_scheduled_workflow(
+             schedule_id=schedule_id,
+             scheduled_time=datetime.fromisoformat(scheduled_time),
+             storage=storage,
+             storage_config=storage_config,
+         )
+     )
+
+
+ async def _execute_scheduled_workflow(
+     schedule_id: str,
+     scheduled_time: datetime,
+     storage: StorageBackend,
+     storage_config: dict[str, Any] | None,
+ ) -> str | None:
+     """
+     Execute a scheduled workflow with tracking.
+
+     Args:
+         schedule_id: Schedule identifier
+         scheduled_time: When the schedule was supposed to trigger
+         storage: Storage backend
+         storage_config: Storage configuration for serialization
+
+     Returns:
+         Workflow run ID if started, None if skipped
+     """
+     # Ensure storage is connected (some backends like SQLite require this)
+     if hasattr(storage, "connect"):
+         await storage.connect()
+
+     from pyworkflow.engine.events import create_schedule_triggered_event
+     from pyworkflow.storage.schemas import ScheduleStatus
+
+     # Get the schedule
+     schedule = await storage.get_schedule(schedule_id)
+     if not schedule:
+         logger.error(f"Schedule not found: {schedule_id}")
+         return None
+
+     if schedule.status != ScheduleStatus.ACTIVE:
+         logger.info(f"Schedule not active: {schedule_id}")
+         return None
+
+     # Get the workflow
+     workflow_meta = get_workflow(schedule.workflow_name)
+     if not workflow_meta:
+         logger.error(f"Workflow not found: {schedule.workflow_name}")
+         schedule.failed_runs += 1
+         schedule.updated_at = datetime.now(UTC)
+         await storage.update_schedule(schedule)
+         return None
+
+     # Deserialize arguments
+     args = deserialize_args(schedule.args)
+     kwargs = deserialize_kwargs(schedule.kwargs)
+
+     # Generate a run_id
+     run_id = f"sched_{schedule_id[:8]}_{uuid.uuid4().hex[:8]}"
+
+     # Add to running runs
+     await storage.add_running_run(schedule_id, run_id)
+
+     # Update schedule stats
+     schedule.total_runs += 1
+     schedule.last_run_at = datetime.now(UTC)
+     schedule.last_run_id = run_id
+     await storage.update_schedule(schedule)
+
+     try:
+         # Serialize args for the Celery task
+         args_json = serialize_args(*args)
+         kwargs_json = serialize_kwargs(**kwargs)
+
+         # Start the workflow via Celery.
+         # Note: start_workflow_task will create the run record. The schedule
+         # linkage is captured by the SCHEDULE_TRIGGERED event recorded below,
+         # since start_workflow_task does not accept a metadata argument.
+         start_workflow_task.delay(
+             workflow_name=schedule.workflow_name,
+             args_json=args_json,
+             kwargs_json=kwargs_json,
+             run_id=run_id,
+             storage_config=storage_config,
+         )
+
+         # Record the trigger event - use schedule_id as run_id since the workflow run may not exist yet
+         trigger_event = create_schedule_triggered_event(
+             run_id=schedule_id,  # Use schedule_id for event association
+             schedule_id=schedule_id,
+             scheduled_time=scheduled_time,
+             actual_time=datetime.now(UTC),
+             workflow_run_id=run_id,
+         )
+         await storage.record_event(trigger_event)
+
+         logger.info(
+             f"Started scheduled workflow: {schedule.workflow_name}",
+             schedule_id=schedule_id,
+             run_id=run_id,
+         )
+
+         return run_id
+
+     except Exception as e:
+         logger.error(f"Failed to start scheduled workflow: {e}")
+         await storage.remove_running_run(schedule_id, run_id)
+         schedule.failed_runs += 1
+         schedule.updated_at = datetime.now(UTC)
+         await storage.update_schedule(schedule)
+         raise
+
+
+ async def _complete_pending_sleeps(
+     run_id: str,
+     events: list[Any],
+     storage: StorageBackend,
+ ) -> list[Any]:
+     """
+     Record SLEEP_COMPLETED events for any pending sleeps.
+
+     When resuming a workflow, we need to mark sleeps as completed
+     so the replay logic knows to skip them.
+
+     Args:
+         run_id: Workflow run ID
+         events: Current event list
+         storage: Storage backend
+
+     Returns:
+         Updated event list with SLEEP_COMPLETED events appended
+     """
+     from pyworkflow.engine.events import EventType, create_sleep_completed_event
+
+     # Find pending sleeps (SLEEP_STARTED without SLEEP_COMPLETED)
+     started_sleeps = set()
+     completed_sleeps = set()
+
+     for event in events:
+         if event.type == EventType.SLEEP_STARTED:
+             started_sleeps.add(event.data.get("sleep_id"))
+         elif event.type == EventType.SLEEP_COMPLETED:
+             completed_sleeps.add(event.data.get("sleep_id"))
+
+     pending_sleeps = started_sleeps - completed_sleeps
+
+     if not pending_sleeps:
+         return events
+
+     # Record SLEEP_COMPLETED for each pending sleep
+     updated_events = list(events)
+     for sleep_id in pending_sleeps:
+         complete_event = create_sleep_completed_event(
+             run_id=run_id,
+             sleep_id=sleep_id,
+         )
+         await storage.record_event(complete_event)
+         updated_events.append(complete_event)
+         logger.debug(f"Recorded SLEEP_COMPLETED for {sleep_id}", run_id=run_id)
+
+     return updated_events
+
+
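The pending-sleep detection above is a plain set difference over the event log. A self-contained illustration with made-up event tuples standing in for the real event objects:

```python
# (event_type, sleep_id) pairs standing in for recorded events.
log = [
    ("SLEEP_STARTED", "sleep_1"),
    ("SLEEP_COMPLETED", "sleep_1"),
    ("SLEEP_STARTED", "sleep_2"),  # interrupted before completion
]

started = {sid for typ, sid in log if typ == "SLEEP_STARTED"}
completed = {sid for typ, sid in log if typ == "SLEEP_COMPLETED"}
print(started - completed)  # {'sleep_2'} -> gets a SLEEP_COMPLETED on resume
```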
1314
+ async def _resume_workflow_on_worker(
1315
+ run_id: str,
1316
+ storage: StorageBackend,
1317
+ storage_config: dict[str, Any] | None = None,
1318
+ ) -> Any | None:
1319
+ """
1320
+ Internal function to resume workflow on Celery worker.
1321
+
1322
+ This mirrors the logic from testing.py but runs on workers.
1323
+ """
1324
+ from pyworkflow.core.exceptions import WorkflowNotFoundError
1325
+
1326
+ # Ensure storage is connected (some backends like SQLite require this)
1327
+ if hasattr(storage, "connect"):
1328
+ await storage.connect()
1329
+
1330
+ # Load workflow run
1331
+ run = await storage.get_run(run_id)
1332
+ if not run:
1333
+ raise WorkflowNotFoundError(run_id)
1334
+
1335
+ # Check if workflow was cancelled while suspended
1336
+ if run.status == RunStatus.CANCELLED:
1337
+ logger.info(
1338
+ "Workflow was cancelled while suspended, skipping resume",
1339
+ run_id=run_id,
1340
+ workflow_name=run.workflow_name,
1341
+ )
1342
+ return None
1343
+
1344
+ # Check for cancellation flag
1345
+ cancellation_requested = await storage.check_cancellation_flag(run_id)
1346
+
1347
+ logger.info(
1348
+ f"Resuming workflow execution on worker: {run.workflow_name}",
1349
+ run_id=run_id,
1350
+ workflow_name=run.workflow_name,
1351
+ current_status=run.status.value,
1352
+ cancellation_requested=cancellation_requested,
1353
+ )
1354
+
1355
+ # Get workflow function
1356
+ workflow_meta = get_workflow(run.workflow_name)
1357
+ if not workflow_meta:
1358
+ raise ValueError(f"Workflow '{run.workflow_name}' not registered")
1359
+
1360
+ # Load event log
1361
+ events = await storage.get_events(run_id)
1362
+
1363
+ # Complete any pending sleeps (mark them as done before resuming)
1364
+ events = await _complete_pending_sleeps(run_id, events, storage)
1365
+
1366
+ # Deserialize arguments
1367
+ args = deserialize_args(run.input_args)
1368
+ kwargs = deserialize_kwargs(run.input_kwargs)
1369
+
1370
+ # Update status to running
1371
+ await storage.update_run_status(run_id=run_id, status=RunStatus.RUNNING)
1372
+
1373
+ # Execute workflow with event replay
1374
+ try:
1375
+ result = await execute_workflow_with_context(
1376
+ workflow_func=workflow_meta.func,
1377
+ run_id=run_id,
1378
+ workflow_name=run.workflow_name,
1379
+ storage=storage,
1380
+ args=args,
1381
+ kwargs=kwargs,
1382
+ event_log=events,
1383
+ cancellation_requested=cancellation_requested,
1384
+ )
1385
+
1386
+ # Update run status to completed
1387
+ await storage.update_run_status(
1388
+ run_id=run_id, status=RunStatus.COMPLETED, result=serialize_args(result)
1389
+ )
1390
+
1391
+ # Clear cancellation flag if any
1392
+ await storage.clear_cancellation_flag(run_id)
1393
+
1394
+ # Cancel all running children (TERMINATE policy)
1395
+ await _handle_parent_completion(run_id, RunStatus.COMPLETED, storage)
1396
+
1397
+ # Notify parent if this is a child workflow
1398
+ await _notify_parent_of_child_completion(
1399
+ run=run,
1400
+ storage=storage,
1401
+ storage_config=storage_config,
1402
+ status=RunStatus.COMPLETED,
1403
+ result=serialize_args(result),
1404
+ )
1405
+
1406
+ logger.info(
1407
+ f"Workflow resumed and completed on worker: {run.workflow_name}",
1408
+ run_id=run_id,
1409
+ workflow_name=run.workflow_name,
1410
+ )
1411
+
1412
+ return result
1413
+
1414
+ except CancellationError as e:
1415
+ # Workflow was cancelled
1416
+ cancelled_event = create_workflow_cancelled_event(
1417
+ run_id=run_id,
1418
+ reason=e.reason,
1419
+ cleanup_completed=True,
1420
+ )
1421
+ await storage.record_event(cancelled_event)
1422
+ await storage.update_run_status(run_id=run_id, status=RunStatus.CANCELLED)
1423
+ await storage.clear_cancellation_flag(run_id)
1424
+
1425
+ # Cancel all running children (TERMINATE policy)
1426
+ await _handle_parent_completion(run_id, RunStatus.CANCELLED, storage)
1427
+
1428
+ # Notify parent if this is a child workflow
1429
+ await _notify_parent_of_child_completion(
1430
+ run=run,
1431
+ storage=storage,
1432
+ storage_config=storage_config,
1433
+ status=RunStatus.CANCELLED,
1434
+ error=e.reason,
1435
+ )
1436
+
1437
+ logger.info(
1438
+ f"Workflow cancelled on resume on worker: {run.workflow_name}",
1439
+ run_id=run_id,
1440
+ workflow_name=run.workflow_name,
1441
+ reason=e.reason,
1442
+ )
1443
+
1444
+ return None
1445
+
+     except SuspensionSignal as e:
+         # Workflow suspended again
+         await storage.update_run_status(run_id=run_id, status=RunStatus.SUSPENDED)
+
+         logger.info(
+             f"Workflow suspended again on worker: {e.reason}",
+             run_id=run_id,
+             workflow_name=run.workflow_name,
+             reason=e.reason,
+         )
+
+         # Schedule automatic resumption if we have a resume_at time
+         resume_at = e.data.get("resume_at") if e.data else None
+         if resume_at:
+             schedule_workflow_resumption(run_id, resume_at, storage_config=storage_config)
+             logger.info(
+                 "Scheduled automatic workflow resumption",
+                 run_id=run_id,
+                 resume_at=resume_at.isoformat(),
+             )
+
+         return None
+
+     except ContinueAsNewSignal as e:
+         # Workflow continuing as new execution
+         workflow_meta = get_workflow(run.workflow_name)
+         if not workflow_meta:
+             raise ValueError(f"Workflow '{run.workflow_name}' not registered")
+
+         new_run_id = await _handle_continue_as_new_celery(
+             current_run_id=run_id,
+             workflow_meta=workflow_meta,
+             storage=storage,
+             storage_config=storage_config,
+             new_args=e.workflow_args,
+             new_kwargs=e.workflow_kwargs,
+             parent_run_id=run.parent_run_id,
+         )
+
+         # Cancel all running children (TERMINATE policy)
+         await _handle_parent_completion(run_id, RunStatus.CONTINUED_AS_NEW, storage)
+
+         logger.info(
+             f"Workflow continued as new on resume: {run.workflow_name}",
+             old_run_id=run_id,
+             new_run_id=new_run_id,
+         )
+
+         return None
+
+     except Exception as e:
+         # Workflow failed
+         error_msg = str(e)
+         error_type = type(e).__name__
+         await storage.update_run_status(run_id=run_id, status=RunStatus.FAILED, error=error_msg)
+
+         # Cancel all running children (TERMINATE policy)
+         await _handle_parent_completion(run_id, RunStatus.FAILED, storage)
+
+         # Notify parent if this is a child workflow
+         await _notify_parent_of_child_completion(
+             run=run,
+             storage=storage,
+             storage_config=storage_config,
+             status=RunStatus.FAILED,
+             error=error_msg,
+             error_type=error_type,
+         )
+
+         logger.error(
+             f"Workflow failed on resume on worker: {run.workflow_name}",
+             run_id=run_id,
+             workflow_name=run.workflow_name,
+             error=error_msg,
+             exc_info=True,
+         )
+
+         raise
+
+
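Taken together, the handlers above give the resume path a fixed outcome-to-status contract, summarized here for reference (a condensed note, not part of the package diff):

    # Outcome of execute_workflow_with_context  ->  recorded RunStatus
    # normal return        -> COMPLETED (result serialized, cancellation flag cleared)
    # CancellationError    -> CANCELLED (cancelled event recorded, flag cleared)
    # SuspensionSignal     -> SUSPENDED (resumption scheduled when resume_at is present)
    # ContinueAsNewSignal  -> CONTINUED_AS_NEW (new run scheduled via Celery)
    # any other Exception  -> FAILED (re-raised so the Celery task is marked failed)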
+ def _get_storage_backend(config: dict[str, Any] | None = None) -> StorageBackend:
+     """
+     Get storage backend from configuration.
+
+     This is an alias for config_to_storage for backward compatibility.
+     """
+     from pyworkflow.storage.config import config_to_storage
+
+     return config_to_storage(config)
+
+
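A minimal usage sketch of the alias (illustrative only; passing None defers to whatever defaults config_to_storage applies, and any concrete config keys would follow that function's schema):

    from pyworkflow.storage.config import config_to_storage

    # Both calls construct the same kind of StorageBackend; the old name
    # simply forwards to config_to_storage.
    storage = _get_storage_backend(None)
    equivalent = config_to_storage(None)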
+ def schedule_workflow_resumption(
+     run_id: str,
+     resume_at: datetime,
+     storage_config: dict[str, Any] | None = None,
+ ) -> None:
+     """
+     Schedule automatic workflow resumption after sleep.
+
+     Args:
+         run_id: Workflow run ID
+         resume_at: When to resume the workflow
+         storage_config: Storage backend configuration to pass to the resume task
+     """
+     from datetime import UTC
+
+     # Calculate delay in seconds
+     now = datetime.now(UTC)
+     delay_seconds = max(0, int((resume_at - now).total_seconds()))
+
+     logger.info(
+         "Scheduling workflow resumption",
+         run_id=run_id,
+         resume_at=resume_at.isoformat(),
+         delay_seconds=delay_seconds,
+     )
+
+     # Schedule the resume task
+     resume_workflow_task.apply_async(
+         args=[run_id],
+         kwargs={"storage_config": storage_config},
+         countdown=delay_seconds,
+     )
+
+
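Because the delay clamps at zero, a resume_at that has already passed enqueues the task to run immediately rather than failing. A self-contained sketch of the same arithmetic (illustrative, not part of the diff):

    from datetime import UTC, datetime, timedelta

    now = datetime.now(UTC)

    # resume_at already in the past: delay clamps to 0, task runs immediately
    overdue = now - timedelta(minutes=5)
    assert max(0, int((overdue - now).total_seconds())) == 0

    # resume_at in the future: int() truncates, so the countdown never overshoots
    future = now + timedelta(seconds=90)
    assert max(0, int((future - now).total_seconds())) == 90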
+ async def _handle_parent_completion(
+     run_id: str,
+     status: RunStatus,
+     storage: StorageBackend,
+ ) -> None:
+     """
+     Handle parent workflow completion by cancelling all running children.
+
+     When a parent workflow reaches a terminal state (COMPLETED, FAILED, CANCELLED),
+     all running child workflows are automatically cancelled. This implements the
+     TERMINATE parent close policy.
+
+     Args:
+         run_id: Parent workflow run ID
+         status: Terminal status of the parent workflow
+         storage: Storage backend
+     """
+     from pyworkflow.engine.executor import cancel_workflow
+
+     # Get all non-terminal children
+     children = await storage.get_children(run_id)
+     non_terminal_statuses = {
+         RunStatus.PENDING,
+         RunStatus.RUNNING,
+         RunStatus.SUSPENDED,
+         RunStatus.INTERRUPTED,
+     }
+
+     running_children = [c for c in children if c.status in non_terminal_statuses]
+
+     if not running_children:
+         return
+
+     logger.info(
+         f"Cancelling {len(running_children)} child workflow(s) due to parent {status.value}",
+         parent_run_id=run_id,
+         parent_status=status.value,
+         child_count=len(running_children),
+     )
+
+     # Load the parent's event log once; it maps child_run_id -> child_id below
+     events = await storage.get_events(run_id)
+
+     # Cancel each running child
+     for child in running_children:
+         try:
+             reason = f"Parent workflow {run_id} {status.value}"
+
+             # Cancel the child workflow
+             await cancel_workflow(
+                 run_id=child.run_id,
+                 reason=reason,
+                 storage=storage,
+             )
+
+             # Find the child_id from the parent's events
+             child_id = None
+             for event in events:
+                 if (
+                     event.type == EventType.CHILD_WORKFLOW_STARTED
+                     and event.data.get("child_run_id") == child.run_id
+                 ):
+                     child_id = event.data.get("child_id")
+                     break
+
+             # Record cancellation event in parent's log
+             if child_id:
+                 cancel_event = create_child_workflow_cancelled_event(
+                     run_id=run_id,
+                     child_id=child_id,
+                     child_run_id=child.run_id,
+                     reason=reason,
+                 )
+                 await storage.record_event(cancel_event)
+
+             logger.info(
+                 f"Cancelled child workflow: {child.workflow_name}",
+                 parent_run_id=run_id,
+                 child_run_id=child.run_id,
+                 child_workflow_name=child.workflow_name,
+             )
+
+         except Exception as e:
+             # Log error but don't fail parent completion
+             logger.error(
+                 f"Failed to cancel child workflow: {child.workflow_name}",
+                 parent_run_id=run_id,
+                 child_run_id=child.run_id,
+                 error=str(e),
+             )
+
+
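A quick illustration of which child states the TERMINATE policy considers still running (mirrors the set above; illustrative only):

    # Children in any non-terminal state are cancelled when the parent closes;
    # children that already reached a terminal state keep their results.
    non_terminal = {
        RunStatus.PENDING,
        RunStatus.RUNNING,
        RunStatus.SUSPENDED,
        RunStatus.INTERRUPTED,
    }
    assert RunStatus.SUSPENDED in non_terminal       # a sleeping child is still cancelled
    assert RunStatus.COMPLETED not in non_terminal   # finished children are left alone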
+ async def _handle_continue_as_new_celery(
+     current_run_id: str,
+     workflow_meta: WorkflowMetadata,
+     storage: StorageBackend,
+     storage_config: dict[str, Any] | None,
+     new_args: tuple,
+     new_kwargs: dict,
+     parent_run_id: str | None = None,
+ ) -> str:
+     """
+     Handle continue-as-new in Celery context.
+
+     This function:
+     1. Generates new run_id
+     2. Records WORKFLOW_CONTINUED_AS_NEW event in current run
+     3. Updates current run status to CONTINUED_AS_NEW
+     4. Updates current run's continued_to_run_id
+     5. Creates new WorkflowRun with continued_from_run_id
+     6. Schedules new workflow execution via Celery
+
+     Args:
+         current_run_id: The run ID of the current workflow
+         workflow_meta: Workflow metadata
+         storage: Storage backend
+         storage_config: Storage configuration for serialization
+         new_args: Arguments for the new workflow
+         new_kwargs: Keyword arguments for the new workflow
+         parent_run_id: Parent run ID if this is a child workflow
+
+     Returns:
+         New run ID
+     """
+     # Generate new run_id
+     new_run_id = f"run_{uuid.uuid4().hex[:16]}"
+
+     # Serialize arguments
+     args_json = serialize_args(*new_args)
+     kwargs_json = serialize_kwargs(**new_kwargs)
+
+     # Record continuation event in current run's log
+     continuation_event = create_workflow_continued_as_new_event(
+         run_id=current_run_id,
+         new_run_id=new_run_id,
+         args=args_json,
+         kwargs=kwargs_json,
+     )
+     await storage.record_event(continuation_event)
+
+     # Update current run status and link to new run
+     await storage.update_run_status(
+         run_id=current_run_id,
+         status=RunStatus.CONTINUED_AS_NEW,
+     )
+     await storage.update_run_continuation(
+         run_id=current_run_id,
+         continued_to_run_id=new_run_id,
+     )
+
+     # Get current run to copy metadata
+     current_run = await storage.get_run(current_run_id)
+     nesting_depth = current_run.nesting_depth if current_run else 0
+
+     # Create new workflow run linked to current
+     new_run = WorkflowRun(
+         run_id=new_run_id,
+         workflow_name=workflow_meta.name,
+         status=RunStatus.PENDING,
+         created_at=datetime.now(UTC),
+         input_args=args_json,
+         input_kwargs=kwargs_json,
+         continued_from_run_id=current_run_id,
+         nesting_depth=nesting_depth,
+         parent_run_id=parent_run_id,
+     )
+     await storage.create_run(new_run)
+
+     # Schedule new workflow execution via Celery
+     start_workflow_task.delay(
+         workflow_name=workflow_meta.name,
+         args_json=args_json,
+         kwargs_json=kwargs_json,
+         run_id=new_run_id,
+         storage_config=storage_config,
+     )
+
+     return new_run_id
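Since each continuation records the link in both directions, the chain can be walked from any run. A sketch, assuming the WorkflowRun record exposes the continued_to_run_id value that update_run_continuation persists (illustrative, not part of the diff):

    async def latest_in_chain(run_id: str, storage: StorageBackend) -> str:
        # Follow continued_to_run_id links to the newest run in the chain.
        current = run_id
        run = await storage.get_run(current)
        while run is not None and getattr(run, "continued_to_run_id", None):
            current = run.continued_to_run_id
            run = await storage.get_run(current)
        return current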