pyworkflow-engine 0.1.7__py3-none-any.whl
This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
- dashboard/backend/app/__init__.py +1 -0
- dashboard/backend/app/config.py +32 -0
- dashboard/backend/app/controllers/__init__.py +6 -0
- dashboard/backend/app/controllers/run_controller.py +86 -0
- dashboard/backend/app/controllers/workflow_controller.py +33 -0
- dashboard/backend/app/dependencies/__init__.py +5 -0
- dashboard/backend/app/dependencies/storage.py +50 -0
- dashboard/backend/app/repositories/__init__.py +6 -0
- dashboard/backend/app/repositories/run_repository.py +80 -0
- dashboard/backend/app/repositories/workflow_repository.py +27 -0
- dashboard/backend/app/rest/__init__.py +8 -0
- dashboard/backend/app/rest/v1/__init__.py +12 -0
- dashboard/backend/app/rest/v1/health.py +33 -0
- dashboard/backend/app/rest/v1/runs.py +133 -0
- dashboard/backend/app/rest/v1/workflows.py +41 -0
- dashboard/backend/app/schemas/__init__.py +23 -0
- dashboard/backend/app/schemas/common.py +16 -0
- dashboard/backend/app/schemas/event.py +24 -0
- dashboard/backend/app/schemas/hook.py +25 -0
- dashboard/backend/app/schemas/run.py +54 -0
- dashboard/backend/app/schemas/step.py +28 -0
- dashboard/backend/app/schemas/workflow.py +31 -0
- dashboard/backend/app/server.py +87 -0
- dashboard/backend/app/services/__init__.py +6 -0
- dashboard/backend/app/services/run_service.py +240 -0
- dashboard/backend/app/services/workflow_service.py +155 -0
- dashboard/backend/main.py +18 -0
- docs/concepts/cancellation.mdx +362 -0
- docs/concepts/continue-as-new.mdx +434 -0
- docs/concepts/events.mdx +266 -0
- docs/concepts/fault-tolerance.mdx +370 -0
- docs/concepts/hooks.mdx +552 -0
- docs/concepts/limitations.mdx +167 -0
- docs/concepts/schedules.mdx +775 -0
- docs/concepts/sleep.mdx +312 -0
- docs/concepts/steps.mdx +301 -0
- docs/concepts/workflows.mdx +255 -0
- docs/guides/cli.mdx +942 -0
- docs/guides/configuration.mdx +560 -0
- docs/introduction.mdx +155 -0
- docs/quickstart.mdx +279 -0
- examples/__init__.py +1 -0
- examples/celery/__init__.py +1 -0
- examples/celery/durable/docker-compose.yml +55 -0
- examples/celery/durable/pyworkflow.config.yaml +12 -0
- examples/celery/durable/workflows/__init__.py +122 -0
- examples/celery/durable/workflows/basic.py +87 -0
- examples/celery/durable/workflows/batch_processing.py +102 -0
- examples/celery/durable/workflows/cancellation.py +273 -0
- examples/celery/durable/workflows/child_workflow_patterns.py +240 -0
- examples/celery/durable/workflows/child_workflows.py +202 -0
- examples/celery/durable/workflows/continue_as_new.py +260 -0
- examples/celery/durable/workflows/fault_tolerance.py +210 -0
- examples/celery/durable/workflows/hooks.py +211 -0
- examples/celery/durable/workflows/idempotency.py +112 -0
- examples/celery/durable/workflows/long_running.py +99 -0
- examples/celery/durable/workflows/retries.py +101 -0
- examples/celery/durable/workflows/schedules.py +209 -0
- examples/celery/transient/01_basic_workflow.py +91 -0
- examples/celery/transient/02_fault_tolerance.py +257 -0
- examples/celery/transient/__init__.py +20 -0
- examples/celery/transient/pyworkflow.config.yaml +25 -0
- examples/local/__init__.py +1 -0
- examples/local/durable/01_basic_workflow.py +94 -0
- examples/local/durable/02_file_storage.py +132 -0
- examples/local/durable/03_retries.py +169 -0
- examples/local/durable/04_long_running.py +119 -0
- examples/local/durable/05_event_log.py +145 -0
- examples/local/durable/06_idempotency.py +148 -0
- examples/local/durable/07_hooks.py +334 -0
- examples/local/durable/08_cancellation.py +233 -0
- examples/local/durable/09_child_workflows.py +198 -0
- examples/local/durable/10_child_workflow_patterns.py +265 -0
- examples/local/durable/11_continue_as_new.py +249 -0
- examples/local/durable/12_schedules.py +198 -0
- examples/local/durable/__init__.py +1 -0
- examples/local/transient/01_quick_tasks.py +87 -0
- examples/local/transient/02_retries.py +130 -0
- examples/local/transient/03_sleep.py +141 -0
- examples/local/transient/__init__.py +1 -0
- pyworkflow/__init__.py +256 -0
- pyworkflow/aws/__init__.py +68 -0
- pyworkflow/aws/context.py +234 -0
- pyworkflow/aws/handler.py +184 -0
- pyworkflow/aws/testing.py +310 -0
- pyworkflow/celery/__init__.py +41 -0
- pyworkflow/celery/app.py +198 -0
- pyworkflow/celery/scheduler.py +315 -0
- pyworkflow/celery/tasks.py +1746 -0
- pyworkflow/cli/__init__.py +132 -0
- pyworkflow/cli/__main__.py +6 -0
- pyworkflow/cli/commands/__init__.py +1 -0
- pyworkflow/cli/commands/hooks.py +640 -0
- pyworkflow/cli/commands/quickstart.py +495 -0
- pyworkflow/cli/commands/runs.py +773 -0
- pyworkflow/cli/commands/scheduler.py +130 -0
- pyworkflow/cli/commands/schedules.py +794 -0
- pyworkflow/cli/commands/setup.py +703 -0
- pyworkflow/cli/commands/worker.py +413 -0
- pyworkflow/cli/commands/workflows.py +1257 -0
- pyworkflow/cli/output/__init__.py +1 -0
- pyworkflow/cli/output/formatters.py +321 -0
- pyworkflow/cli/output/styles.py +121 -0
- pyworkflow/cli/utils/__init__.py +1 -0
- pyworkflow/cli/utils/async_helpers.py +30 -0
- pyworkflow/cli/utils/config.py +130 -0
- pyworkflow/cli/utils/config_generator.py +344 -0
- pyworkflow/cli/utils/discovery.py +53 -0
- pyworkflow/cli/utils/docker_manager.py +651 -0
- pyworkflow/cli/utils/interactive.py +364 -0
- pyworkflow/cli/utils/storage.py +115 -0
- pyworkflow/config.py +329 -0
- pyworkflow/context/__init__.py +63 -0
- pyworkflow/context/aws.py +230 -0
- pyworkflow/context/base.py +416 -0
- pyworkflow/context/local.py +930 -0
- pyworkflow/context/mock.py +381 -0
- pyworkflow/core/__init__.py +0 -0
- pyworkflow/core/exceptions.py +353 -0
- pyworkflow/core/registry.py +313 -0
- pyworkflow/core/scheduled.py +328 -0
- pyworkflow/core/step.py +494 -0
- pyworkflow/core/workflow.py +294 -0
- pyworkflow/discovery.py +248 -0
- pyworkflow/engine/__init__.py +0 -0
- pyworkflow/engine/events.py +879 -0
- pyworkflow/engine/executor.py +682 -0
- pyworkflow/engine/replay.py +273 -0
- pyworkflow/observability/__init__.py +19 -0
- pyworkflow/observability/logging.py +234 -0
- pyworkflow/primitives/__init__.py +33 -0
- pyworkflow/primitives/child_handle.py +174 -0
- pyworkflow/primitives/child_workflow.py +372 -0
- pyworkflow/primitives/continue_as_new.py +101 -0
- pyworkflow/primitives/define_hook.py +150 -0
- pyworkflow/primitives/hooks.py +97 -0
- pyworkflow/primitives/resume_hook.py +210 -0
- pyworkflow/primitives/schedule.py +545 -0
- pyworkflow/primitives/shield.py +96 -0
- pyworkflow/primitives/sleep.py +100 -0
- pyworkflow/runtime/__init__.py +21 -0
- pyworkflow/runtime/base.py +179 -0
- pyworkflow/runtime/celery.py +310 -0
- pyworkflow/runtime/factory.py +101 -0
- pyworkflow/runtime/local.py +706 -0
- pyworkflow/scheduler/__init__.py +9 -0
- pyworkflow/scheduler/local.py +248 -0
- pyworkflow/serialization/__init__.py +0 -0
- pyworkflow/serialization/decoder.py +146 -0
- pyworkflow/serialization/encoder.py +162 -0
- pyworkflow/storage/__init__.py +54 -0
- pyworkflow/storage/base.py +612 -0
- pyworkflow/storage/config.py +185 -0
- pyworkflow/storage/dynamodb.py +1315 -0
- pyworkflow/storage/file.py +827 -0
- pyworkflow/storage/memory.py +549 -0
- pyworkflow/storage/postgres.py +1161 -0
- pyworkflow/storage/schemas.py +486 -0
- pyworkflow/storage/sqlite.py +1136 -0
- pyworkflow/utils/__init__.py +0 -0
- pyworkflow/utils/duration.py +177 -0
- pyworkflow/utils/schedule.py +391 -0
- pyworkflow_engine-0.1.7.dist-info/METADATA +687 -0
- pyworkflow_engine-0.1.7.dist-info/RECORD +196 -0
- pyworkflow_engine-0.1.7.dist-info/WHEEL +5 -0
- pyworkflow_engine-0.1.7.dist-info/entry_points.txt +2 -0
- pyworkflow_engine-0.1.7.dist-info/licenses/LICENSE +21 -0
- pyworkflow_engine-0.1.7.dist-info/top_level.txt +5 -0
- tests/examples/__init__.py +0 -0
- tests/integration/__init__.py +0 -0
- tests/integration/test_cancellation.py +330 -0
- tests/integration/test_child_workflows.py +439 -0
- tests/integration/test_continue_as_new.py +428 -0
- tests/integration/test_dynamodb_storage.py +1146 -0
- tests/integration/test_fault_tolerance.py +369 -0
- tests/integration/test_schedule_storage.py +484 -0
- tests/unit/__init__.py +0 -0
- tests/unit/backends/__init__.py +1 -0
- tests/unit/backends/test_dynamodb_storage.py +1554 -0
- tests/unit/backends/test_postgres_storage.py +1281 -0
- tests/unit/backends/test_sqlite_storage.py +1460 -0
- tests/unit/conftest.py +41 -0
- tests/unit/test_cancellation.py +364 -0
- tests/unit/test_child_workflows.py +680 -0
- tests/unit/test_continue_as_new.py +441 -0
- tests/unit/test_event_limits.py +316 -0
- tests/unit/test_executor.py +320 -0
- tests/unit/test_fault_tolerance.py +334 -0
- tests/unit/test_hooks.py +495 -0
- tests/unit/test_registry.py +261 -0
- tests/unit/test_replay.py +420 -0
- tests/unit/test_schedule_schemas.py +285 -0
- tests/unit/test_schedule_utils.py +286 -0
- tests/unit/test_scheduled_workflow.py +274 -0
- tests/unit/test_step.py +353 -0
- tests/unit/test_workflow.py +243 -0
pyworkflow/primitives/sleep.py
@@ -0,0 +1,100 @@
"""
Sleep primitive for workflow delays.

Allows workflows to pause execution for a specified duration without
holding resources. The workflow will suspend and can be resumed after
the delay period.
"""

import asyncio
from datetime import UTC, datetime, timedelta

from loguru import logger

from pyworkflow.context import get_context, has_context
from pyworkflow.utils.duration import parse_duration


async def sleep(
    duration: str | int | float | timedelta | datetime,
    name: str | None = None,
) -> None:
    """
    Suspend workflow execution for a specified duration.

    Different contexts handle sleep differently:
    - MockContext: Skips sleep (configurable)
    - LocalContext: Durable sleep with event sourcing
    - AWSContext: AWS native wait (no compute charges)

    If called outside a workflow context, falls back to asyncio.sleep.

    Args:
        duration: How long to sleep:
            - str: Duration string ("5s", "2m", "1h", "3d", "1w")
            - int/float: Seconds
            - timedelta: Time duration
            - datetime: Sleep until this specific time
        name: Optional name for this sleep (for debugging)

    Examples:
        # Sleep for 30 seconds
        await sleep("30s")

        # Sleep for 5 minutes
        await sleep("5m")
        await sleep(300)  # Same as above

        # Sleep for 1 hour
        await sleep("1h")
        await sleep(timedelta(hours=1))

        # Named sleep for debugging
        await sleep("5m", name="wait_for_rate_limit")
    """
    # Check for workflow context
    if has_context():
        ctx = get_context()
        duration_seconds = _calculate_delay_seconds(duration)

        logger.debug(
            f"Sleep {duration_seconds}s via {ctx.__class__.__name__}",
            run_id=ctx.run_id,
            workflow_name=ctx.workflow_name,
        )

        await ctx.sleep(duration_seconds)
        return

    # No context available - use regular asyncio.sleep
    duration_seconds = _calculate_delay_seconds(duration)
    logger.debug(
        f"Sleep called outside workflow context, using asyncio.sleep for {duration_seconds}s"
    )
    await asyncio.sleep(duration_seconds)


def _calculate_resume_time(duration: str | int | float | timedelta | datetime) -> datetime:
    """Calculate when the sleep should resume."""
    if isinstance(duration, datetime):
        return duration

    delay_seconds = _calculate_delay_seconds(duration)
    return datetime.now(UTC) + timedelta(seconds=delay_seconds)


def _calculate_delay_seconds(duration: str | int | float | timedelta | datetime) -> int:
    """Calculate delay in seconds."""
    if isinstance(duration, datetime):
        now = datetime.now(UTC)
        if duration <= now:
            raise ValueError(f"Cannot sleep until past time: {duration} (now: {now})")
        delta = duration - now
        return int(delta.total_seconds())

    if isinstance(duration, timedelta):
        return int(duration.total_seconds())
    elif isinstance(duration, str):
        return parse_duration(duration)
    else:
        return int(duration)
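The duration coercion in _calculate_delay_seconds accepts four input shapes. A minimal sketch of the expected conversions, assuming the module is importable as pyworkflow.primitives.sleep (per the file listing) and that parse_duration maps "5m" to 300 seconds, per the docstring's format list:

# Hypothetical check of the coercion rules; _calculate_delay_seconds is private,
# used here only to illustrate the behavior shown above.
from datetime import UTC, datetime, timedelta

from pyworkflow.primitives.sleep import _calculate_delay_seconds

assert _calculate_delay_seconds(300) == 300               # int/float -> seconds
assert _calculate_delay_seconds(timedelta(hours=1)) == 3600
assert _calculate_delay_seconds("5m") == 300              # parsed by parse_duration

# datetime inputs mean "sleep until": the delay is the gap from now,
# and past datetimes raise ValueError.
wake = datetime.now(UTC) + timedelta(seconds=90)
assert 0 <= _calculate_delay_seconds(wake) <= 90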
pyworkflow/runtime/__init__.py
@@ -0,0 +1,21 @@
"""
PyWorkflow Runtime Abstraction Layer.

Runtimes determine WHERE workflow code executes:
- LocalRuntime: In-process execution (for CI, testing, simple scripts)
- CeleryRuntime: Distributed execution via Celery workers
- LambdaRuntime: AWS Lambda execution (future)
- DurableLambdaRuntime: AWS Durable Lambda execution (future)
"""

from pyworkflow.runtime.base import Runtime
from pyworkflow.runtime.factory import get_runtime, register_runtime, validate_runtime_durable
from pyworkflow.runtime.local import LocalRuntime

__all__ = [
    "Runtime",
    "LocalRuntime",
    "get_runtime",
    "register_runtime",
    "validate_runtime_durable",
]
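The re-exports above are the public surface of the runtime layer. A short sketch of how a caller might select and validate a runtime through it (the durable flag is illustrative; LocalRuntime's own source is not shown in this diff, so the printed capabilities assume it keeps the base-class defaults):

from pyworkflow.runtime import get_runtime, validate_runtime_durable

runtime = get_runtime("local")                    # instantiates LocalRuntime
validate_runtime_durable(runtime, durable=True)   # raises ValueError if unsupported
print(runtime.name, runtime.supports_durable, runtime.supports_transient)
# with the base-class defaults this prints: local True True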
pyworkflow/runtime/base.py
@@ -0,0 +1,179 @@
"""
Abstract base class for workflow execution runtimes.

Runtimes are responsible for:
- Starting workflow executions
- Resuming suspended workflows
- Scheduling wake-up times for sleeps
"""

from abc import ABC, abstractmethod
from collections.abc import Callable
from datetime import datetime
from typing import TYPE_CHECKING, Any, Optional

if TYPE_CHECKING:
    from pyworkflow.storage.base import StorageBackend


class Runtime(ABC):
    """
    Abstract base class for workflow execution runtimes.

    A runtime determines WHERE and HOW workflow code executes.
    Different runtimes support different capabilities (durable vs transient).
    """

    @abstractmethod
    async def start_workflow(
        self,
        workflow_func: Callable[..., Any],
        args: tuple,
        kwargs: dict,
        run_id: str,
        workflow_name: str,
        storage: Optional["StorageBackend"],
        durable: bool,
        idempotency_key: str | None = None,
        max_duration: str | None = None,
        metadata: dict | None = None,
    ) -> str:
        """
        Start a new workflow execution.

        Args:
            workflow_func: The workflow function to execute
            args: Positional arguments for the workflow
            kwargs: Keyword arguments for the workflow
            run_id: Unique identifier for this run
            workflow_name: Name of the workflow
            storage: Storage backend (None for transient workflows)
            durable: Whether this is a durable workflow
            idempotency_key: Optional key for idempotent execution
            max_duration: Optional maximum duration for the workflow
            metadata: Optional metadata dictionary

        Returns:
            The run_id of the started workflow
        """
        pass

    @abstractmethod
    async def resume_workflow(
        self,
        run_id: str,
        storage: "StorageBackend",
    ) -> Any:
        """
        Resume a suspended workflow.

        Args:
            run_id: The run_id of the workflow to resume
            storage: Storage backend containing workflow state

        Returns:
            The result of the workflow execution
        """
        pass

    @abstractmethod
    async def schedule_wake(
        self,
        run_id: str,
        wake_time: datetime,
        storage: "StorageBackend",
    ) -> None:
        """
        Schedule a workflow to be resumed at a specific time.

        Args:
            run_id: The run_id of the workflow to wake
            wake_time: When to resume the workflow
            storage: Storage backend
        """
        pass

    async def schedule_resume(
        self,
        run_id: str,
        storage: "StorageBackend",
    ) -> None:
        """
        Schedule a workflow to be resumed immediately.

        This is called by resume_hook() after recording the hook event.
        Each runtime implements this differently:
        - CeleryRuntime: Schedules an async Celery task
        - LocalRuntime: Calls resume_workflow directly (in-process)

        Args:
            run_id: The run_id of the workflow to resume
            storage: Storage backend
        """
        # Default implementation: no-op
        # Subclasses override if they support async scheduling
        pass

    @abstractmethod
    async def start_child_workflow(
        self,
        workflow_func: Callable[..., Any],
        args: tuple,
        kwargs: dict,
        child_run_id: str,
        workflow_name: str,
        storage: "StorageBackend",
        parent_run_id: str,
        child_id: str,
        wait_for_completion: bool,
    ) -> None:
        """
        Start a child workflow execution (fire-and-forget).

        Child workflows run in the background and notify the parent
        when completed/failed. If wait_for_completion=True, the parent
        will be resumed when the child finishes.

        Args:
            workflow_func: The child workflow function to execute
            args: Positional arguments for the child workflow
            kwargs: Keyword arguments for the child workflow
            child_run_id: Unique identifier for the child run
            workflow_name: Name of the child workflow
            storage: Storage backend
            parent_run_id: Run ID of the parent workflow
            child_id: Deterministic child ID for replay
            wait_for_completion: Whether parent is waiting for child to complete
        """
        pass

    @property
    @abstractmethod
    def name(self) -> str:
        """
        Runtime identifier.

        Returns:
            String identifier for this runtime (e.g., "local", "celery")
        """
        pass

    @property
    def supports_durable(self) -> bool:
        """
        Whether this runtime supports durable (event-sourced) workflows.

        Returns:
            True if durable workflows are supported
        """
        return True

    @property
    def supports_transient(self) -> bool:
        """
        Whether this runtime supports transient (non-durable) workflows.

        Returns:
            True if transient workflows are supported
        """
        return True
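Since schedule_resume, supports_durable, and supports_transient have default implementations, a concrete runtime only has to provide the four abstract methods plus the abstract name property. A minimal in-process sketch (not part of the package; InlineRuntime is a made-up name for illustration):

from collections.abc import Callable
from datetime import datetime
from typing import Any

from pyworkflow.runtime.base import Runtime
from pyworkflow.storage.base import StorageBackend


class InlineRuntime(Runtime):
    """Toy runtime that runs the workflow function in the calling task."""

    @property
    def name(self) -> str:
        return "inline"

    @property
    def supports_durable(self) -> bool:
        return False  # nothing is persisted, so replay is impossible

    async def start_workflow(
        self,
        workflow_func: Callable[..., Any],
        args: tuple,
        kwargs: dict,
        run_id: str,
        workflow_name: str,
        storage: StorageBackend | None,
        durable: bool,
        idempotency_key: str | None = None,
        max_duration: str | None = None,
        metadata: dict | None = None,
    ) -> str:
        # Run inline: no persistence, no suspension, no replay.
        await workflow_func(*args, **kwargs)
        return run_id

    async def resume_workflow(self, run_id: str, storage: StorageBackend) -> Any:
        raise NotImplementedError("inline runtime never suspends")

    async def schedule_wake(
        self, run_id: str, wake_time: datetime, storage: StorageBackend
    ) -> None:
        raise NotImplementedError("inline runtime never suspends")

    async def start_child_workflow(
        self,
        workflow_func: Callable[..., Any],
        args: tuple,
        kwargs: dict,
        child_run_id: str,
        workflow_name: str,
        storage: StorageBackend,
        parent_run_id: str,
        child_id: str,
        wait_for_completion: bool,
    ) -> None:
        # Children also run inline, directly in the parent's task.
        await workflow_func(*args, **kwargs)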
pyworkflow/runtime/celery.py
@@ -0,0 +1,310 @@
"""
Celery runtime - executes workflows on distributed Celery workers.

The Celery runtime is ideal for:
- Production deployments
- Distributed execution across multiple workers
- Long-running workflows with sleeps and webhooks
- High availability and scalability
"""

import os
from collections.abc import Callable
from datetime import datetime
from typing import TYPE_CHECKING, Any, Optional

from loguru import logger

from pyworkflow.runtime.base import Runtime

if TYPE_CHECKING:
    from pyworkflow.storage.base import StorageBackend


class CeleryRuntime(Runtime):
    """
    Execute workflows on distributed Celery workers.

    This runtime dispatches workflow execution to Celery workers,
    enabling distributed processing and automatic resumption of
    suspended workflows.

    Note: This runtime only supports durable workflows since
    Celery execution requires state persistence for proper
    task routing and resumption.
    """

    def __init__(
        self,
        broker_url: str | None = None,
        result_backend: str | None = None,
    ):
        """
        Initialize Celery runtime.

        Args:
            broker_url: Celery broker URL (default: from env or redis://localhost:6379/0)
            result_backend: Result backend URL (default: from env or redis://localhost:6379/1)
        """
        self._broker_url: str = (
            broker_url
            or os.getenv("PYWORKFLOW_CELERY_BROKER", "redis://localhost:6379/0")
            or "redis://localhost:6379/0"
        )
        self._result_backend: str = (
            result_backend
            or os.getenv("PYWORKFLOW_CELERY_RESULT_BACKEND", "redis://localhost:6379/1")
            or "redis://localhost:6379/1"
        )

    @property
    def name(self) -> str:
        return "celery"

    @property
    def supports_durable(self) -> bool:
        return True

    @property
    def supports_transient(self) -> bool:
        # Celery runtime requires durable workflows for proper state management
        return False

    @property
    def broker_url(self) -> str:
        """Get the configured broker URL."""
        return self._broker_url

    @property
    def result_backend(self) -> str:
        """Get the configured result backend URL."""
        return self._result_backend

    def _get_storage_config(self, storage: Optional["StorageBackend"]) -> dict | None:
        """
        Convert storage backend to configuration dict for Celery tasks.

        Args:
            storage: Storage backend instance

        Returns:
            Configuration dict or None
        """
        from pyworkflow.storage.config import storage_to_config

        config = storage_to_config(storage)

        # In-memory storage cannot be shared across Celery workers
        if config and config.get("type") == "memory":
            logger.warning(
                "InMemoryStorageBackend cannot be used with Celery runtime. "
                "Falling back to FileStorageBackend."
            )
            return {"type": "file"}

        return config

    async def start_workflow(
        self,
        workflow_func: Callable[..., Any],
        args: tuple,
        kwargs: dict,
        run_id: str,
        workflow_name: str,
        storage: Optional["StorageBackend"],
        durable: bool,
        idempotency_key: str | None = None,
        max_duration: str | None = None,
        metadata: dict | None = None,
    ) -> str:
        """
        Start a workflow execution by dispatching to Celery workers.

        The workflow will be queued and executed by an available worker.
        """
        from pyworkflow.celery.tasks import start_workflow_task
        from pyworkflow.serialization.encoder import serialize_args, serialize_kwargs

        if not durable:
            raise ValueError(
                "Celery runtime requires durable=True. "
                "Use the 'local' runtime for transient workflows."
            )

        logger.info(
            f"Dispatching workflow to Celery: {workflow_name}",
            run_id=run_id,
            workflow_name=workflow_name,
        )

        # Serialize arguments for Celery transport
        args_json = serialize_args(*args)
        kwargs_json = serialize_kwargs(**kwargs)

        # Get storage configuration for workers
        storage_config = self._get_storage_config(storage)

        # Dispatch to Celery worker
        task_result = start_workflow_task.delay(
            workflow_name=workflow_name,
            args_json=args_json,
            kwargs_json=kwargs_json,
            run_id=run_id,
            storage_config=storage_config,
            idempotency_key=idempotency_key,
        )

        logger.info(
            f"Workflow dispatched to Celery: {workflow_name}",
            run_id=run_id,
            task_id=task_result.id,
        )

        # The task executes asynchronously on a worker; return the run_id
        # so callers can track the run via storage or the Celery task result.
        return run_id

    async def resume_workflow(
        self,
        run_id: str,
        storage: "StorageBackend",
    ) -> Any:
        """
        Resume a suspended workflow by dispatching to Celery workers.
        """
        from pyworkflow.celery.tasks import resume_workflow_task

        logger.info(
            f"Dispatching workflow resume to Celery: {run_id}",
            run_id=run_id,
        )

        # Get storage configuration for workers
        storage_config = self._get_storage_config(storage)

        # Dispatch to Celery worker
        task_result = resume_workflow_task.delay(
            run_id=run_id,
            storage_config=storage_config,
        )

        logger.info(
            f"Workflow resume dispatched to Celery: {run_id}",
            run_id=run_id,
            task_id=task_result.id,
        )

        # Return None since the actual result will be available asynchronously
        return None

    async def schedule_resume(
        self,
        run_id: str,
        storage: "StorageBackend",
    ) -> None:
        """
        Schedule immediate workflow resumption via Celery task.

        This is called by resume_hook() to trigger workflow resumption
        after a hook event is received.
        """
        from pyworkflow.celery.tasks import resume_workflow_task

        logger.info(
            f"Scheduling workflow resume via Celery: {run_id}",
            run_id=run_id,
        )

        storage_config = self._get_storage_config(storage)

        resume_workflow_task.apply_async(
            args=[run_id],
            kwargs={"storage_config": storage_config},
        )

        logger.info(
            f"Workflow resume scheduled: {run_id}",
            run_id=run_id,
        )

    async def schedule_wake(
        self,
        run_id: str,
        wake_time: datetime,
        storage: "StorageBackend",
    ) -> None:
        """
        Schedule workflow resumption at a specific time using Celery.

        Uses Celery's countdown feature to delay task execution.
        """
        from pyworkflow.celery.tasks import schedule_workflow_resumption

        logger.info(
            f"Scheduling workflow wake via Celery: {run_id}",
            run_id=run_id,
            wake_time=wake_time.isoformat(),
        )

        # Use the existing schedule function which handles the delay calculation
        schedule_workflow_resumption(run_id, wake_time)

        logger.info(
            f"Workflow wake scheduled: {run_id}",
            run_id=run_id,
            wake_time=wake_time.isoformat(),
        )

    async def start_child_workflow(
        self,
        workflow_func: Callable[..., Any],
        args: tuple,
        kwargs: dict,
        child_run_id: str,
        workflow_name: str,
        storage: "StorageBackend",
        parent_run_id: str,
        child_id: str,
        wait_for_completion: bool,
    ) -> None:
        """
        Start a child workflow via Celery (fire-and-forget).

        Dispatches child workflow execution to a Celery worker. The worker
        will handle parent notification and resumption when the child completes.
        """
        from pyworkflow.celery.tasks import start_child_workflow_task
        from pyworkflow.serialization.encoder import serialize_args, serialize_kwargs

        logger.info(
            f"Dispatching child workflow to Celery: {workflow_name}",
            child_run_id=child_run_id,
            parent_run_id=parent_run_id,
            child_id=child_id,
        )

        # Serialize arguments for Celery transport
        args_json = serialize_args(*args)
        kwargs_json = serialize_kwargs(**kwargs)

        # Get storage configuration for workers
        storage_config = self._get_storage_config(storage)

        # Dispatch to Celery worker
        task_result = start_child_workflow_task.delay(
            workflow_name=workflow_name,
            args_json=args_json,
            kwargs_json=kwargs_json,
            child_run_id=child_run_id,
            storage_config=storage_config,
            parent_run_id=parent_run_id,
            child_id=child_id,
            wait_for_completion=wait_for_completion,
        )

        logger.info(
            f"Child workflow dispatched to Celery: {workflow_name}",
            child_run_id=child_run_id,
            task_id=task_result.id,
        )
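Configuration in __init__ resolves in order: explicit argument, then environment variable, then the localhost Redis default. Because the Celery task imports happen lazily inside each method, constructing the runtime itself needs no broker. A sketch of the three resolution paths (the URLs are examples only):

import os

from pyworkflow.runtime.celery import CeleryRuntime

# 1. An explicit argument wins.
rt = CeleryRuntime(broker_url="redis://queue.internal:6379/0")
assert rt.broker_url == "redis://queue.internal:6379/0"

# 2. Otherwise the environment variable is used.
os.environ["PYWORKFLOW_CELERY_BROKER"] = "amqp://guest@rabbit:5672//"
assert CeleryRuntime().broker_url == "amqp://guest@rabbit:5672//"

# 3. With neither set, the localhost Redis default applies.
del os.environ["PYWORKFLOW_CELERY_BROKER"]
assert CeleryRuntime().broker_url == "redis://localhost:6379/0"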
pyworkflow/runtime/factory.py
@@ -0,0 +1,101 @@
"""
Runtime factory and registration.

This module provides:
- Runtime registration and lookup
- Validation of runtime + durable combinations
"""

from pyworkflow.runtime.base import Runtime

# Runtime registry
_runtimes: dict[str, type[Runtime]] = {}


def register_runtime(name: str, runtime_class: type[Runtime]) -> None:
    """
    Register a runtime implementation.

    Args:
        name: Runtime identifier (e.g., "local", "celery")
        runtime_class: Runtime class to register

    Example:
        >>> from pyworkflow.runtime import register_runtime
        >>> from myapp.runtime import CustomRuntime
        >>> register_runtime("custom", CustomRuntime)
    """
    _runtimes[name] = runtime_class


def get_runtime(name: str) -> Runtime:
    """
    Get a runtime instance by name.

    Args:
        name: Runtime identifier

    Returns:
        Runtime instance

    Raises:
        ValueError: If runtime is not registered
    """
    if name not in _runtimes:
        available = ", ".join(sorted(_runtimes.keys())) or "(none registered)"
        raise ValueError(f"Unknown runtime: '{name}'. Available runtimes: {available}")
    return _runtimes[name]()


def validate_runtime_durable(runtime: Runtime, durable: bool) -> None:
    """
    Validate that a runtime supports the requested durability mode.

    Args:
        runtime: Runtime instance
        durable: Whether durable mode is requested

    Raises:
        ValueError: If the combination is not supported
    """
    if durable and not runtime.supports_durable:
        raise ValueError(
            f"Runtime '{runtime.name}' does not support durable workflows. "
            f"Use durable=False or choose a different runtime."
        )
    if not durable and not runtime.supports_transient:
        raise ValueError(
            f"Runtime '{runtime.name}' requires durable=True. "
            f"This runtime does not support transient workflows."
        )


def list_runtimes() -> dict[str, type[Runtime]]:
    """
    List all registered runtimes.

    Returns:
        Dictionary of runtime name -> runtime class
    """
    return dict(_runtimes)


# Register built-in runtimes
def _register_builtin_runtimes() -> None:
    """Register built-in runtimes."""
    from pyworkflow.runtime.local import LocalRuntime

    register_runtime("local", LocalRuntime)

    # Register Celery runtime (lazy import to avoid circular deps)
    try:
        from pyworkflow.runtime.celery import CeleryRuntime

        register_runtime("celery", CeleryRuntime)
    except ImportError:
        # Celery not installed, skip registration
        pass


# Auto-register on import
_register_builtin_runtimes()
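Registration is a plain dict write, so plugging in a custom runtime is a one-liner, and get_runtime instantiates the class freshly on every call (which is why built-in runtimes need no-arg constructors). A sketch reusing the hypothetical InlineRuntime from the base-class example above:

from pyworkflow.runtime.factory import (
    get_runtime,
    list_runtimes,
    register_runtime,
    validate_runtime_durable,
)

register_runtime("inline", InlineRuntime)  # InlineRuntime: hypothetical, defined earlier

assert "local" in list_runtimes()          # built-in, registered on import
rt = get_runtime("inline")                 # fresh instance per call

# InlineRuntime reports supports_durable = False, so this raises ValueError.
try:
    validate_runtime_durable(rt, durable=True)
except ValueError as exc:
    print(exc)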