pyworkflow-engine 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dashboard/backend/app/__init__.py +1 -0
- dashboard/backend/app/config.py +32 -0
- dashboard/backend/app/controllers/__init__.py +6 -0
- dashboard/backend/app/controllers/run_controller.py +86 -0
- dashboard/backend/app/controllers/workflow_controller.py +33 -0
- dashboard/backend/app/dependencies/__init__.py +5 -0
- dashboard/backend/app/dependencies/storage.py +50 -0
- dashboard/backend/app/repositories/__init__.py +6 -0
- dashboard/backend/app/repositories/run_repository.py +80 -0
- dashboard/backend/app/repositories/workflow_repository.py +27 -0
- dashboard/backend/app/rest/__init__.py +8 -0
- dashboard/backend/app/rest/v1/__init__.py +12 -0
- dashboard/backend/app/rest/v1/health.py +33 -0
- dashboard/backend/app/rest/v1/runs.py +133 -0
- dashboard/backend/app/rest/v1/workflows.py +41 -0
- dashboard/backend/app/schemas/__init__.py +23 -0
- dashboard/backend/app/schemas/common.py +16 -0
- dashboard/backend/app/schemas/event.py +24 -0
- dashboard/backend/app/schemas/hook.py +25 -0
- dashboard/backend/app/schemas/run.py +54 -0
- dashboard/backend/app/schemas/step.py +28 -0
- dashboard/backend/app/schemas/workflow.py +31 -0
- dashboard/backend/app/server.py +87 -0
- dashboard/backend/app/services/__init__.py +6 -0
- dashboard/backend/app/services/run_service.py +240 -0
- dashboard/backend/app/services/workflow_service.py +155 -0
- dashboard/backend/main.py +18 -0
- docs/concepts/cancellation.mdx +362 -0
- docs/concepts/continue-as-new.mdx +434 -0
- docs/concepts/events.mdx +266 -0
- docs/concepts/fault-tolerance.mdx +370 -0
- docs/concepts/hooks.mdx +552 -0
- docs/concepts/limitations.mdx +167 -0
- docs/concepts/schedules.mdx +775 -0
- docs/concepts/sleep.mdx +312 -0
- docs/concepts/steps.mdx +301 -0
- docs/concepts/workflows.mdx +255 -0
- docs/guides/cli.mdx +942 -0
- docs/guides/configuration.mdx +560 -0
- docs/introduction.mdx +155 -0
- docs/quickstart.mdx +279 -0
- examples/__init__.py +1 -0
- examples/celery/__init__.py +1 -0
- examples/celery/durable/docker-compose.yml +55 -0
- examples/celery/durable/pyworkflow.config.yaml +12 -0
- examples/celery/durable/workflows/__init__.py +122 -0
- examples/celery/durable/workflows/basic.py +87 -0
- examples/celery/durable/workflows/batch_processing.py +102 -0
- examples/celery/durable/workflows/cancellation.py +273 -0
- examples/celery/durable/workflows/child_workflow_patterns.py +240 -0
- examples/celery/durable/workflows/child_workflows.py +202 -0
- examples/celery/durable/workflows/continue_as_new.py +260 -0
- examples/celery/durable/workflows/fault_tolerance.py +210 -0
- examples/celery/durable/workflows/hooks.py +211 -0
- examples/celery/durable/workflows/idempotency.py +112 -0
- examples/celery/durable/workflows/long_running.py +99 -0
- examples/celery/durable/workflows/retries.py +101 -0
- examples/celery/durable/workflows/schedules.py +209 -0
- examples/celery/transient/01_basic_workflow.py +91 -0
- examples/celery/transient/02_fault_tolerance.py +257 -0
- examples/celery/transient/__init__.py +20 -0
- examples/celery/transient/pyworkflow.config.yaml +25 -0
- examples/local/__init__.py +1 -0
- examples/local/durable/01_basic_workflow.py +94 -0
- examples/local/durable/02_file_storage.py +132 -0
- examples/local/durable/03_retries.py +169 -0
- examples/local/durable/04_long_running.py +119 -0
- examples/local/durable/05_event_log.py +145 -0
- examples/local/durable/06_idempotency.py +148 -0
- examples/local/durable/07_hooks.py +334 -0
- examples/local/durable/08_cancellation.py +233 -0
- examples/local/durable/09_child_workflows.py +198 -0
- examples/local/durable/10_child_workflow_patterns.py +265 -0
- examples/local/durable/11_continue_as_new.py +249 -0
- examples/local/durable/12_schedules.py +198 -0
- examples/local/durable/__init__.py +1 -0
- examples/local/transient/01_quick_tasks.py +87 -0
- examples/local/transient/02_retries.py +130 -0
- examples/local/transient/03_sleep.py +141 -0
- examples/local/transient/__init__.py +1 -0
- pyworkflow/__init__.py +256 -0
- pyworkflow/aws/__init__.py +68 -0
- pyworkflow/aws/context.py +234 -0
- pyworkflow/aws/handler.py +184 -0
- pyworkflow/aws/testing.py +310 -0
- pyworkflow/celery/__init__.py +41 -0
- pyworkflow/celery/app.py +198 -0
- pyworkflow/celery/scheduler.py +315 -0
- pyworkflow/celery/tasks.py +1746 -0
- pyworkflow/cli/__init__.py +132 -0
- pyworkflow/cli/__main__.py +6 -0
- pyworkflow/cli/commands/__init__.py +1 -0
- pyworkflow/cli/commands/hooks.py +640 -0
- pyworkflow/cli/commands/quickstart.py +495 -0
- pyworkflow/cli/commands/runs.py +773 -0
- pyworkflow/cli/commands/scheduler.py +130 -0
- pyworkflow/cli/commands/schedules.py +794 -0
- pyworkflow/cli/commands/setup.py +703 -0
- pyworkflow/cli/commands/worker.py +413 -0
- pyworkflow/cli/commands/workflows.py +1257 -0
- pyworkflow/cli/output/__init__.py +1 -0
- pyworkflow/cli/output/formatters.py +321 -0
- pyworkflow/cli/output/styles.py +121 -0
- pyworkflow/cli/utils/__init__.py +1 -0
- pyworkflow/cli/utils/async_helpers.py +30 -0
- pyworkflow/cli/utils/config.py +130 -0
- pyworkflow/cli/utils/config_generator.py +344 -0
- pyworkflow/cli/utils/discovery.py +53 -0
- pyworkflow/cli/utils/docker_manager.py +651 -0
- pyworkflow/cli/utils/interactive.py +364 -0
- pyworkflow/cli/utils/storage.py +115 -0
- pyworkflow/config.py +329 -0
- pyworkflow/context/__init__.py +63 -0
- pyworkflow/context/aws.py +230 -0
- pyworkflow/context/base.py +416 -0
- pyworkflow/context/local.py +930 -0
- pyworkflow/context/mock.py +381 -0
- pyworkflow/core/__init__.py +0 -0
- pyworkflow/core/exceptions.py +353 -0
- pyworkflow/core/registry.py +313 -0
- pyworkflow/core/scheduled.py +328 -0
- pyworkflow/core/step.py +494 -0
- pyworkflow/core/workflow.py +294 -0
- pyworkflow/discovery.py +248 -0
- pyworkflow/engine/__init__.py +0 -0
- pyworkflow/engine/events.py +879 -0
- pyworkflow/engine/executor.py +682 -0
- pyworkflow/engine/replay.py +273 -0
- pyworkflow/observability/__init__.py +19 -0
- pyworkflow/observability/logging.py +234 -0
- pyworkflow/primitives/__init__.py +33 -0
- pyworkflow/primitives/child_handle.py +174 -0
- pyworkflow/primitives/child_workflow.py +372 -0
- pyworkflow/primitives/continue_as_new.py +101 -0
- pyworkflow/primitives/define_hook.py +150 -0
- pyworkflow/primitives/hooks.py +97 -0
- pyworkflow/primitives/resume_hook.py +210 -0
- pyworkflow/primitives/schedule.py +545 -0
- pyworkflow/primitives/shield.py +96 -0
- pyworkflow/primitives/sleep.py +100 -0
- pyworkflow/runtime/__init__.py +21 -0
- pyworkflow/runtime/base.py +179 -0
- pyworkflow/runtime/celery.py +310 -0
- pyworkflow/runtime/factory.py +101 -0
- pyworkflow/runtime/local.py +706 -0
- pyworkflow/scheduler/__init__.py +9 -0
- pyworkflow/scheduler/local.py +248 -0
- pyworkflow/serialization/__init__.py +0 -0
- pyworkflow/serialization/decoder.py +146 -0
- pyworkflow/serialization/encoder.py +162 -0
- pyworkflow/storage/__init__.py +54 -0
- pyworkflow/storage/base.py +612 -0
- pyworkflow/storage/config.py +185 -0
- pyworkflow/storage/dynamodb.py +1315 -0
- pyworkflow/storage/file.py +827 -0
- pyworkflow/storage/memory.py +549 -0
- pyworkflow/storage/postgres.py +1161 -0
- pyworkflow/storage/schemas.py +486 -0
- pyworkflow/storage/sqlite.py +1136 -0
- pyworkflow/utils/__init__.py +0 -0
- pyworkflow/utils/duration.py +177 -0
- pyworkflow/utils/schedule.py +391 -0
- pyworkflow_engine-0.1.7.dist-info/METADATA +687 -0
- pyworkflow_engine-0.1.7.dist-info/RECORD +196 -0
- pyworkflow_engine-0.1.7.dist-info/WHEEL +5 -0
- pyworkflow_engine-0.1.7.dist-info/entry_points.txt +2 -0
- pyworkflow_engine-0.1.7.dist-info/licenses/LICENSE +21 -0
- pyworkflow_engine-0.1.7.dist-info/top_level.txt +5 -0
- tests/examples/__init__.py +0 -0
- tests/integration/__init__.py +0 -0
- tests/integration/test_cancellation.py +330 -0
- tests/integration/test_child_workflows.py +439 -0
- tests/integration/test_continue_as_new.py +428 -0
- tests/integration/test_dynamodb_storage.py +1146 -0
- tests/integration/test_fault_tolerance.py +369 -0
- tests/integration/test_schedule_storage.py +484 -0
- tests/unit/__init__.py +0 -0
- tests/unit/backends/__init__.py +1 -0
- tests/unit/backends/test_dynamodb_storage.py +1554 -0
- tests/unit/backends/test_postgres_storage.py +1281 -0
- tests/unit/backends/test_sqlite_storage.py +1460 -0
- tests/unit/conftest.py +41 -0
- tests/unit/test_cancellation.py +364 -0
- tests/unit/test_child_workflows.py +680 -0
- tests/unit/test_continue_as_new.py +441 -0
- tests/unit/test_event_limits.py +316 -0
- tests/unit/test_executor.py +320 -0
- tests/unit/test_fault_tolerance.py +334 -0
- tests/unit/test_hooks.py +495 -0
- tests/unit/test_registry.py +261 -0
- tests/unit/test_replay.py +420 -0
- tests/unit/test_schedule_schemas.py +285 -0
- tests/unit/test_schedule_utils.py +286 -0
- tests/unit/test_scheduled_workflow.py +274 -0
- tests/unit/test_step.py +353 -0
- tests/unit/test_workflow.py +243 -0
pyworkflow/config.py
ADDED
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
"""
|
|
2
|
+
PyWorkflow configuration system.
|
|
3
|
+
|
|
4
|
+
Provides global configuration for runtime, storage, and default settings.
|
|
5
|
+
|
|
6
|
+
Configuration is loaded in this priority order:
|
|
7
|
+
1. Values set via pyworkflow.configure() (highest priority)
|
|
8
|
+
2. Values from pyworkflow.config.yaml in current directory
|
|
9
|
+
3. Default values
|
|
10
|
+
|
|
11
|
+
Usage:
|
|
12
|
+
>>> import pyworkflow
|
|
13
|
+
>>> pyworkflow.configure(
|
|
14
|
+
... default_runtime="local",
|
|
15
|
+
... default_durable=False,
|
|
16
|
+
... storage=InMemoryStorageBackend(),
|
|
17
|
+
... )
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import warnings
|
|
21
|
+
from dataclasses import dataclass
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import TYPE_CHECKING, Any, Optional
|
|
24
|
+
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from pyworkflow.storage.base import StorageBackend
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _load_yaml_config() -> dict[str, Any]:
|
|
30
|
+
"""
|
|
31
|
+
Load configuration from pyworkflow.config.yaml in current directory.
|
|
32
|
+
|
|
33
|
+
Returns:
|
|
34
|
+
Configuration dictionary, empty dict if file not found
|
|
35
|
+
"""
|
|
36
|
+
config_path = Path.cwd() / "pyworkflow.config.yaml"
|
|
37
|
+
if not config_path.exists():
|
|
38
|
+
return {}
|
|
39
|
+
|
|
40
|
+
try:
|
|
41
|
+
import yaml
|
|
42
|
+
|
|
43
|
+
with open(config_path) as f:
|
|
44
|
+
config = yaml.safe_load(f) or {}
|
|
45
|
+
return config
|
|
46
|
+
except ImportError:
|
|
47
|
+
return {}
|
|
48
|
+
except Exception:
|
|
49
|
+
return {}
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _create_storage_from_config(storage_config: dict[str, Any]) -> Optional["StorageBackend"]:
|
|
53
|
+
"""Create a storage backend from config dictionary."""
|
|
54
|
+
if not storage_config:
|
|
55
|
+
return None
|
|
56
|
+
|
|
57
|
+
from pyworkflow.storage.config import config_to_storage
|
|
58
|
+
|
|
59
|
+
return config_to_storage(storage_config)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass
class PyWorkflowConfig:
    """
    Global configuration for PyWorkflow.

    A single instance of this dataclass acts as the process-wide
    configuration (see the module-level singleton managed by
    configure()/get_config()/reset_config()).

    Attributes:
        default_runtime: Default runtime to use ("local", "celery", etc.)
        default_durable: Whether workflows are durable by default
        default_retries: Default number of retries for steps
        default_recover_on_worker_loss: Whether to auto-recover on worker failure
        default_max_recovery_attempts: Default max recovery attempts on worker failure
        storage: Storage backend instance for durable workflows
        celery_broker: Celery broker URL (for celery runtime)
        aws_region: AWS region (for lambda runtimes)
        event_soft_limit: Log warning when event count reaches this (default: 10000)
        event_hard_limit: Fail workflow when event count reaches this (default: 50000)
        event_warning_interval: Log warning every N events after soft limit (default: 100)
    """

    # Defaults (can be overridden per-workflow)
    default_runtime: str = "local"
    default_durable: bool = False
    default_retries: int = 3

    # Fault tolerance defaults
    default_recover_on_worker_loss: bool | None = (
        None  # None = True for durable, False for transient
    )
    default_max_recovery_attempts: int = 3

    # Infrastructure (app-level only)
    storage: Optional["StorageBackend"] = None
    celery_broker: str | None = None
    aws_region: str | None = None

    # Event limit settings (WARNING: Do not modify unless you understand the implications)
    # These limits prevent runaway workflows from consuming excessive resources
    event_soft_limit: int = 10_000  # Log warning at this count
    event_hard_limit: int = 50_000  # Fail workflow at this count
    event_warning_interval: int = 100  # Log warning every N events after soft limit
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _config_from_yaml() -> PyWorkflowConfig:
    """Build a PyWorkflowConfig from pyworkflow.config.yaml, or defaults."""
    settings = _load_yaml_config()
    if not settings:
        return PyWorkflowConfig()

    selected_runtime = settings.get("runtime", "local")

    return PyWorkflowConfig(
        default_runtime=selected_runtime,
        # The celery runtime implies durable execution by default.
        default_durable=(selected_runtime == "celery"),
        storage=_create_storage_from_config(settings.get("storage", {})),
        celery_broker=settings.get("celery", {}).get("broker"),
    )
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# Global singleton
# Lazily initialized by configure() or get_config(); cleared by reset_config().
_config: PyWorkflowConfig | None = None
# True when the active config was populated from a YAML file rather than
# (only) programmatic configure() calls.
_config_loaded_from_yaml: bool = False
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def configure(
    *,
    module: str | None = None,
    discover: bool = True,
    **kwargs: Any,
) -> None:
    """
    Configure PyWorkflow defaults.

    Args:
        module: Python module path to discover workflows from (e.g., "myapp.workflows").
            If provided and discover=True, the module will be imported to register
            workflows decorated with @workflow.
        discover: If True (default) and module is provided, automatically discover
            and register workflows from the specified module.
        default_runtime: Default runtime ("local", "celery", "lambda", "durable-lambda")
        default_durable: Whether workflows are durable by default
        default_retries: Default number of retries for steps
        default_recover_on_worker_loss: Whether to auto-recover on worker failure
            (None = True for durable, False for transient)
        default_max_recovery_attempts: Max recovery attempts on worker failure
        storage: Storage backend instance
        celery_broker: Celery broker URL
        aws_region: AWS region

    Event Limit Settings (Advanced - modify with caution):
        event_soft_limit: Log warning when event count reaches this (default: 10000)
        event_hard_limit: Fail workflow when event count reaches this (default: 50000)
        event_warning_interval: Log warning every N events after soft limit (default: 100)

    WARNING: Modifying event limits is not recommended. These defaults are carefully
    chosen to prevent runaway workflows from consuming excessive resources.

    Raises:
        ValueError: If a keyword argument does not name a PyWorkflowConfig field.

    Example:
        >>> import pyworkflow
        >>> from pyworkflow.storage import InMemoryStorageBackend
        >>>
        >>> pyworkflow.configure(
        ...     default_runtime="local",
        ...     default_durable=True,
        ...     storage=InMemoryStorageBackend(),
        ... )

        >>> # Configure with workflow discovery
        >>> pyworkflow.configure(module="myapp.workflows")
    """
    global _config
    if _config is None:
        _config = PyWorkflowConfig()

    # Warn if user is modifying event limits
    event_limit_keys = {"event_soft_limit", "event_hard_limit", "event_warning_interval"}
    modified_limits = event_limit_keys & set(kwargs.keys())
    if modified_limits:
        warnings.warn(
            f"Modifying event limits ({', '.join(sorted(modified_limits))}) is not recommended. "
            "These defaults are carefully chosen to prevent runaway workflows.",
            UserWarning,
            stacklevel=2,
        )

    # Validate against the declared dataclass fields rather than hasattr():
    # hasattr() would also accept non-field attributes (e.g. dunder names),
    # and the error message below already derives from __dataclass_fields__.
    valid_keys = PyWorkflowConfig.__dataclass_fields__.keys()
    for key, value in kwargs.items():
        if key not in valid_keys:
            raise ValueError(
                f"Unknown config option: {key}. Valid options: {', '.join(valid_keys)}"
            )
        setattr(_config, key, value)

    # Auto-discover workflows if module is specified
    if discover and module:
        from pyworkflow.discovery import discover_workflows

        discover_workflows(module_path=module)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def configure_from_yaml(path: str | Path, discover: bool = True) -> None:
|
|
213
|
+
"""
|
|
214
|
+
Configure PyWorkflow from a specific YAML file.
|
|
215
|
+
|
|
216
|
+
Unlike the automatic YAML loading in get_config(), this function:
|
|
217
|
+
- Requires an explicit path
|
|
218
|
+
- Raises FileNotFoundError if the file doesn't exist
|
|
219
|
+
- Raises ValueError if YAML parsing fails
|
|
220
|
+
- Optionally discovers workflows from the 'module' field in the YAML
|
|
221
|
+
|
|
222
|
+
Args:
|
|
223
|
+
path: Path to the YAML configuration file
|
|
224
|
+
discover: If True (default), automatically discover and register
|
|
225
|
+
workflows from the 'module' or 'modules' field in the YAML file.
|
|
226
|
+
Set to False to skip discovery.
|
|
227
|
+
|
|
228
|
+
Raises:
|
|
229
|
+
FileNotFoundError: If the specified file doesn't exist
|
|
230
|
+
ValueError: If the YAML file is invalid or cannot be parsed
|
|
231
|
+
ImportError: If PyYAML is not installed
|
|
232
|
+
DiscoveryError: If workflow module discovery fails (when discover=True)
|
|
233
|
+
|
|
234
|
+
Example:
|
|
235
|
+
>>> import pyworkflow
|
|
236
|
+
>>> pyworkflow.configure_from_yaml("/etc/pyworkflow/config.yaml")
|
|
237
|
+
|
|
238
|
+
>>> # Skip workflow discovery
|
|
239
|
+
>>> pyworkflow.configure_from_yaml("/etc/pyworkflow/config.yaml", discover=False)
|
|
240
|
+
"""
|
|
241
|
+
global _config, _config_loaded_from_yaml
|
|
242
|
+
|
|
243
|
+
config_path = Path(path)
|
|
244
|
+
|
|
245
|
+
if not config_path.exists():
|
|
246
|
+
raise FileNotFoundError(f"PyWorkflow configuration file not found: {config_path}")
|
|
247
|
+
|
|
248
|
+
try:
|
|
249
|
+
import yaml
|
|
250
|
+
except ImportError:
|
|
251
|
+
raise ImportError(
|
|
252
|
+
"PyYAML is required for YAML configuration. Install it with: pip install pyyaml"
|
|
253
|
+
)
|
|
254
|
+
|
|
255
|
+
try:
|
|
256
|
+
with open(config_path) as f:
|
|
257
|
+
yaml_config = yaml.safe_load(f) or {}
|
|
258
|
+
except yaml.YAMLError as e:
|
|
259
|
+
raise ValueError(f"Invalid YAML in {config_path}: {e}")
|
|
260
|
+
|
|
261
|
+
# Map YAML keys to config attributes (same logic as _config_from_yaml)
|
|
262
|
+
runtime = yaml_config.get("runtime", "local")
|
|
263
|
+
durable = runtime == "celery" # Celery runtime defaults to durable
|
|
264
|
+
|
|
265
|
+
# Create storage from config
|
|
266
|
+
storage = _create_storage_from_config(yaml_config.get("storage", {}))
|
|
267
|
+
|
|
268
|
+
# Get celery broker
|
|
269
|
+
celery_config = yaml_config.get("celery", {})
|
|
270
|
+
celery_broker = celery_config.get("broker")
|
|
271
|
+
|
|
272
|
+
_config = PyWorkflowConfig(
|
|
273
|
+
default_runtime=runtime,
|
|
274
|
+
default_durable=durable,
|
|
275
|
+
storage=storage,
|
|
276
|
+
celery_broker=celery_broker,
|
|
277
|
+
)
|
|
278
|
+
_config_loaded_from_yaml = True
|
|
279
|
+
|
|
280
|
+
# Auto-discover workflows if enabled
|
|
281
|
+
if discover:
|
|
282
|
+
from pyworkflow.discovery import discover_workflows
|
|
283
|
+
|
|
284
|
+
discover_workflows(config=yaml_config, config_path=config_path)
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def get_config() -> PyWorkflowConfig:
    """
    Return the active configuration, initializing it on first use.

    On the first call without a prior configure(), settings are loaded
    from pyworkflow.config.yaml in the current directory if present,
    otherwise built-in defaults are used.

    Returns:
        Current PyWorkflowConfig instance
    """
    global _config, _config_loaded_from_yaml
    if _config is not None:
        return _config

    # Lazy first-time initialization: prefer YAML file settings if available.
    _config = _config_from_yaml()
    _config_loaded_from_yaml = True
    return _config
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def reset_config() -> None:
    """
    Discard the active configuration, reverting to built-in defaults.

    The next get_config() call re-initializes from scratch. Primarily
    intended for use in tests.
    """
    global _config, _config_loaded_from_yaml
    _config = None
    _config_loaded_from_yaml = False
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def get_storage() -> Optional["StorageBackend"]:
    """
    Return the storage backend from the active configuration.

    Returns:
        StorageBackend instance if configured, None otherwise

    Example:
        >>> import pyworkflow
        >>> from pyworkflow.storage import InMemoryStorageBackend
        >>> pyworkflow.configure(storage=InMemoryStorageBackend())
        >>> storage = pyworkflow.get_storage()
    """
    active = get_config()
    return active.storage
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Workflow Context - The unified interface for workflow execution.
|
|
3
|
+
|
|
4
|
+
The context provides implicit access to workflow operations within execution.
|
|
5
|
+
Uses Python's contextvars for implicit context passing (similar to Scala's implicits).
|
|
6
|
+
|
|
7
|
+
Available Contexts:
|
|
8
|
+
- LocalContext: In-process execution with optional event sourcing
|
|
9
|
+
- AWSContext: AWS Durable Lambda Functions with automatic checkpointing
|
|
10
|
+
- MockContext: For testing workflows without side effects
|
|
11
|
+
|
|
12
|
+
Usage with implicit context:
|
|
13
|
+
from pyworkflow.context import get_context
|
|
14
|
+
|
|
15
|
+
async def my_step(order_id: str):
|
|
16
|
+
ctx = get_context() # Implicitly available
|
|
17
|
+
ctx.log(f"Processing {order_id}")
|
|
18
|
+
return {"order_id": order_id}
|
|
19
|
+
|
|
20
|
+
@workflow()
|
|
21
|
+
async def my_workflow(order_id: str):
|
|
22
|
+
# Context is set automatically by @workflow
|
|
23
|
+
ctx = get_context()
|
|
24
|
+
result = await ctx.run(my_step, order_id)
|
|
25
|
+
await ctx.sleep("5m")
|
|
26
|
+
return result
|
|
27
|
+
|
|
28
|
+
Usage with explicit context (context manager):
|
|
29
|
+
from pyworkflow.context import LocalContext
|
|
30
|
+
|
|
31
|
+
async with LocalContext(run_id="run_123", workflow_name="my_workflow") as ctx:
|
|
32
|
+
result = await ctx.run(my_step, "order_123")
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
from pyworkflow.context.base import (
|
|
36
|
+
WorkflowContext,
|
|
37
|
+
get_context,
|
|
38
|
+
has_context,
|
|
39
|
+
reset_context,
|
|
40
|
+
set_context,
|
|
41
|
+
)
|
|
42
|
+
from pyworkflow.context.local import LocalContext
|
|
43
|
+
from pyworkflow.context.mock import MockContext
|
|
44
|
+
|
|
45
|
+
__all__ = [
|
|
46
|
+
# Base context and helpers
|
|
47
|
+
"WorkflowContext",
|
|
48
|
+
"get_context",
|
|
49
|
+
"has_context",
|
|
50
|
+
"set_context",
|
|
51
|
+
"reset_context",
|
|
52
|
+
# Context implementations
|
|
53
|
+
"LocalContext",
|
|
54
|
+
"MockContext",
|
|
55
|
+
]
|
|
56
|
+
|
|
57
|
+
# AWS context is optional - only available if aws-durable-execution-sdk installed
|
|
58
|
+
try:
|
|
59
|
+
from pyworkflow.context.aws import AWSContext
|
|
60
|
+
|
|
61
|
+
__all__.append("AWSContext")
|
|
62
|
+
except ImportError:
|
|
63
|
+
pass
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AWSContext - AWS Durable Lambda Functions execution context.
|
|
3
|
+
|
|
4
|
+
This context wraps the AWS Durable Execution SDK to provide PyWorkflow's
|
|
5
|
+
context interface while leveraging AWS native checkpointing and durability.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
from typing import TYPE_CHECKING, Any
|
|
12
|
+
|
|
13
|
+
from loguru import logger
|
|
14
|
+
|
|
15
|
+
from pyworkflow.context.base import StepFunction, WorkflowContext
|
|
16
|
+
from pyworkflow.utils.duration import parse_duration
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from aws_durable_execution_sdk_python import DurableContext
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class AWSContext(WorkflowContext):
    """
    AWS Durable Lambda Functions execution context.

    This context wraps the AWS Durable Execution SDK's DurableContext,
    translating PyWorkflow operations to AWS SDK calls:

    - ctx.run() -> context.step()
    - ctx.sleep() -> context.wait()
    - ctx.wait_for_event() -> context.wait_for_callback()
    - ctx.parallel() -> context.parallel()

    AWS handles all checkpointing, replay, and durability automatically.

    Example:
        # Created by @aws_workflow decorator, not directly
        @aws_workflow()
        async def my_workflow(ctx: AWSContext, order_id: str):
            result = await ctx.run(validate_order, order_id)
            await ctx.sleep("5m")  # No compute charges!
            return result
    """

    def __init__(
        self,
        aws_context: DurableContext,
        run_id: str = "aws_run",
        workflow_name: str = "aws_workflow",
    ) -> None:
        """
        Initialize AWS context.

        Args:
            aws_context: The AWS DurableContext from Lambda handler
            run_id: Run ID (extracted from Lambda or generated)
            workflow_name: Workflow name
        """
        super().__init__(run_id=run_id, workflow_name=workflow_name)
        self._aws_ctx = aws_context
        # Monotonic counter used to synthesize names for anonymous steps
        # (callables with no usable __name__ and no explicit name= argument).
        self._step_counter = 0

    @property
    def aws_context(self) -> DurableContext:
        """Get the underlying AWS DurableContext."""
        return self._aws_ctx

    # =========================================================================
    # Step execution
    # =========================================================================

    async def run(
        self,
        func: StepFunction,
        *args: Any,
        name: str | None = None,
        **kwargs: Any,
    ) -> Any:
        """
        Execute a step with AWS checkpointing.

        Uses AWS context.step() which provides:
        - Automatic checkpointing before/after execution
        - Replay support (returns cached result if already completed)
        - Retry handling

        Args:
            func: Step function to execute (sync or async)
            *args: Arguments for the function
            name: Optional step name (used for checkpointing)
            **kwargs: Keyword arguments

        Returns:
            Step result
        """
        # Prefer explicit name, then the callable's __name__, then a counter.
        step_name = name or getattr(func, "__name__", None)
        if not step_name:
            self._step_counter += 1
            step_name = f"step_{self._step_counter}"

        logger.debug(f"[aws] Running step: {step_name}")

        def execute_step(_: Any) -> Any:
            """Inner function for AWS context.step()."""
            # Bridge async step functions into the synchronous callable the
            # AWS SDK expects. Sync functions are invoked directly.
            if asyncio.iscoroutinefunction(func):
                # Run async function in event loop
                try:
                    loop = asyncio.get_running_loop()
                except RuntimeError:
                    loop = None

                if loop is not None:
                    # Already in async context - use thread
                    # (asyncio.run() cannot be called from a running loop, so
                    # the coroutine is executed on its own loop in a worker
                    # thread and we block until it finishes).
                    import concurrent.futures

                    with concurrent.futures.ThreadPoolExecutor() as executor:
                        future = executor.submit(asyncio.run, func(*args, **kwargs))
                        return future.result()
                else:
                    return asyncio.run(func(*args, **kwargs))
            else:
                return func(*args, **kwargs)

        # Use AWS context.step() for checkpointing
        # NOTE(review): step() is called synchronously here inside an async
        # method — presumably the SDK blocks/replays as needed; confirm.
        result = self._aws_ctx.step(execute_step, name=step_name)

        logger.debug(f"[aws] Step completed: {step_name}")
        return result

    # =========================================================================
    # Sleep
    # =========================================================================

    async def sleep(self, duration: str | int | float) -> None:
        """
        Sleep using AWS native wait (no compute charges).

        Uses AWS context.wait() which:
        - Suspends Lambda execution
        - No charges during wait time
        - Automatically resumes when duration elapses

        Args:
            duration: Sleep duration — a duration string (parsed via
                parse_duration) or a number of seconds. Note that numeric
                values are truncated to whole seconds by int().
        """
        duration_seconds = parse_duration(duration) if isinstance(duration, str) else int(duration)

        logger.debug(f"[aws] Sleeping: {duration_seconds}s")

        # Try to use AWS Duration, fall back to raw seconds for mock
        try:
            from aws_durable_execution_sdk_python.config import Duration

            duration_obj = Duration.from_seconds(duration_seconds)
        except ImportError:
            # Using mock context
            duration_obj = duration_seconds

        self._aws_ctx.wait(duration_obj)

        logger.debug(f"[aws] Sleep completed: {duration_seconds}s")

    # =========================================================================
    # Parallel execution
    # =========================================================================

    async def parallel(self, *tasks: Any) -> list[Any]:
        """
        Execute tasks in parallel using AWS context.parallel().

        Note: AWS parallel() has a different signature - it takes functions
        that receive a child context. For simplicity, we fall back to
        asyncio.gather for the MVP.

        Args:
            *tasks: Coroutines to execute in parallel

        Returns:
            List of results
        """
        # For MVP, use asyncio.gather
        # TODO: Use AWS context.parallel() for better checkpointing
        return list(await asyncio.gather(*tasks))

    # =========================================================================
    # External events (callbacks)
    # =========================================================================

    async def wait_for_event(
        self,
        event_name: str,
        timeout: str | int | None = None,
    ) -> Any:
        """
        Wait for an external event using AWS callbacks.

        Uses AWS context.create_callback() or context.wait_for_callback().

        Args:
            event_name: Name for the callback
            timeout: Optional timeout (duration string or seconds)

        Returns:
            Callback payload when received
        """
        logger.debug(f"[aws] Waiting for event: {event_name}")

        # Parse timeout
        timeout_seconds = None
        if timeout:
            timeout_seconds = parse_duration(timeout) if isinstance(timeout, str) else int(timeout)

        try:
            from aws_durable_execution_sdk_python.config import CallbackConfig

            config = None
            if timeout_seconds:
                config = CallbackConfig(timeout_seconds=timeout_seconds)

            callback = self._aws_ctx.create_callback(name=event_name, config=config)

            # Return the callback result when available
            # NOTE(review): result() appears to block until the callback is
            # delivered — confirm against the SDK's suspension semantics.
            result = callback.result()

            logger.debug(f"[aws] Event received: {event_name}")
            return result

        except ImportError:
            # SDK not installed (mock context) - return mock data
            return {"event": event_name, "mock": True}
|