pyworkflow-engine 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dashboard/backend/app/__init__.py +1 -0
- dashboard/backend/app/config.py +32 -0
- dashboard/backend/app/controllers/__init__.py +6 -0
- dashboard/backend/app/controllers/run_controller.py +86 -0
- dashboard/backend/app/controllers/workflow_controller.py +33 -0
- dashboard/backend/app/dependencies/__init__.py +5 -0
- dashboard/backend/app/dependencies/storage.py +50 -0
- dashboard/backend/app/repositories/__init__.py +6 -0
- dashboard/backend/app/repositories/run_repository.py +80 -0
- dashboard/backend/app/repositories/workflow_repository.py +27 -0
- dashboard/backend/app/rest/__init__.py +8 -0
- dashboard/backend/app/rest/v1/__init__.py +12 -0
- dashboard/backend/app/rest/v1/health.py +33 -0
- dashboard/backend/app/rest/v1/runs.py +133 -0
- dashboard/backend/app/rest/v1/workflows.py +41 -0
- dashboard/backend/app/schemas/__init__.py +23 -0
- dashboard/backend/app/schemas/common.py +16 -0
- dashboard/backend/app/schemas/event.py +24 -0
- dashboard/backend/app/schemas/hook.py +25 -0
- dashboard/backend/app/schemas/run.py +54 -0
- dashboard/backend/app/schemas/step.py +28 -0
- dashboard/backend/app/schemas/workflow.py +31 -0
- dashboard/backend/app/server.py +87 -0
- dashboard/backend/app/services/__init__.py +6 -0
- dashboard/backend/app/services/run_service.py +240 -0
- dashboard/backend/app/services/workflow_service.py +155 -0
- dashboard/backend/main.py +18 -0
- docs/concepts/cancellation.mdx +362 -0
- docs/concepts/continue-as-new.mdx +434 -0
- docs/concepts/events.mdx +266 -0
- docs/concepts/fault-tolerance.mdx +370 -0
- docs/concepts/hooks.mdx +552 -0
- docs/concepts/limitations.mdx +167 -0
- docs/concepts/schedules.mdx +775 -0
- docs/concepts/sleep.mdx +312 -0
- docs/concepts/steps.mdx +301 -0
- docs/concepts/workflows.mdx +255 -0
- docs/guides/cli.mdx +942 -0
- docs/guides/configuration.mdx +560 -0
- docs/introduction.mdx +155 -0
- docs/quickstart.mdx +279 -0
- examples/__init__.py +1 -0
- examples/celery/__init__.py +1 -0
- examples/celery/durable/docker-compose.yml +55 -0
- examples/celery/durable/pyworkflow.config.yaml +12 -0
- examples/celery/durable/workflows/__init__.py +122 -0
- examples/celery/durable/workflows/basic.py +87 -0
- examples/celery/durable/workflows/batch_processing.py +102 -0
- examples/celery/durable/workflows/cancellation.py +273 -0
- examples/celery/durable/workflows/child_workflow_patterns.py +240 -0
- examples/celery/durable/workflows/child_workflows.py +202 -0
- examples/celery/durable/workflows/continue_as_new.py +260 -0
- examples/celery/durable/workflows/fault_tolerance.py +210 -0
- examples/celery/durable/workflows/hooks.py +211 -0
- examples/celery/durable/workflows/idempotency.py +112 -0
- examples/celery/durable/workflows/long_running.py +99 -0
- examples/celery/durable/workflows/retries.py +101 -0
- examples/celery/durable/workflows/schedules.py +209 -0
- examples/celery/transient/01_basic_workflow.py +91 -0
- examples/celery/transient/02_fault_tolerance.py +257 -0
- examples/celery/transient/__init__.py +20 -0
- examples/celery/transient/pyworkflow.config.yaml +25 -0
- examples/local/__init__.py +1 -0
- examples/local/durable/01_basic_workflow.py +94 -0
- examples/local/durable/02_file_storage.py +132 -0
- examples/local/durable/03_retries.py +169 -0
- examples/local/durable/04_long_running.py +119 -0
- examples/local/durable/05_event_log.py +145 -0
- examples/local/durable/06_idempotency.py +148 -0
- examples/local/durable/07_hooks.py +334 -0
- examples/local/durable/08_cancellation.py +233 -0
- examples/local/durable/09_child_workflows.py +198 -0
- examples/local/durable/10_child_workflow_patterns.py +265 -0
- examples/local/durable/11_continue_as_new.py +249 -0
- examples/local/durable/12_schedules.py +198 -0
- examples/local/durable/__init__.py +1 -0
- examples/local/transient/01_quick_tasks.py +87 -0
- examples/local/transient/02_retries.py +130 -0
- examples/local/transient/03_sleep.py +141 -0
- examples/local/transient/__init__.py +1 -0
- pyworkflow/__init__.py +256 -0
- pyworkflow/aws/__init__.py +68 -0
- pyworkflow/aws/context.py +234 -0
- pyworkflow/aws/handler.py +184 -0
- pyworkflow/aws/testing.py +310 -0
- pyworkflow/celery/__init__.py +41 -0
- pyworkflow/celery/app.py +198 -0
- pyworkflow/celery/scheduler.py +315 -0
- pyworkflow/celery/tasks.py +1746 -0
- pyworkflow/cli/__init__.py +132 -0
- pyworkflow/cli/__main__.py +6 -0
- pyworkflow/cli/commands/__init__.py +1 -0
- pyworkflow/cli/commands/hooks.py +640 -0
- pyworkflow/cli/commands/quickstart.py +495 -0
- pyworkflow/cli/commands/runs.py +773 -0
- pyworkflow/cli/commands/scheduler.py +130 -0
- pyworkflow/cli/commands/schedules.py +794 -0
- pyworkflow/cli/commands/setup.py +703 -0
- pyworkflow/cli/commands/worker.py +413 -0
- pyworkflow/cli/commands/workflows.py +1257 -0
- pyworkflow/cli/output/__init__.py +1 -0
- pyworkflow/cli/output/formatters.py +321 -0
- pyworkflow/cli/output/styles.py +121 -0
- pyworkflow/cli/utils/__init__.py +1 -0
- pyworkflow/cli/utils/async_helpers.py +30 -0
- pyworkflow/cli/utils/config.py +130 -0
- pyworkflow/cli/utils/config_generator.py +344 -0
- pyworkflow/cli/utils/discovery.py +53 -0
- pyworkflow/cli/utils/docker_manager.py +651 -0
- pyworkflow/cli/utils/interactive.py +364 -0
- pyworkflow/cli/utils/storage.py +115 -0
- pyworkflow/config.py +329 -0
- pyworkflow/context/__init__.py +63 -0
- pyworkflow/context/aws.py +230 -0
- pyworkflow/context/base.py +416 -0
- pyworkflow/context/local.py +930 -0
- pyworkflow/context/mock.py +381 -0
- pyworkflow/core/__init__.py +0 -0
- pyworkflow/core/exceptions.py +353 -0
- pyworkflow/core/registry.py +313 -0
- pyworkflow/core/scheduled.py +328 -0
- pyworkflow/core/step.py +494 -0
- pyworkflow/core/workflow.py +294 -0
- pyworkflow/discovery.py +248 -0
- pyworkflow/engine/__init__.py +0 -0
- pyworkflow/engine/events.py +879 -0
- pyworkflow/engine/executor.py +682 -0
- pyworkflow/engine/replay.py +273 -0
- pyworkflow/observability/__init__.py +19 -0
- pyworkflow/observability/logging.py +234 -0
- pyworkflow/primitives/__init__.py +33 -0
- pyworkflow/primitives/child_handle.py +174 -0
- pyworkflow/primitives/child_workflow.py +372 -0
- pyworkflow/primitives/continue_as_new.py +101 -0
- pyworkflow/primitives/define_hook.py +150 -0
- pyworkflow/primitives/hooks.py +97 -0
- pyworkflow/primitives/resume_hook.py +210 -0
- pyworkflow/primitives/schedule.py +545 -0
- pyworkflow/primitives/shield.py +96 -0
- pyworkflow/primitives/sleep.py +100 -0
- pyworkflow/runtime/__init__.py +21 -0
- pyworkflow/runtime/base.py +179 -0
- pyworkflow/runtime/celery.py +310 -0
- pyworkflow/runtime/factory.py +101 -0
- pyworkflow/runtime/local.py +706 -0
- pyworkflow/scheduler/__init__.py +9 -0
- pyworkflow/scheduler/local.py +248 -0
- pyworkflow/serialization/__init__.py +0 -0
- pyworkflow/serialization/decoder.py +146 -0
- pyworkflow/serialization/encoder.py +162 -0
- pyworkflow/storage/__init__.py +54 -0
- pyworkflow/storage/base.py +612 -0
- pyworkflow/storage/config.py +185 -0
- pyworkflow/storage/dynamodb.py +1315 -0
- pyworkflow/storage/file.py +827 -0
- pyworkflow/storage/memory.py +549 -0
- pyworkflow/storage/postgres.py +1161 -0
- pyworkflow/storage/schemas.py +486 -0
- pyworkflow/storage/sqlite.py +1136 -0
- pyworkflow/utils/__init__.py +0 -0
- pyworkflow/utils/duration.py +177 -0
- pyworkflow/utils/schedule.py +391 -0
- pyworkflow_engine-0.1.7.dist-info/METADATA +687 -0
- pyworkflow_engine-0.1.7.dist-info/RECORD +196 -0
- pyworkflow_engine-0.1.7.dist-info/WHEEL +5 -0
- pyworkflow_engine-0.1.7.dist-info/entry_points.txt +2 -0
- pyworkflow_engine-0.1.7.dist-info/licenses/LICENSE +21 -0
- pyworkflow_engine-0.1.7.dist-info/top_level.txt +5 -0
- tests/examples/__init__.py +0 -0
- tests/integration/__init__.py +0 -0
- tests/integration/test_cancellation.py +330 -0
- tests/integration/test_child_workflows.py +439 -0
- tests/integration/test_continue_as_new.py +428 -0
- tests/integration/test_dynamodb_storage.py +1146 -0
- tests/integration/test_fault_tolerance.py +369 -0
- tests/integration/test_schedule_storage.py +484 -0
- tests/unit/__init__.py +0 -0
- tests/unit/backends/__init__.py +1 -0
- tests/unit/backends/test_dynamodb_storage.py +1554 -0
- tests/unit/backends/test_postgres_storage.py +1281 -0
- tests/unit/backends/test_sqlite_storage.py +1460 -0
- tests/unit/conftest.py +41 -0
- tests/unit/test_cancellation.py +364 -0
- tests/unit/test_child_workflows.py +680 -0
- tests/unit/test_continue_as_new.py +441 -0
- tests/unit/test_event_limits.py +316 -0
- tests/unit/test_executor.py +320 -0
- tests/unit/test_fault_tolerance.py +334 -0
- tests/unit/test_hooks.py +495 -0
- tests/unit/test_registry.py +261 -0
- tests/unit/test_replay.py +420 -0
- tests/unit/test_schedule_schemas.py +285 -0
- tests/unit/test_schedule_utils.py +286 -0
- tests/unit/test_scheduled_workflow.py +274 -0
- tests/unit/test_step.py +353 -0
- tests/unit/test_workflow.py +243 -0
|
@@ -0,0 +1,560 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: 'Configuration'
|
|
3
|
+
description: 'Configure PyWorkflow for your application using config files or programmatic setup'
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
## Overview
|
|
7
|
+
|
|
8
|
+
PyWorkflow configuration determines how workflows execute: which runtime to use, where to store
|
|
9
|
+
state, and default behaviors. Configuration can come from multiple sources with a clear priority order.
|
|
10
|
+
|
|
11
|
+
<CardGroup cols={2}>
|
|
12
|
+
<Card title="Config File" icon="file-code">
|
|
13
|
+
Zero-code configuration via `pyworkflow.config.yaml`
|
|
14
|
+
</Card>
|
|
15
|
+
<Card title="Programmatic" icon="code">
|
|
16
|
+
Configure in Python code with `pyworkflow.configure()`
|
|
17
|
+
</Card>
|
|
18
|
+
<Card title="Per-Call Override" icon="sliders">
|
|
19
|
+
Override settings per `start()` call
|
|
20
|
+
</Card>
|
|
21
|
+
<Card title="Environment Variables" icon="terminal">
|
|
22
|
+
Configure via environment for deployment flexibility
|
|
23
|
+
</Card>
|
|
24
|
+
</CardGroup>
|
|
25
|
+
|
|
26
|
+
## Configuration Priority
|
|
27
|
+
|
|
28
|
+
When you call `pyworkflow.start()`, configuration is resolved in this order:
|
|
29
|
+
|
|
30
|
+
| Priority | Source | Description |
|
|
31
|
+
|----------|--------|-------------|
|
|
32
|
+
| 1 (highest) | `start()` parameters | Explicit `runtime=`, `durable=`, `storage=` arguments |
|
|
33
|
+
| 2 | `pyworkflow.configure()` | Values set programmatically |
|
|
34
|
+
| 3 | `pyworkflow.config.yaml` | Config file in the current working directory |
|
|
35
|
+
| 4 (lowest) | Defaults | `runtime="local"`, `durable=False` |
|
|
36
|
+
|
|
37
|
+
<Note>
|
|
38
|
+
When you use Celery runtime in the config file (`runtime: celery`), PyWorkflow automatically
|
|
39
|
+
sets `durable=True` since Celery requires durable mode.
|
|
40
|
+
</Note>
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Config File (Recommended)
|
|
45
|
+
|
|
46
|
+
The simplest way to configure PyWorkflow is with a `pyworkflow.config.yaml` file in your
|
|
47
|
+
project directory:
|
|
48
|
+
|
|
49
|
+
```yaml
|
|
50
|
+
# pyworkflow.config.yaml
|
|
51
|
+
|
|
52
|
+
# Module containing workflow definitions (for CLI discovery)
|
|
53
|
+
module: myapp.workflows
|
|
54
|
+
|
|
55
|
+
# Runtime: "celery" for distributed, "local" for in-process
|
|
56
|
+
runtime: celery
|
|
57
|
+
|
|
58
|
+
# Storage backend for durable workflows
|
|
59
|
+
storage:
|
|
60
|
+
backend: file # "file" or "memory"
|
|
61
|
+
path: ./workflow_data # Path for file backend
|
|
62
|
+
|
|
63
|
+
# Celery broker settings (when runtime: celery)
|
|
64
|
+
celery:
|
|
65
|
+
broker: redis://localhost:6379/0
|
|
66
|
+
result_backend: redis://localhost:6379/1
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Automatic Loading
|
|
70
|
+
|
|
71
|
+
The config file is automatically loaded when:
|
|
72
|
+
|
|
73
|
+
1. **CLI commands** - `pyworkflow worker run`, `pyworkflow workflows list`, etc.
|
|
74
|
+
2. **Python code** - When you call `pyworkflow.start()` or `pyworkflow.get_config()`
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
# Your Python code - no explicit configuration needed!
|
|
78
|
+
import asyncio
|
|
79
|
+
import pyworkflow
|
|
80
|
+
from myapp.workflows import order_workflow
|
|
81
|
+
|
|
82
|
+
async def main():
|
|
83
|
+
# Automatically uses settings from pyworkflow.config.yaml:
|
|
84
|
+
# - runtime: celery
|
|
85
|
+
# - durable: True (implied by celery runtime)
|
|
86
|
+
# - storage: FileStorageBackend("./workflow_data")
|
|
87
|
+
run_id = await pyworkflow.start(order_workflow, "order-123", 99.99)
|
|
88
|
+
print(f"Started workflow: {run_id}")
|
|
89
|
+
|
|
90
|
+
asyncio.run(main())
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Config File Location
|
|
94
|
+
|
|
95
|
+
PyWorkflow looks for `pyworkflow.config.yaml` in the **current working directory** (where
|
|
96
|
+
you run your Python script or CLI command from).
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
myproject/
|
|
100
|
+
├── pyworkflow.config.yaml # Config file here
|
|
101
|
+
├── myapp/
|
|
102
|
+
│ └── workflows.py
|
|
103
|
+
└── scripts/
|
|
104
|
+
└── run_workflow.py
|
|
105
|
+
|
|
106
|
+
# Run from project root - config is found
|
|
107
|
+
cd myproject
|
|
108
|
+
python scripts/run_workflow.py # ✓ Uses pyworkflow.config.yaml
|
|
109
|
+
|
|
110
|
+
# Run from scripts directory - config NOT found (uses defaults)
|
|
111
|
+
cd myproject/scripts
|
|
112
|
+
python run_workflow.py # ✗ No config file in ./scripts/
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
<Tip>
|
|
116
|
+
Always run your scripts from the directory containing `pyworkflow.config.yaml`, or use
|
|
117
|
+
programmatic configuration if you need more control.
|
|
118
|
+
</Tip>
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
122
|
+
## Project Structure
|
|
123
|
+
|
|
124
|
+
PyWorkflow supports two ways to organize your workflow code. The `module` field in your
|
|
125
|
+
config file tells PyWorkflow where to find and import your workflows.
|
|
126
|
+
|
|
127
|
+
### Option 1: Single File
|
|
128
|
+
|
|
129
|
+
For simple projects, define all workflows in a single file:
|
|
130
|
+
|
|
131
|
+
```
|
|
132
|
+
myproject/
|
|
133
|
+
├── pyworkflow.config.yaml
|
|
134
|
+
└── workflows.py # All workflows here
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
```yaml
|
|
138
|
+
# pyworkflow.config.yaml
|
|
139
|
+
module: workflows
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
```python
|
|
143
|
+
# workflows.py
|
|
144
|
+
from pyworkflow import workflow, step
|
|
145
|
+
|
|
146
|
+
@step()
|
|
147
|
+
async def validate_order(order_id: str) -> dict:
|
|
148
|
+
return {"order_id": order_id, "valid": True}
|
|
149
|
+
|
|
150
|
+
@workflow()
|
|
151
|
+
async def process_order(order_id: str) -> dict:
|
|
152
|
+
result = await validate_order(order_id)
|
|
153
|
+
return {"status": "completed", **result}
|
|
154
|
+
|
|
155
|
+
@workflow()
|
|
156
|
+
async def send_notification(user_id: str, message: str) -> dict:
|
|
157
|
+
return {"sent": True, "user_id": user_id}
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### Option 2: Package Directory (Recommended)
|
|
161
|
+
|
|
162
|
+
For larger projects, organize workflows into a package with multiple files:
|
|
163
|
+
|
|
164
|
+
```
|
|
165
|
+
myproject/
|
|
166
|
+
├── pyworkflow.config.yaml
|
|
167
|
+
└── workflows/
|
|
168
|
+
├── __init__.py # Exports all workflows
|
|
169
|
+
├── orders.py # Order-related workflows
|
|
170
|
+
└── notifications.py # Notification workflows
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
```yaml
|
|
174
|
+
# pyworkflow.config.yaml
|
|
175
|
+
module: workflows
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
# workflows/__init__.py
|
|
180
|
+
"""Export all workflows from the package."""
|
|
181
|
+
from .orders import process_order, refund_order
|
|
182
|
+
from .notifications import send_notification, send_bulk_notifications
|
|
183
|
+
|
|
184
|
+
__all__ = [
|
|
185
|
+
"process_order",
|
|
186
|
+
"refund_order",
|
|
187
|
+
"send_notification",
|
|
188
|
+
"send_bulk_notifications",
|
|
189
|
+
]
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
```python
|
|
193
|
+
# workflows/orders.py
|
|
194
|
+
from pyworkflow import workflow, step
|
|
195
|
+
|
|
196
|
+
@step()
|
|
197
|
+
async def validate_order(order_id: str) -> dict:
|
|
198
|
+
return {"order_id": order_id, "valid": True}
|
|
199
|
+
|
|
200
|
+
@step()
|
|
201
|
+
async def process_payment(order_id: str, amount: float) -> dict:
|
|
202
|
+
return {"order_id": order_id, "paid": True}
|
|
203
|
+
|
|
204
|
+
@workflow()
|
|
205
|
+
async def process_order(order_id: str, amount: float) -> dict:
|
|
206
|
+
validation = await validate_order(order_id)
|
|
207
|
+
payment = await process_payment(order_id, amount)
|
|
208
|
+
return {"status": "completed", "validation": validation, "payment": payment}
|
|
209
|
+
|
|
210
|
+
@workflow()
|
|
211
|
+
async def refund_order(order_id: str) -> dict:
|
|
212
|
+
return {"order_id": order_id, "refunded": True}
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
```python
|
|
216
|
+
# workflows/notifications.py
|
|
217
|
+
from pyworkflow import workflow, step, sleep
|
|
218
|
+
|
|
219
|
+
@step()
|
|
220
|
+
async def send_email(to: str, subject: str, body: str) -> dict:
|
|
221
|
+
return {"sent": True, "to": to}
|
|
222
|
+
|
|
223
|
+
@workflow()
|
|
224
|
+
async def send_notification(user_id: str, message: str) -> dict:
|
|
225
|
+
result = await send_email(f"{user_id}@example.com", "Notification", message)
|
|
226
|
+
return result
|
|
227
|
+
|
|
228
|
+
@workflow()
|
|
229
|
+
async def send_bulk_notifications(user_ids: list[str], message: str) -> dict:
|
|
230
|
+
results = []
|
|
231
|
+
for user_id in user_ids:
|
|
232
|
+
result = await send_email(f"{user_id}@example.com", "Notification", message)
|
|
233
|
+
results.append(result)
|
|
234
|
+
await sleep("1s") # Rate limiting
|
|
235
|
+
return {"sent": len(results)}
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
### How Discovery Works
|
|
239
|
+
|
|
240
|
+
When PyWorkflow imports your module:
|
|
241
|
+
|
|
242
|
+
1. **Module Import**: Python imports the module named by the `module` setting (e.g., `workflows`, which resolves to either `workflows.py` or `workflows/__init__.py`)
|
|
243
|
+
2. **Decorator Registration**: The `@workflow` and `@step` decorators automatically register
|
|
244
|
+
functions in the global registry
|
|
245
|
+
3. **Explicit Exports**: For package directories, the imports in `__init__.py` load each submodule, which runs the decorators and registers the workflows
|
|
246
|
+
|
|
247
|
+
<Note>
|
|
248
|
+
The key is that importing your module must trigger the `@workflow` decorators to run.
|
|
249
|
+
With a package directory, make sure `__init__.py` imports all workflow functions.
|
|
250
|
+
</Note>
|
|
251
|
+
|
|
252
|
+
### Nested Packages
|
|
253
|
+
|
|
254
|
+
For large applications, you can nest packages deeper:
|
|
255
|
+
|
|
256
|
+
```
|
|
257
|
+
myproject/
|
|
258
|
+
├── pyworkflow.config.yaml
|
|
259
|
+
└── myapp/
|
|
260
|
+
└── workflows/
|
|
261
|
+
├── __init__.py
|
|
262
|
+
├── orders/
|
|
263
|
+
│ ├── __init__.py
|
|
264
|
+
│ └── processing.py
|
|
265
|
+
└── notifications/
|
|
266
|
+
├── __init__.py
|
|
267
|
+
└── email.py
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
```yaml
|
|
271
|
+
# pyworkflow.config.yaml
|
|
272
|
+
module: myapp.workflows
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
Each nested `__init__.py` should re-export workflows from its submodules to ensure
|
|
276
|
+
they are discovered when the top-level module is imported.
|
|
277
|
+
|
|
278
|
+
---
|
|
279
|
+
|
|
280
|
+
## Programmatic Configuration
|
|
281
|
+
|
|
282
|
+
For more control, configure PyWorkflow in your Python code:
|
|
283
|
+
|
|
284
|
+
```python
|
|
285
|
+
import pyworkflow
|
|
286
|
+
from pyworkflow.storage import FileStorageBackend
|
|
287
|
+
|
|
288
|
+
# Configure once at application startup
|
|
289
|
+
pyworkflow.configure(
|
|
290
|
+
default_runtime="celery", # or "local"
|
|
291
|
+
default_durable=True,
|
|
292
|
+
storage=FileStorageBackend("./workflow_data"),
|
|
293
|
+
celery_broker="redis://localhost:6379/0",
|
|
294
|
+
)
|
|
295
|
+
|
|
296
|
+
# All subsequent start() calls use these defaults
|
|
297
|
+
async def main():
|
|
298
|
+
run_id = await pyworkflow.start(my_workflow, arg1, arg2)
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
### Configuration Options
|
|
302
|
+
|
|
303
|
+
| Option | Type | Default | Description |
|
|
304
|
+
|--------|------|---------|-------------|
|
|
305
|
+
| `default_runtime` | `str` | `"local"` | Default runtime: `"local"` or `"celery"` |
|
|
306
|
+
| `default_durable` | `bool` | `False` | Whether workflows are durable by default |
|
|
307
|
+
| `default_retries` | `int` | `3` | Default retry count for steps |
|
|
308
|
+
| `storage` | `StorageBackend` | `None` | Storage backend instance |
|
|
309
|
+
| `celery_broker` | `str` | `None` | Celery broker URL |
|
|
310
|
+
| `aws_region` | `str` | `None` | AWS region (for Lambda runtimes) |
|
|
311
|
+
| `event_soft_limit` | `int` | `10000` | Event count at which warnings start being logged |
|
|
312
|
+
| `event_hard_limit` | `int` | `50000` | Event count at which the workflow is terminated |
|
|
313
|
+
| `event_warning_interval` | `int` | `100` | Number of events between successive warnings once the soft limit is exceeded |
|
|
314
|
+
|
|
315
|
+
<Warning>
|
|
316
|
+
**Event limit settings should not be modified** unless you fully understand the implications. See [Limitations](/concepts/limitations) for details.
|
|
317
|
+
</Warning>
|
|
318
|
+
|
|
319
|
+
### Storage Backends
|
|
320
|
+
|
|
321
|
+
```python
|
|
322
|
+
from pyworkflow.storage import FileStorageBackend, InMemoryStorageBackend
|
|
323
|
+
|
|
324
|
+
# File-based storage (persistent)
|
|
325
|
+
pyworkflow.configure(
|
|
326
|
+
storage=FileStorageBackend("./workflow_data")
|
|
327
|
+
)
|
|
328
|
+
|
|
329
|
+
# In-memory storage (for testing)
|
|
330
|
+
pyworkflow.configure(
|
|
331
|
+
storage=InMemoryStorageBackend()
|
|
332
|
+
)
|
|
333
|
+
```
|
|
334
|
+
|
|
335
|
+
---
|
|
336
|
+
|
|
337
|
+
## Per-Call Overrides
|
|
338
|
+
|
|
339
|
+
Override configuration for individual `start()` calls:
|
|
340
|
+
|
|
341
|
+
```python
|
|
342
|
+
import pyworkflow
|
|
343
|
+
|
|
344
|
+
# Override runtime for this specific call
|
|
345
|
+
run_id = await pyworkflow.start(
|
|
346
|
+
my_workflow,
|
|
347
|
+
arg1, arg2,
|
|
348
|
+
runtime="local", # Override: run locally instead of Celery
|
|
349
|
+
durable=False, # Override: transient execution
|
|
350
|
+
)
|
|
351
|
+
|
|
352
|
+
# Use custom storage for this call
|
|
353
|
+
from pyworkflow.storage import InMemoryStorageBackend
|
|
354
|
+
|
|
355
|
+
run_id = await pyworkflow.start(
|
|
356
|
+
my_workflow,
|
|
357
|
+
arg1, arg2,
|
|
358
|
+
storage=InMemoryStorageBackend(), # Override storage
|
|
359
|
+
)
|
|
360
|
+
```
|
|
361
|
+
|
|
362
|
+
### Parameter Priority Example
|
|
363
|
+
|
|
364
|
+
```python
|
|
365
|
+
import pyworkflow
|
|
366
|
+
|
|
367
|
+
# Global config: runtime="local", durable=False
|
|
368
|
+
pyworkflow.configure(default_runtime="local", default_durable=False)
|
|
369
|
+
|
|
370
|
+
# This call uses local runtime, transient (from config)
|
|
371
|
+
await pyworkflow.start(workflow_a, "arg")
|
|
372
|
+
|
|
373
|
+
# This call overrides to celery runtime, durable
|
|
374
|
+
await pyworkflow.start(workflow_b, "arg", runtime="celery", durable=True)
|
|
375
|
+
|
|
376
|
+
# This call uses local runtime (from config), but durable=True (override)
|
|
377
|
+
await pyworkflow.start(workflow_c, "arg", durable=True)
|
|
378
|
+
```
|
|
379
|
+
|
|
380
|
+
---
|
|
381
|
+
|
|
382
|
+
## Environment Variables
|
|
383
|
+
|
|
384
|
+
Environment variables let you change settings per deployment without editing code or the config file:
|
|
385
|
+
|
|
386
|
+
| Variable | Description |
|
|
387
|
+
|----------|-------------|
|
|
388
|
+
| `PYWORKFLOW_MODULE` | Module for workflow discovery |
|
|
389
|
+
| `PYWORKFLOW_RUNTIME` | Default runtime (`local` or `celery`) |
|
|
390
|
+
| `PYWORKFLOW_STORAGE_BACKEND` | Storage backend type |
|
|
391
|
+
| `PYWORKFLOW_STORAGE_PATH` | Path for file storage |
|
|
392
|
+
| `PYWORKFLOW_CELERY_BROKER` | Celery broker URL |
|
|
393
|
+
| `PYWORKFLOW_CELERY_RESULT_BACKEND` | Celery result backend URL |
|
|
394
|
+
| `PYWORKFLOW_DISCOVER` | Modules to import for workflow discovery |
|
|
395
|
+
|
|
396
|
+
```bash
|
|
397
|
+
# Example: Production deployment with environment variables
|
|
398
|
+
export PYWORKFLOW_RUNTIME=celery
|
|
399
|
+
export PYWORKFLOW_CELERY_BROKER=redis://redis-cluster:6379/0
|
|
400
|
+
export PYWORKFLOW_STORAGE_PATH=/data/workflows
|
|
401
|
+
|
|
402
|
+
python -m myapp.main
|
|
403
|
+
```
|
|
404
|
+
|
|
405
|
+
---
|
|
406
|
+
|
|
407
|
+
## Configuration Patterns
|
|
408
|
+
|
|
409
|
+
### Development vs Production
|
|
410
|
+
|
|
411
|
+
<Tabs>
|
|
412
|
+
<Tab title="Development">
|
|
413
|
+
```yaml
|
|
414
|
+
# pyworkflow.config.yaml (dev)
|
|
415
|
+
module: myapp.workflows
|
|
416
|
+
runtime: local # Run in-process for fast iteration
|
|
417
|
+
storage:
|
|
418
|
+
backend: memory # No persistence needed
|
|
419
|
+
```
|
|
420
|
+
</Tab>
|
|
421
|
+
<Tab title="Production">
|
|
422
|
+
```yaml
|
|
423
|
+
# pyworkflow.config.yaml (prod)
|
|
424
|
+
module: myapp.workflows
|
|
425
|
+
runtime: celery
|
|
426
|
+
storage:
|
|
427
|
+
backend: file
|
|
428
|
+
path: /data/workflows
|
|
429
|
+
celery:
|
|
430
|
+
broker: redis://redis:6379/0
|
|
431
|
+
result_backend: redis://redis:6379/1
|
|
432
|
+
```
|
|
433
|
+
</Tab>
|
|
434
|
+
</Tabs>
|
|
435
|
+
|
|
436
|
+
### Testing Configuration
|
|
437
|
+
|
|
438
|
+
```python
|
|
439
|
+
import pytest
|
|
440
|
+
import pyworkflow
|
|
441
|
+
from pyworkflow.storage import InMemoryStorageBackend
|
|
442
|
+
|
|
443
|
+
@pytest.fixture(autouse=True)
|
|
444
|
+
def reset_config():
|
|
445
|
+
"""Reset PyWorkflow config before each test."""
|
|
446
|
+
pyworkflow.reset_config()
|
|
447
|
+
pyworkflow.configure(
|
|
448
|
+
default_runtime="local",
|
|
449
|
+
default_durable=True,
|
|
450
|
+
storage=InMemoryStorageBackend(),
|
|
451
|
+
)
|
|
452
|
+
yield
|
|
453
|
+
pyworkflow.reset_config()
|
|
454
|
+
```
|
|
455
|
+
|
|
456
|
+
### Conditional Configuration
|
|
457
|
+
|
|
458
|
+
```python
|
|
459
|
+
import os
|
|
460
|
+
import pyworkflow
|
|
461
|
+
|
|
462
|
+
if os.getenv("ENVIRONMENT") == "production":
|
|
463
|
+
pyworkflow.configure(
|
|
464
|
+
default_runtime="celery",
|
|
465
|
+
default_durable=True,
|
|
466
|
+
celery_broker=os.getenv("CELERY_BROKER_URL"),
|
|
467
|
+
)
|
|
468
|
+
else:
|
|
469
|
+
pyworkflow.configure(
|
|
470
|
+
default_runtime="local",
|
|
471
|
+
default_durable=False,
|
|
472
|
+
)
|
|
473
|
+
```
|
|
474
|
+
|
|
475
|
+
---
|
|
476
|
+
|
|
477
|
+
## Fault Tolerance Settings
|
|
478
|
+
|
|
479
|
+
Configure automatic recovery behavior for workflows that are interrupted by a worker crash.
|
|
480
|
+
|
|
481
|
+
<Tabs>
|
|
482
|
+
<Tab title="Config File">
|
|
483
|
+
```yaml
|
|
484
|
+
# pyworkflow.config.yaml
|
|
485
|
+
|
|
486
|
+
recovery:
|
|
487
|
+
recover_on_worker_loss: true
|
|
488
|
+
max_recovery_attempts: 5
|
|
489
|
+
```
|
|
490
|
+
</Tab>
|
|
491
|
+
<Tab title="Programmatic">
|
|
492
|
+
```python
|
|
493
|
+
import pyworkflow
|
|
494
|
+
|
|
495
|
+
pyworkflow.configure(
|
|
496
|
+
default_recover_on_worker_loss=True,
|
|
497
|
+
default_max_recovery_attempts=5,
|
|
498
|
+
)
|
|
499
|
+
```
|
|
500
|
+
</Tab>
|
|
501
|
+
<Tab title="Per-Workflow">
|
|
502
|
+
```python
|
|
503
|
+
from pyworkflow import workflow
|
|
504
|
+
|
|
505
|
+
@workflow(
|
|
506
|
+
recover_on_worker_loss=True,
|
|
507
|
+
max_recovery_attempts=3,
|
|
508
|
+
)
|
|
509
|
+
async def resilient_workflow():
|
|
510
|
+
pass
|
|
511
|
+
```
|
|
512
|
+
</Tab>
|
|
513
|
+
</Tabs>
|
|
514
|
+
|
|
515
|
+
### Recovery Options
|
|
516
|
+
|
|
517
|
+
| Option | Type | Default | Description |
|
|
518
|
+
|--------|------|---------|-------------|
|
|
519
|
+
| `recover_on_worker_loss` | `bool` | `True` (durable) | Enable automatic recovery when a worker crashes |
|
|
520
|
+
| `max_recovery_attempts` | `int` | `3` | Maximum number of recovery attempts before marking as failed |
|
|
521
|
+
|
|
522
|
+
<Note>
|
|
523
|
+
For durable workflows, recovery replays events to restore state. For transient workflows, recovery restarts from the beginning. See [Fault Tolerance](/concepts/fault-tolerance) for details.
|
|
524
|
+
</Note>
|
|
525
|
+
|
|
526
|
+
---
|
|
527
|
+
|
|
528
|
+
## Reading Current Configuration
|
|
529
|
+
|
|
530
|
+
Access the current configuration programmatically:
|
|
531
|
+
|
|
532
|
+
```python
|
|
533
|
+
from pyworkflow.config import get_config
|
|
534
|
+
|
|
535
|
+
config = get_config()
|
|
536
|
+
|
|
537
|
+
print(f"Runtime: {config.default_runtime}")
|
|
538
|
+
print(f"Durable: {config.default_durable}")
|
|
539
|
+
print(f"Storage: {config.storage}")
|
|
540
|
+
print(f"Celery Broker: {config.celery_broker}")
|
|
541
|
+
```
|
|
542
|
+
|
|
543
|
+
---
|
|
544
|
+
|
|
545
|
+
## Next Steps
|
|
546
|
+
|
|
547
|
+
<CardGroup cols={2}>
|
|
548
|
+
<Card title="CLI Guide" icon="terminal" href="/guides/cli">
|
|
549
|
+
Learn CLI commands and options.
|
|
550
|
+
</Card>
|
|
551
|
+
<Card title="Celery Runtime" icon="server" href="/guides/celery">
|
|
552
|
+
Configure distributed execution with Celery.
|
|
553
|
+
</Card>
|
|
554
|
+
<Card title="Storage Backends" icon="database" href="/concepts/storage">
|
|
555
|
+
Choose the right storage backend.
|
|
556
|
+
</Card>
|
|
557
|
+
<Card title="Deployment" icon="cloud" href="/guides/deployment">
|
|
558
|
+
Deploy workflows to production.
|
|
559
|
+
</Card>
|
|
560
|
+
</CardGroup>
|
docs/introduction.mdx
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: 'Introduction'
|
|
3
|
+
description: 'Distributed, durable workflow orchestration for Python'
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
<img
|
|
7
|
+
className="block dark:hidden"
|
|
8
|
+
src="/images/hero-light.svg"
|
|
9
|
+
alt="PyWorkflow Hero Light"
|
|
10
|
+
/>
|
|
11
|
+
<img
|
|
12
|
+
className="hidden dark:block"
|
|
13
|
+
src="/images/hero-dark.svg"
|
|
14
|
+
alt="PyWorkflow Hero Dark"
|
|
15
|
+
/>
|
|
16
|
+
|
|
17
|
+
## What is PyWorkflow?
|
|
18
|
+
|
|
19
|
+
PyWorkflow is a workflow orchestration framework created by the team behind [FlowHunt](https://www.flowhunt.io). It lets you build complex, long-running business processes as simple Python code, and it handles the hard parts of distributed systems for you:
|
|
20
|
+
|
|
21
|
+
- **Fault tolerance** - Automatic recovery from failures
|
|
22
|
+
- **Automatic retries** - Configurable retry strategies with backoff
|
|
23
|
+
- **State management** - Event sourcing for complete auditability
|
|
24
|
+
- **Horizontal scaling** - Distribute work across multiple workers
|
|
25
|
+
|
|
26
|
+
## Why PyWorkflow?
|
|
27
|
+
|
|
28
|
+
Building reliable, long-running processes is hard. Traditional approaches leave you to answer difficult questions yourself:
|
|
29
|
+
|
|
30
|
+
- What happens when a server restarts mid-process?
|
|
31
|
+
- How do you retry failed operations without duplicating work?
|
|
32
|
+
- How do you pause for hours or days without holding connections?
|
|
33
|
+
- How do you track what happened and when?
|
|
34
|
+
|
|
35
|
+
PyWorkflow solves these problems by treating your workflows as **event-sourced state machines** that can suspend, resume, and recover from any point.
|
|
36
|
+
|
|
37
|
+
## Key Features
|
|
38
|
+
|
|
39
|
+
<CardGroup cols={2}>
|
|
40
|
+
<Card title="Distributed by Default" icon="server">
|
|
41
|
+
All workflows execute across Celery workers for horizontal scaling. Start with one worker, scale to hundreds.
|
|
42
|
+
</Card>
|
|
43
|
+
<Card title="Durable Execution" icon="database">
|
|
44
|
+
Event sourcing ensures workflows can recover from any failure. Every state change is recorded and can be replayed.
|
|
45
|
+
</Card>
|
|
46
|
+
<Card title="Time Travel" icon="clock">
|
|
47
|
+
Sleep for minutes, hours, or days with automatic resumption. Workflows suspend without holding resources.
|
|
48
|
+
</Card>
|
|
49
|
+
<Card title="Fault Tolerant" icon="shield">
|
|
50
|
+
Automatic retries with configurable backoff strategies. Distinguish between transient and permanent failures.
|
|
51
|
+
</Card>
|
|
52
|
+
<Card title="Zero-Resource Suspension" icon="pause">
|
|
53
|
+
Workflows suspend without holding connections, threads, or memory. Resume on any available worker.
|
|
54
|
+
</Card>
|
|
55
|
+
<Card title="Production Ready" icon="rocket">
|
|
56
|
+
Built on battle-tested Celery and Redis. Comprehensive logging and monitoring support.
|
|
57
|
+
</Card>
|
|
58
|
+
</CardGroup>
|
|
59
|
+
|
|
60
|
+
## How It Works
|
|
61
|
+
|
|
62
|
+
PyWorkflow uses **event sourcing** to achieve durable, fault-tolerant execution:
|
|
63
|
+
|
|
64
|
+
```
|
|
65
|
+
┌─────────────────────────────────────────────────────┐
|
|
66
|
+
│ Your Application │
|
|
67
|
+
│ │
|
|
68
|
+
│ start(my_workflow, args) │
|
|
69
|
+
│ │ │
|
|
70
|
+
└─────────┼───────────────────────────────────────────┘
|
|
71
|
+
│
|
|
72
|
+
▼
|
|
73
|
+
┌─────────┐
|
|
74
|
+
│ Redis │ ◄──── Message Broker
|
|
75
|
+
└─────────┘
|
|
76
|
+
│
|
|
77
|
+
├──────┬──────┐
|
|
78
|
+
▼ ▼ ▼
|
|
79
|
+
┌──────┐ ┌──────┐ ┌──────┐
|
|
80
|
+
│Worker│ │Worker│ │Worker│ ◄──── Horizontal Scaling
|
|
81
|
+
└──────┘ └──────┘ └──────┘
|
|
82
|
+
│ │ │
|
|
83
|
+
└──────┴──────┘
|
|
84
|
+
│
|
|
85
|
+
▼
|
|
86
|
+
┌──────────┐
|
|
87
|
+
│ Storage │ ◄──── Event Log
|
|
88
|
+
└──────────┘
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
1. **All state changes are recorded as events** in an append-only log
|
|
92
|
+
2. **Deterministic replay** enables workflow resumption from any point
|
|
93
|
+
3. **A complete audit trail** captures everything that happened in the workflow
|
|
94
|
+
|
|
95
|
+
## Quick Example
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
from pyworkflow import workflow, step, start, sleep
|
|
99
|
+
|
|
100
|
+
@step()
|
|
101
|
+
async def send_welcome_email(user_id: str):
|
|
102
|
+
print(f"Sending welcome email to {user_id}")
|
|
103
|
+
return f"Email sent to {user_id}"
|
|
104
|
+
|
|
105
|
+
@step()
|
|
106
|
+
async def send_tips_email(user_id: str):
|
|
107
|
+
print(f"Sending tips email to {user_id}")
|
|
108
|
+
return f"Tips sent to {user_id}"
|
|
109
|
+
|
|
110
|
+
@workflow()
|
|
111
|
+
async def onboarding_workflow(user_id: str):
|
|
112
|
+
# Send welcome email immediately
|
|
113
|
+
await send_welcome_email(user_id)
|
|
114
|
+
|
|
115
|
+
# Sleep for 1 day - zero resources consumed
|
|
116
|
+
await sleep("1d")
|
|
117
|
+
|
|
118
|
+
# Automatically resumes after 1 day
|
|
119
|
+
await send_tips_email(user_id)
|
|
120
|
+
|
|
121
|
+
return "Onboarding complete"
|
|
122
|
+
|
|
123
|
+
# Start the workflow
|
|
124
|
+
run_id = start(onboarding_workflow, user_id="user_123")
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
## Use Cases
|
|
128
|
+
|
|
129
|
+
PyWorkflow is ideal for:
|
|
130
|
+
|
|
131
|
+
- **User onboarding flows** - Multi-step processes with delays
|
|
132
|
+
- **Order processing** - Payment, fulfillment, and notification pipelines
|
|
133
|
+
- **Data pipelines** - ETL workflows with retry and monitoring
|
|
134
|
+
- **Scheduled tasks** - Complex scheduling with dependencies
|
|
135
|
+
- **Approval workflows** - Human-in-the-loop processes
|
|
136
|
+
- **Notification sequences** - Drip campaigns and follow-ups
|
|
137
|
+
|
|
138
|
+
## Next Steps
|
|
139
|
+
|
|
140
|
+
<CardGroup cols={2}>
|
|
141
|
+
<Card
|
|
142
|
+
title="Quick Start"
|
|
143
|
+
icon="rocket"
|
|
144
|
+
href="/quickstart"
|
|
145
|
+
>
|
|
146
|
+
Get up and running with PyWorkflow in under 5 minutes.
|
|
147
|
+
</Card>
|
|
148
|
+
<Card
|
|
149
|
+
title="Core Concepts"
|
|
150
|
+
icon="book"
|
|
151
|
+
href="/concepts/workflows"
|
|
152
|
+
>
|
|
153
|
+
Deep dive into workflows, steps, and event sourcing.
|
|
154
|
+
</Card>
|
|
155
|
+
</CardGroup>
|