pyworkflow-engine 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- pyworkflow/__init__.py +10 -1
- pyworkflow/celery/tasks.py +272 -24
- pyworkflow/cli/__init__.py +4 -1
- pyworkflow/cli/commands/runs.py +4 -4
- pyworkflow/cli/commands/setup.py +203 -4
- pyworkflow/cli/utils/config_generator.py +76 -3
- pyworkflow/cli/utils/docker_manager.py +232 -0
- pyworkflow/context/__init__.py +13 -0
- pyworkflow/context/base.py +26 -0
- pyworkflow/context/local.py +80 -0
- pyworkflow/context/step_context.py +295 -0
- pyworkflow/core/registry.py +6 -1
- pyworkflow/core/step.py +141 -0
- pyworkflow/core/workflow.py +56 -0
- pyworkflow/engine/events.py +30 -0
- pyworkflow/engine/replay.py +39 -0
- pyworkflow/primitives/child_workflow.py +1 -1
- pyworkflow/runtime/local.py +1 -1
- pyworkflow/storage/__init__.py +14 -0
- pyworkflow/storage/base.py +35 -0
- pyworkflow/storage/cassandra.py +1747 -0
- pyworkflow/storage/config.py +69 -0
- pyworkflow/storage/dynamodb.py +31 -2
- pyworkflow/storage/file.py +28 -0
- pyworkflow/storage/memory.py +18 -0
- pyworkflow/storage/mysql.py +1159 -0
- pyworkflow/storage/postgres.py +27 -2
- pyworkflow/storage/schemas.py +4 -3
- pyworkflow/storage/sqlite.py +25 -2
- {pyworkflow_engine-0.1.7.dist-info → pyworkflow_engine-0.1.9.dist-info}/METADATA +7 -4
- pyworkflow_engine-0.1.9.dist-info/RECORD +91 -0
- pyworkflow_engine-0.1.9.dist-info/top_level.txt +1 -0
- dashboard/backend/app/__init__.py +0 -1
- dashboard/backend/app/config.py +0 -32
- dashboard/backend/app/controllers/__init__.py +0 -6
- dashboard/backend/app/controllers/run_controller.py +0 -86
- dashboard/backend/app/controllers/workflow_controller.py +0 -33
- dashboard/backend/app/dependencies/__init__.py +0 -5
- dashboard/backend/app/dependencies/storage.py +0 -50
- dashboard/backend/app/repositories/__init__.py +0 -6
- dashboard/backend/app/repositories/run_repository.py +0 -80
- dashboard/backend/app/repositories/workflow_repository.py +0 -27
- dashboard/backend/app/rest/__init__.py +0 -8
- dashboard/backend/app/rest/v1/__init__.py +0 -12
- dashboard/backend/app/rest/v1/health.py +0 -33
- dashboard/backend/app/rest/v1/runs.py +0 -133
- dashboard/backend/app/rest/v1/workflows.py +0 -41
- dashboard/backend/app/schemas/__init__.py +0 -23
- dashboard/backend/app/schemas/common.py +0 -16
- dashboard/backend/app/schemas/event.py +0 -24
- dashboard/backend/app/schemas/hook.py +0 -25
- dashboard/backend/app/schemas/run.py +0 -54
- dashboard/backend/app/schemas/step.py +0 -28
- dashboard/backend/app/schemas/workflow.py +0 -31
- dashboard/backend/app/server.py +0 -87
- dashboard/backend/app/services/__init__.py +0 -6
- dashboard/backend/app/services/run_service.py +0 -240
- dashboard/backend/app/services/workflow_service.py +0 -155
- dashboard/backend/main.py +0 -18
- docs/concepts/cancellation.mdx +0 -362
- docs/concepts/continue-as-new.mdx +0 -434
- docs/concepts/events.mdx +0 -266
- docs/concepts/fault-tolerance.mdx +0 -370
- docs/concepts/hooks.mdx +0 -552
- docs/concepts/limitations.mdx +0 -167
- docs/concepts/schedules.mdx +0 -775
- docs/concepts/sleep.mdx +0 -312
- docs/concepts/steps.mdx +0 -301
- docs/concepts/workflows.mdx +0 -255
- docs/guides/cli.mdx +0 -942
- docs/guides/configuration.mdx +0 -560
- docs/introduction.mdx +0 -155
- docs/quickstart.mdx +0 -279
- examples/__init__.py +0 -1
- examples/celery/__init__.py +0 -1
- examples/celery/durable/docker-compose.yml +0 -55
- examples/celery/durable/pyworkflow.config.yaml +0 -12
- examples/celery/durable/workflows/__init__.py +0 -122
- examples/celery/durable/workflows/basic.py +0 -87
- examples/celery/durable/workflows/batch_processing.py +0 -102
- examples/celery/durable/workflows/cancellation.py +0 -273
- examples/celery/durable/workflows/child_workflow_patterns.py +0 -240
- examples/celery/durable/workflows/child_workflows.py +0 -202
- examples/celery/durable/workflows/continue_as_new.py +0 -260
- examples/celery/durable/workflows/fault_tolerance.py +0 -210
- examples/celery/durable/workflows/hooks.py +0 -211
- examples/celery/durable/workflows/idempotency.py +0 -112
- examples/celery/durable/workflows/long_running.py +0 -99
- examples/celery/durable/workflows/retries.py +0 -101
- examples/celery/durable/workflows/schedules.py +0 -209
- examples/celery/transient/01_basic_workflow.py +0 -91
- examples/celery/transient/02_fault_tolerance.py +0 -257
- examples/celery/transient/__init__.py +0 -20
- examples/celery/transient/pyworkflow.config.yaml +0 -25
- examples/local/__init__.py +0 -1
- examples/local/durable/01_basic_workflow.py +0 -94
- examples/local/durable/02_file_storage.py +0 -132
- examples/local/durable/03_retries.py +0 -169
- examples/local/durable/04_long_running.py +0 -119
- examples/local/durable/05_event_log.py +0 -145
- examples/local/durable/06_idempotency.py +0 -148
- examples/local/durable/07_hooks.py +0 -334
- examples/local/durable/08_cancellation.py +0 -233
- examples/local/durable/09_child_workflows.py +0 -198
- examples/local/durable/10_child_workflow_patterns.py +0 -265
- examples/local/durable/11_continue_as_new.py +0 -249
- examples/local/durable/12_schedules.py +0 -198
- examples/local/durable/__init__.py +0 -1
- examples/local/transient/01_quick_tasks.py +0 -87
- examples/local/transient/02_retries.py +0 -130
- examples/local/transient/03_sleep.py +0 -141
- examples/local/transient/__init__.py +0 -1
- pyworkflow_engine-0.1.7.dist-info/RECORD +0 -196
- pyworkflow_engine-0.1.7.dist-info/top_level.txt +0 -5
- tests/examples/__init__.py +0 -0
- tests/integration/__init__.py +0 -0
- tests/integration/test_cancellation.py +0 -330
- tests/integration/test_child_workflows.py +0 -439
- tests/integration/test_continue_as_new.py +0 -428
- tests/integration/test_dynamodb_storage.py +0 -1146
- tests/integration/test_fault_tolerance.py +0 -369
- tests/integration/test_schedule_storage.py +0 -484
- tests/unit/__init__.py +0 -0
- tests/unit/backends/__init__.py +0 -1
- tests/unit/backends/test_dynamodb_storage.py +0 -1554
- tests/unit/backends/test_postgres_storage.py +0 -1281
- tests/unit/backends/test_sqlite_storage.py +0 -1460
- tests/unit/conftest.py +0 -41
- tests/unit/test_cancellation.py +0 -364
- tests/unit/test_child_workflows.py +0 -680
- tests/unit/test_continue_as_new.py +0 -441
- tests/unit/test_event_limits.py +0 -316
- tests/unit/test_executor.py +0 -320
- tests/unit/test_fault_tolerance.py +0 -334
- tests/unit/test_hooks.py +0 -495
- tests/unit/test_registry.py +0 -261
- tests/unit/test_replay.py +0 -420
- tests/unit/test_schedule_schemas.py +0 -285
- tests/unit/test_schedule_utils.py +0 -286
- tests/unit/test_scheduled_workflow.py +0 -274
- tests/unit/test_step.py +0 -353
- tests/unit/test_workflow.py +0 -243
- {pyworkflow_engine-0.1.7.dist-info → pyworkflow_engine-0.1.9.dist-info}/WHEEL +0 -0
- {pyworkflow_engine-0.1.7.dist-info → pyworkflow_engine-0.1.9.dist-info}/entry_points.txt +0 -0
- {pyworkflow_engine-0.1.7.dist-info → pyworkflow_engine-0.1.9.dist-info}/licenses/LICENSE +0 -0
examples/celery/durable/workflows/schedules.py
DELETED

@@ -1,209 +0,0 @@
-"""
-Celery Durable Workflow - Schedules Example
-
-This example demonstrates scheduled workflow execution with Celery Beat.
-- Cron-based scheduling (every minute)
-- Interval-based scheduling (every 30 seconds)
-- Overlap policies to control concurrent executions
-- Schedule management (pause, resume, delete)
-
-Prerequisites:
-1. Start Redis: docker run -d -p 6379:6379 redis:7-alpine
-2. Start worker: pyworkflow --module examples.celery.durable.12_schedules worker run
-3. Start beat: pyworkflow --module examples.celery.durable.12_schedules beat run
-
-CLI Commands:
-    # Create a schedule via CLI
-    pyworkflow schedules create metrics_workflow --cron "* * * * *" --overlap skip
-
-    # List all schedules
-    pyworkflow schedules list
-
-    # Pause/resume a schedule
-    pyworkflow schedules pause <schedule_id>
-    pyworkflow schedules resume <schedule_id>
-
-    # Trigger immediately (bypass schedule)
-    pyworkflow schedules trigger <schedule_id>
-
-    # View schedule details
-    pyworkflow schedules show <schedule_id>
-"""
-
-from datetime import datetime
-
-from pyworkflow import (
-    OverlapPolicy,
-    scheduled_workflow,
-    step,
-    workflow,
-)
-
-
-# --- Steps ---
-@step()
-async def collect_metrics() -> dict:
-    """Collect system metrics."""
-    timestamp = datetime.now().isoformat()
-    print(f"[Step] Collecting metrics at {timestamp}...")
-    return {
-        "timestamp": timestamp,
-        "cpu_usage": 45.2,
-        "memory_usage": 62.8,
-        "disk_usage": 78.1,
-    }
-
-
-@step()
-async def store_metrics(metrics: dict) -> dict:
-    """Store metrics in database (simulated)."""
-    print(f"[Step] Storing metrics: {metrics}")
-    return {**metrics, "stored": True}
-
-
-@step()
-async def check_alerts(metrics: dict) -> dict:
-    """Check if any metrics exceed thresholds."""
-    alerts = []
-    if metrics.get("cpu_usage", 0) > 80:
-        alerts.append("High CPU usage")
-    if metrics.get("memory_usage", 0) > 90:
-        alerts.append("High memory usage")
-    if metrics.get("disk_usage", 0) > 85:
-        alerts.append("High disk usage")
-
-    print(f"[Step] Alert check complete. Alerts: {alerts or 'None'}")
-    return {**metrics, "alerts": alerts}
-
-
-# --- Scheduled Workflow (using decorator) ---
-@scheduled_workflow(
-    cron="* * * * *",  # Every minute
-    overlap_policy=OverlapPolicy.SKIP,  # Skip if previous run still active
-    timezone="UTC",
-)
-async def metrics_workflow() -> dict:
-    """
-    Scheduled metrics collection workflow.
-
-    Runs every minute via Celery Beat.
-    Uses SKIP overlap policy - if a previous run is still active,
-    new runs are skipped to prevent resource exhaustion.
-
-    Steps:
-    1. Collect current system metrics
-    2. Store metrics in database
-    3. Check for threshold alerts
-    """
-    metrics = await collect_metrics()
-    metrics = await store_metrics(metrics)
-    metrics = await check_alerts(metrics)
-    return metrics
-
-
-# --- Regular Workflow (for programmatic scheduling) ---
-@workflow()
-async def cleanup_workflow(days_old: int = 30) -> dict:
-    """
-    Cleanup old data workflow.
-
-    This workflow is scheduled programmatically in main().
-    """
-    print(f"[Workflow] Cleaning up data older than {days_old} days...")
-    return {"cleaned": True, "days_old": days_old}
-
-
-async def main() -> None:
-    """
-    Create schedules programmatically.
-
-    The @scheduled_workflow decorator automatically creates a schedule
-    when activate_scheduled_workflows() is called (done by Beat).
-
-    For regular @workflow functions, use create_schedule() to create
-    schedules programmatically.
-    """
-    import argparse
-
-    from pyworkflow import (
-        OverlapPolicy,
-        ScheduleSpec,
-        create_schedule,
-        delete_schedule,
-        list_schedules,
-        pause_schedule,
-        resume_schedule,
-    )
-
-    parser = argparse.ArgumentParser(description="Schedule Management Example")
-    parser.add_argument(
-        "--action",
-        choices=["create", "list", "pause", "resume", "delete"],
-        default="create",
-        help="Action to perform",
-    )
-    parser.add_argument("--schedule-id", help="Schedule ID for pause/resume/delete")
-    args = parser.parse_args()
-
-    print("=== Celery Schedules Example ===\n")
-
-    if args.action == "create":
-        # Create a schedule for the cleanup workflow
-        print("Creating cleanup schedule (runs every 2 minutes)...")
-        spec = ScheduleSpec(cron="*/2 * * * *", timezone="UTC")
-
-        schedule = await create_schedule(
-            workflow_name="cleanup_workflow",
-            spec=spec,
-            overlap_policy=OverlapPolicy.SKIP,
-            schedule_id="cleanup-hourly",
-            days_old=7,  # kwargs passed to workflow
-        )
-        print(f"Schedule created: {schedule.schedule_id}")
-        print(f"  Workflow: {schedule.workflow_name}")
-        print(f"  Cron: {schedule.spec.cron}")
-        print(f"  Next run: {schedule.next_run_time}")
-
-        # Also show the decorated workflow schedule
-        print("\nThe @scheduled_workflow decorator creates:")
-        print("  - metrics_workflow: runs every minute")
-        print("  - Activated automatically when Beat starts")
-
-    elif args.action == "list":
-        schedules = await list_schedules()
-        print(f"Found {len(schedules)} schedule(s):\n")
-        for sched in schedules:
-            print(f"  {sched.schedule_id}")
-            print(f"    Workflow: {sched.workflow_name}")
-            print(f"    Status: {sched.status.value}")
-            print(f"    Spec: cron={sched.spec.cron}, interval={sched.spec.interval}")
-            print(f"    Total runs: {sched.total_runs}")
-            print()
-
-    elif args.action == "pause" and args.schedule_id:
-        schedule = await pause_schedule(args.schedule_id)
-        print(f"Paused schedule: {schedule.schedule_id}")
-        print(f"Status: {schedule.status.value}")
-
-    elif args.action == "resume" and args.schedule_id:
-        schedule = await resume_schedule(args.schedule_id)
-        print(f"Resumed schedule: {schedule.schedule_id}")
-        print(f"Status: {schedule.status.value}")
-
-    elif args.action == "delete" and args.schedule_id:
-        await delete_schedule(args.schedule_id)
-        print(f"Deleted schedule: {args.schedule_id}")
-
-    else:
-        print("Invalid action or missing schedule-id")
-
-    print("\n=== How to Run ===")
-    print("1. Start worker: pyworkflow --module examples.celery.durable.12_schedules worker run")
-    print("2. Start beat: pyworkflow --module examples.celery.durable.12_schedules beat run")
-    print("3. Watch logs to see scheduled executions!")
-
-
-if __name__ == "__main__":
-    import asyncio

-    asyncio.run(main())
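For orientation, the programmatic scheduling pattern that the removed schedules example exercised reduces to a handful of calls. The sketch below is condensed from that deleted file and uses only names it imported (ScheduleSpec, create_schedule, OverlapPolicy, workflow); the schedule ID is hypothetical, and the sketch illustrates the removed example rather than serving as a definitive reference for the 0.1.9 API.

    import asyncio

    from pyworkflow import OverlapPolicy, ScheduleSpec, create_schedule, workflow


    @workflow()
    async def cleanup_workflow(days_old: int = 30) -> dict:
        """Stand-in workflow to be scheduled (mirrors the deleted example)."""
        return {"cleaned": True, "days_old": days_old}


    async def register_schedule() -> None:
        # Run cleanup_workflow every 2 minutes; SKIP avoids overlapping runs.
        spec = ScheduleSpec(cron="*/2 * * * *", timezone="UTC")
        schedule = await create_schedule(
            workflow_name="cleanup_workflow",
            spec=spec,
            overlap_policy=OverlapPolicy.SKIP,
            schedule_id="cleanup-every-2m",  # hypothetical ID for this sketch
            days_old=7,  # keyword arguments are forwarded to the workflow
        )
        print(f"Created {schedule.schedule_id}, next run at {schedule.next_run_time}")


    if __name__ == "__main__":
        asyncio.run(register_schedule())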
examples/celery/transient/01_basic_workflow.py
DELETED

@@ -1,91 +0,0 @@
-"""
-Celery Transient Workflow - Basic Example
-
-This example demonstrates a simple transient workflow running on Celery workers.
-
-Transient workflows:
-- Do NOT record events
-- Do NOT persist state
-- Are simpler and faster
-- Best for short-lived, stateless tasks
-
-Prerequisites:
-1. Start Redis: docker run -d -p 6379:6379 redis:7-alpine
-2. Start worker: pyworkflow --module examples.celery.transient.01_basic_workflow worker run
-
-Run with CLI:
-    pyworkflow --module examples.celery.transient.01_basic_workflow workflows run quick_task \
-        --arg item_id=item-123
-
-Note: Since this is transient, runs list and runs status won't show this workflow.
-"""
-
-import asyncio
-
-from pyworkflow import step, workflow
-
-
-@step(name="transient_process_item")
-async def process_item(item_id: str) -> dict:
-    """Process a single item."""
-    print(f"[Step] Processing item {item_id}...")
-    await asyncio.sleep(0.5)  # Simulate quick processing
-    return {"item_id": item_id, "processed": True}
-
-
-@step(name="transient_enrich_item")
-async def enrich_item(item: dict) -> dict:
-    """Enrich item with additional data."""
-    print(f"[Step] Enriching item {item['item_id']}...")
-    await asyncio.sleep(0.3)
-    return {**item, "enriched": True, "score": 0.95}
-
-
-@step(name="transient_store_result")
-async def store_result(item: dict) -> dict:
-    """Store the processed result."""
-    print(f"[Step] Storing result for {item['item_id']}...")
-    await asyncio.sleep(0.2)
-    return {**item, "stored": True}
-
-
-@workflow(durable=False, tags=["celery", "transient"])  # Transient workflow - no event recording
-async def quick_task(item_id: str) -> dict:
-    """
-    Quick processing task (transient).
-
-    This workflow runs without event recording for maximum performance.
-    Ideal for:
-    - High-throughput processing
-    - Stateless transformations
-    - Quick API calls
-    - Tasks that can be safely retried from scratch
-    """
-    print(f"\n[Workflow] Quick task for {item_id}")
-
-    item = await process_item(item_id)
-    item = await enrich_item(item)
-    item = await store_result(item)
-
-    print(f"[Workflow] Completed: {item}\n")
-    return item
-
-
-async def main() -> None:
-    """Run the transient workflow example."""
-    import argparse
-
-    import pyworkflow
-
-    parser = argparse.ArgumentParser(description="Quick Processing Task (Transient)")
-    parser.add_argument("--item-id", default="item-123", help="Item ID to process")
-    args = parser.parse_args()
-
-    print(f"Starting quick task for {args.item_id}...")
-    print("NOTE: This is a transient workflow - no events are recorded")
-    run_id = await pyworkflow.start(quick_task, args.item_id)
-    print(f"Task dispatched with run_id: {run_id}")
-
-
-if __name__ == "__main__":
-    asyncio.run(main())
examples/celery/transient/02_fault_tolerance.py
DELETED

@@ -1,257 +0,0 @@
-"""
-Celery Transient Workflow - Fault Tolerance Example
-
-This example demonstrates fault tolerance options for transient workflows.
-
-Key difference from durable workflows:
-- Transient workflows do NOT record events
-- On worker failure, there's no state to recover from
-- By default, failed transient workflows stay FAILED
-- Optionally, they can be rescheduled to run from scratch
-
-Configuration options:
-1. recover_on_worker_loss=False (DEFAULT for transient)
-   - On worker crash: workflow is marked as FAILED
-   - No automatic retry
-   - Use when: tasks have side effects or can't be safely repeated
-
-2. recover_on_worker_loss=True
-   - On worker crash: workflow is rescheduled from scratch
-   - All steps run again (no event replay - there are no events!)
-   - Use when: tasks are idempotent and can be safely restarted
-
-Prerequisites:
-1. Start Redis: docker run -d -p 6379:6379 redis:7-alpine
-2. Start worker: pyworkflow --module examples.celery.transient.02_fault_tolerance worker run
-
-Run with CLI:
-    # Default behavior (no recovery)
-    pyworkflow --module examples.celery.transient.02_fault_tolerance workflows run image_processor \
-        --arg image_id=img-123
-
-    # With recovery enabled
-    pyworkflow --module examples.celery.transient.02_fault_tolerance workflows run batch_processor \
-        --arg batch_id=batch-456
-
-To test fault tolerance:
-1. Start the workflow
-2. Kill the worker during execution
-3. Start a new worker
-4. Observe the difference between recover_on_worker_loss=True/False
-"""
-
-import asyncio
-
-from pyworkflow import step, workflow
-
-
-@step(name="transient_download_image")
-async def download_image(image_id: str) -> dict:
-    """Download image from storage."""
-    print(f"[Step] Downloading image {image_id}...")
-    await asyncio.sleep(2)
-    return {"image_id": image_id, "size_mb": 5.2, "downloaded": True}
-
-
-@step(name="transient_resize_image")
-async def resize_image(image: dict) -> dict:
-    """Resize image to standard dimensions."""
-    print(f"[Step] Resizing image {image['image_id']}...")
-    print("  (taking 8 seconds - kill worker now to test!)")
-    await asyncio.sleep(8)  # Long operation - good time to kill worker
-    return {**image, "resized": True, "new_size_mb": 1.2}
-
-
-@step(name="transient_apply_filters")
-async def apply_filters(image: dict) -> dict:
-    """Apply visual filters to image."""
-    print(f"[Step] Applying filters to {image['image_id']}...")
-    print("  (taking 6 seconds - kill worker now to test!)")
-    await asyncio.sleep(6)  # Another good time to kill worker
-    return {**image, "filtered": True}
-
-
-@step(name="transient_upload_result")
-async def upload_result(image: dict) -> dict:
-    """Upload processed image."""
-    print(f"[Step] Uploading processed {image['image_id']}...")
-    await asyncio.sleep(2)
-    return {**image, "uploaded": True, "url": f"https://cdn.example.com/{image['image_id']}"}
-
-
-# ============================================================================
-# Workflow 1: No Recovery (Default for Transient)
-# ============================================================================
-
-
-@workflow(
-    durable=False,
-    recover_on_worker_loss=False,  # DEFAULT for transient - no auto-recovery
-    tags=["celery", "transient"],
-)
-async def image_processor(image_id: str) -> dict:
-    """
-    Image processing workflow - NO AUTO-RECOVERY.
-
-    This is the default behavior for transient workflows.
-
-    If a worker crashes during execution:
-    - The workflow is marked as FAILED
-    - No automatic retry occurs
-    - A new workflow must be manually started
-
-    Why use this:
-    - The upload step has side effects (can't safely repeat)
-    - Need manual review of failures
-    - Each image should only be processed once
-    """
-    print(f"\n{'=' * 60}")
-    print(f"Image Processor (NO RECOVERY): {image_id}")
-    print("If worker crashes, workflow will FAIL permanently")
-    print(f"{'=' * 60}\n")
-
-    image = await download_image(image_id)
-    image = await resize_image(image)
-    image = await apply_filters(image)
-    image = await upload_result(image)
-
-    print(f"\n[Complete] Image available at: {image['url']}\n")
-    return image
-
-
-# ============================================================================
-# Workflow 2: With Recovery (Restart from Scratch)
-# ============================================================================
-
-
-@step(name="transient_fetch_batch_items")
-async def fetch_batch_items(batch_id: str) -> dict:
-    """Fetch items in a batch."""
-    print(f"[Step] Fetching batch {batch_id}...")
-    await asyncio.sleep(2)
-    return {"batch_id": batch_id, "items": ["a", "b", "c", "d", "e"], "fetched": True}
-
-
-@step(name="transient_process_batch_items")
-async def process_batch_items(batch: dict) -> dict:
-    """Process all items in batch (idempotent)."""
-    print(f"[Step] Processing {len(batch['items'])} items (kill worker during this step!)...")
-    for i, item in enumerate(batch["items"]):
-        print(f"  Processing item {item} ({i + 1}/{len(batch['items'])})...")
-        await asyncio.sleep(3)  # 3 seconds per item - plenty of time to kill worker
-    return {**batch, "processed": True, "processed_count": len(batch["items"])}
-
-
-@step(name="transient_generate_report")
-async def generate_report(batch: dict) -> dict:
-    """Generate processing report (idempotent)."""
-    print(f"[Step] Generating report for batch {batch['batch_id']}...")
-    await asyncio.sleep(0.5)
-    return {
-        **batch,
-        "report": f"Processed {batch['processed_count']} items successfully",
-        "reported": True,
-    }
-
-
-@workflow(
-    durable=False,
-    recover_on_worker_loss=True,  # Enable recovery - restarts from scratch
-    max_recovery_attempts=3,  # Allow up to 3 restarts
-    tags=["celery", "transient"],
-)
-async def batch_processor(batch_id: str) -> dict:
-    """
-    Batch processing workflow - WITH AUTO-RECOVERY.
-
-    This transient workflow will restart from scratch on worker failure.
-
-    If a worker crashes during execution:
-    - A WORKFLOW_INTERRUPTED event is recorded (even for transient!)
-    - The workflow restarts from the beginning
-    - All steps run again (no event replay for transient)
-    - Up to 3 recovery attempts allowed
-
-    Why use this:
-    - All steps are idempotent (safe to repeat)
-    - Processing can be safely restarted
-    - Better reliability for batch jobs
-    - Items are processed atomically (all or nothing)
-
-    Note: For transient workflows, recovery means RESTART, not RESUME.
-    Unlike durable workflows, there are no events to replay.
-    """
-    print(f"\n{'=' * 60}")
-    print(f"Batch Processor (WITH RECOVERY): {batch_id}")
-    print("If worker crashes, workflow will RESTART from scratch")
-    print(f"{'=' * 60}\n")
-
-    batch = await fetch_batch_items(batch_id)
-    batch = await process_batch_items(batch)
-    batch = await generate_report(batch)
-
-    print(f"\n[Complete] {batch['report']}\n")
-    return batch
-
-
-# ============================================================================
-# Comparison Helper
-# ============================================================================
-
-
-async def main() -> None:
-    """Run the transient fault tolerance examples."""
-    import argparse
-
-    import pyworkflow
-
-    parser = argparse.ArgumentParser(
-        description="Transient Workflow Fault Tolerance Examples",
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-        epilog="""
-Examples:
-    # Run image processor (no recovery on failure)
-    python 02_fault_tolerance.py --workflow image --id img-123
-
-    # Run batch processor (restarts on failure)
-    python 02_fault_tolerance.py --workflow batch --id batch-456
-
-To test:
-    1. Start the workflow
-    2. Kill the worker (Ctrl+C) during processing
-    3. Start a new worker
-    4. Observe: image_processor stays FAILED, batch_processor restarts
-        """,
-    )
-    parser.add_argument(
-        "--workflow",
-        choices=["image", "batch"],
-        default="batch",
-        help="Which workflow to run",
-    )
-    parser.add_argument("--id", default="test-001", help="ID for the workflow")
-    args = parser.parse_args()
-
-    print("\n" + "=" * 60)
-    print("TRANSIENT WORKFLOW FAULT TOLERANCE DEMO")
-    print("=" * 60)
-
-    if args.workflow == "image":
-        print("\nRunning: image_processor (recover_on_worker_loss=False)")
-        print("Behavior: On worker crash -> FAILED (no recovery)")
-        run_id = await pyworkflow.start(image_processor, args.id)
-    else:
-        print("\nRunning: batch_processor (recover_on_worker_loss=True)")
-        print("Behavior: On worker crash -> RESTART from scratch")
-        run_id = await pyworkflow.start(batch_processor, args.id)
-
-    print(f"\nWorkflow dispatched with run_id: {run_id}")
-    print("\nTo test fault tolerance:")
-    print("  1. Watch the worker output")
-    print("  2. Kill the worker during processing (Ctrl+C)")
-    print("  3. Start a new worker")
-    print("  4. Observe the recovery behavior")
-
-
-if __name__ == "__main__":
-    asyncio.run(main())
examples/celery/transient/__init__.py
DELETED

@@ -1,20 +0,0 @@
-"""
-Celery Transient Workflow Examples
-
-These examples demonstrate transient (non-durable) workflows running on Celery workers.
-
-Transient workflows:
-- Do NOT persist state to storage
-- Do NOT record events
-- Cannot be resumed after suspension
-- Are simpler and faster for short-lived tasks
-
-Key differences from durable workflows:
-| Feature                | Durable           | Transient         |
-|------------------------|-------------------|-------------------|
-| Event recording        | Yes               | No                |
-| State persistence      | Yes               | No                |
-| Resumable after crash  | Yes (from events) | No (starts fresh) |
-| Sleep behavior         | Suspends workflow | Blocks inline     |
-| Best for               | Long-running      | Quick tasks       |
-"""
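In the deleted example files, the durable/transient distinction summarized in the table above comes down to flags on the @workflow decorator. A minimal sketch, assuming the decorators and flags those files used (step, workflow, durable, recover_on_worker_loss), which may not match the released 0.1.9 API exactly:

    from pyworkflow import step, workflow


    @step()
    async def do_work(item_id: str) -> dict:
        """A trivial step shared by both variants."""
        return {"item_id": item_id, "processed": True}


    # Durable (default): events are recorded and state persists, so a crashed
    # run can be resumed by replaying its event log.
    @workflow()
    async def durable_task(item_id: str) -> dict:
        return await do_work(item_id)


    # Transient: no event recording or persistence. By default a crashed run
    # stays FAILED; recover_on_worker_loss=True would restart it from scratch.
    @workflow(durable=False, recover_on_worker_loss=False, tags=["transient"])
    async def transient_task(item_id: str) -> dict:
        return await do_work(item_id)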
examples/celery/transient/pyworkflow.config.yaml
DELETED

@@ -1,25 +0,0 @@
-# PyWorkflow Configuration for Celery Transient Examples
-#
-# This file configures the pyworkflow CLI when running from this directory.
-# Simply run: pyworkflow worker run
-#
-# Priority order:
-# 1. --module CLI argument
-# 2. PYWORKFLOW_DISCOVER environment variable
-# 3. This config file (pyworkflow.config.yaml)
-
-# Module containing workflow definitions
-module: examples.celery.transient
-
-# Runtime configuration
-runtime: celery
-
-# Storage is minimal for transient workflows
-# (only used for tracking run IDs, not for event sourcing)
-storage:
-  type: memory
-
-# Celery broker and result backend
-celery:
-  broker: redis://localhost:6379/0
-  result_backend: redis://localhost:6379/1
examples/local/__init__.py
DELETED

@@ -1 +0,0 @@
-# PyWorkflow Local Examples Package