pyworkflow-engine 0.1.7__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- pyworkflow/__init__.py +10 -1
- pyworkflow/celery/tasks.py +272 -24
- pyworkflow/cli/__init__.py +4 -1
- pyworkflow/cli/commands/runs.py +4 -4
- pyworkflow/cli/commands/setup.py +203 -4
- pyworkflow/cli/utils/config_generator.py +76 -3
- pyworkflow/cli/utils/docker_manager.py +232 -0
- pyworkflow/config.py +94 -17
- pyworkflow/context/__init__.py +13 -0
- pyworkflow/context/base.py +26 -0
- pyworkflow/context/local.py +80 -0
- pyworkflow/context/step_context.py +295 -0
- pyworkflow/core/registry.py +6 -1
- pyworkflow/core/step.py +141 -0
- pyworkflow/core/workflow.py +56 -0
- pyworkflow/engine/events.py +30 -0
- pyworkflow/engine/replay.py +39 -0
- pyworkflow/primitives/child_workflow.py +1 -1
- pyworkflow/runtime/local.py +1 -1
- pyworkflow/storage/__init__.py +14 -0
- pyworkflow/storage/base.py +35 -0
- pyworkflow/storage/cassandra.py +1747 -0
- pyworkflow/storage/config.py +69 -0
- pyworkflow/storage/dynamodb.py +31 -2
- pyworkflow/storage/file.py +28 -0
- pyworkflow/storage/memory.py +18 -0
- pyworkflow/storage/mysql.py +1159 -0
- pyworkflow/storage/postgres.py +27 -2
- pyworkflow/storage/schemas.py +4 -3
- pyworkflow/storage/sqlite.py +25 -2
- {pyworkflow_engine-0.1.7.dist-info → pyworkflow_engine-0.1.10.dist-info}/METADATA +7 -4
- pyworkflow_engine-0.1.10.dist-info/RECORD +91 -0
- pyworkflow_engine-0.1.10.dist-info/top_level.txt +1 -0
- dashboard/backend/app/__init__.py +0 -1
- dashboard/backend/app/config.py +0 -32
- dashboard/backend/app/controllers/__init__.py +0 -6
- dashboard/backend/app/controllers/run_controller.py +0 -86
- dashboard/backend/app/controllers/workflow_controller.py +0 -33
- dashboard/backend/app/dependencies/__init__.py +0 -5
- dashboard/backend/app/dependencies/storage.py +0 -50
- dashboard/backend/app/repositories/__init__.py +0 -6
- dashboard/backend/app/repositories/run_repository.py +0 -80
- dashboard/backend/app/repositories/workflow_repository.py +0 -27
- dashboard/backend/app/rest/__init__.py +0 -8
- dashboard/backend/app/rest/v1/__init__.py +0 -12
- dashboard/backend/app/rest/v1/health.py +0 -33
- dashboard/backend/app/rest/v1/runs.py +0 -133
- dashboard/backend/app/rest/v1/workflows.py +0 -41
- dashboard/backend/app/schemas/__init__.py +0 -23
- dashboard/backend/app/schemas/common.py +0 -16
- dashboard/backend/app/schemas/event.py +0 -24
- dashboard/backend/app/schemas/hook.py +0 -25
- dashboard/backend/app/schemas/run.py +0 -54
- dashboard/backend/app/schemas/step.py +0 -28
- dashboard/backend/app/schemas/workflow.py +0 -31
- dashboard/backend/app/server.py +0 -87
- dashboard/backend/app/services/__init__.py +0 -6
- dashboard/backend/app/services/run_service.py +0 -240
- dashboard/backend/app/services/workflow_service.py +0 -155
- dashboard/backend/main.py +0 -18
- docs/concepts/cancellation.mdx +0 -362
- docs/concepts/continue-as-new.mdx +0 -434
- docs/concepts/events.mdx +0 -266
- docs/concepts/fault-tolerance.mdx +0 -370
- docs/concepts/hooks.mdx +0 -552
- docs/concepts/limitations.mdx +0 -167
- docs/concepts/schedules.mdx +0 -775
- docs/concepts/sleep.mdx +0 -312
- docs/concepts/steps.mdx +0 -301
- docs/concepts/workflows.mdx +0 -255
- docs/guides/cli.mdx +0 -942
- docs/guides/configuration.mdx +0 -560
- docs/introduction.mdx +0 -155
- docs/quickstart.mdx +0 -279
- examples/__init__.py +0 -1
- examples/celery/__init__.py +0 -1
- examples/celery/durable/docker-compose.yml +0 -55
- examples/celery/durable/pyworkflow.config.yaml +0 -12
- examples/celery/durable/workflows/__init__.py +0 -122
- examples/celery/durable/workflows/basic.py +0 -87
- examples/celery/durable/workflows/batch_processing.py +0 -102
- examples/celery/durable/workflows/cancellation.py +0 -273
- examples/celery/durable/workflows/child_workflow_patterns.py +0 -240
- examples/celery/durable/workflows/child_workflows.py +0 -202
- examples/celery/durable/workflows/continue_as_new.py +0 -260
- examples/celery/durable/workflows/fault_tolerance.py +0 -210
- examples/celery/durable/workflows/hooks.py +0 -211
- examples/celery/durable/workflows/idempotency.py +0 -112
- examples/celery/durable/workflows/long_running.py +0 -99
- examples/celery/durable/workflows/retries.py +0 -101
- examples/celery/durable/workflows/schedules.py +0 -209
- examples/celery/transient/01_basic_workflow.py +0 -91
- examples/celery/transient/02_fault_tolerance.py +0 -257
- examples/celery/transient/__init__.py +0 -20
- examples/celery/transient/pyworkflow.config.yaml +0 -25
- examples/local/__init__.py +0 -1
- examples/local/durable/01_basic_workflow.py +0 -94
- examples/local/durable/02_file_storage.py +0 -132
- examples/local/durable/03_retries.py +0 -169
- examples/local/durable/04_long_running.py +0 -119
- examples/local/durable/05_event_log.py +0 -145
- examples/local/durable/06_idempotency.py +0 -148
- examples/local/durable/07_hooks.py +0 -334
- examples/local/durable/08_cancellation.py +0 -233
- examples/local/durable/09_child_workflows.py +0 -198
- examples/local/durable/10_child_workflow_patterns.py +0 -265
- examples/local/durable/11_continue_as_new.py +0 -249
- examples/local/durable/12_schedules.py +0 -198
- examples/local/durable/__init__.py +0 -1
- examples/local/transient/01_quick_tasks.py +0 -87
- examples/local/transient/02_retries.py +0 -130
- examples/local/transient/03_sleep.py +0 -141
- examples/local/transient/__init__.py +0 -1
- pyworkflow_engine-0.1.7.dist-info/RECORD +0 -196
- pyworkflow_engine-0.1.7.dist-info/top_level.txt +0 -5
- tests/examples/__init__.py +0 -0
- tests/integration/__init__.py +0 -0
- tests/integration/test_cancellation.py +0 -330
- tests/integration/test_child_workflows.py +0 -439
- tests/integration/test_continue_as_new.py +0 -428
- tests/integration/test_dynamodb_storage.py +0 -1146
- tests/integration/test_fault_tolerance.py +0 -369
- tests/integration/test_schedule_storage.py +0 -484
- tests/unit/__init__.py +0 -0
- tests/unit/backends/__init__.py +0 -1
- tests/unit/backends/test_dynamodb_storage.py +0 -1554
- tests/unit/backends/test_postgres_storage.py +0 -1281
- tests/unit/backends/test_sqlite_storage.py +0 -1460
- tests/unit/conftest.py +0 -41
- tests/unit/test_cancellation.py +0 -364
- tests/unit/test_child_workflows.py +0 -680
- tests/unit/test_continue_as_new.py +0 -441
- tests/unit/test_event_limits.py +0 -316
- tests/unit/test_executor.py +0 -320
- tests/unit/test_fault_tolerance.py +0 -334
- tests/unit/test_hooks.py +0 -495
- tests/unit/test_registry.py +0 -261
- tests/unit/test_replay.py +0 -420
- tests/unit/test_schedule_schemas.py +0 -285
- tests/unit/test_schedule_utils.py +0 -286
- tests/unit/test_scheduled_workflow.py +0 -274
- tests/unit/test_step.py +0 -353
- tests/unit/test_workflow.py +0 -243
- {pyworkflow_engine-0.1.7.dist-info → pyworkflow_engine-0.1.10.dist-info}/WHEEL +0 -0
- {pyworkflow_engine-0.1.7.dist-info → pyworkflow_engine-0.1.10.dist-info}/entry_points.txt +0 -0
- {pyworkflow_engine-0.1.7.dist-info → pyworkflow_engine-0.1.10.dist-info}/licenses/LICENSE +0 -0
--- a/examples/celery/durable/workflows/child_workflows.py
+++ /dev/null
@@ -1,202 +0,0 @@
-"""
-Celery Durable Workflow - Child Workflows Basic Example
-
-This example demonstrates how to spawn child workflows from a parent workflow.
-- Parent workflow spawns child workflows using start_child_workflow()
-- Children have their own run_id and event history
-- wait_for_completion=True (default) waits for child to complete
-- wait_for_completion=False returns a ChildWorkflowHandle immediately
-- TERMINATE policy: when parent completes/fails/cancels, children are cancelled
-
-Prerequisites:
-1. Start Redis: docker run -d -p 6379:6379 redis:7-alpine
-2. Start worker: pyworkflow --module examples.celery.durable.09_child_workflows worker run
-
-Run with CLI:
-    pyworkflow --module examples.celery.durable.09_child_workflows workflows run order_fulfillment_workflow \
-        --arg order_id=order-456 --arg amount=149.99 --arg customer_email=customer@example.com
-
-Check status:
-    pyworkflow runs list
-    pyworkflow runs status <run_id>
-    pyworkflow runs children <run_id>
-"""
-
-from pyworkflow import (
-    ChildWorkflowHandle,
-    start_child_workflow,
-    step,
-    workflow,
-)
-
-
-# --- Steps ---
-@step(name="child_demo_validate_order")
-async def validate_order(order_id: str) -> dict:
-    """Validate order details."""
-    print(f" Validating order {order_id}...")
-    return {"order_id": order_id, "valid": True}
-
-
-@step(name="child_demo_process_payment")
-async def process_payment(order_id: str, amount: float) -> dict:
-    """Process payment for order."""
-    print(f" Processing payment ${amount:.2f} for {order_id}...")
-    return {"order_id": order_id, "amount": amount, "paid": True}
-
-
-@step(name="child_demo_ship_order")
-async def ship_order(order_id: str) -> dict:
-    """Ship the order."""
-    print(f" Shipping order {order_id}...")
-    return {"order_id": order_id, "shipped": True}
-
-
-@step(name="child_demo_send_email")
-async def send_email(recipient: str, subject: str) -> dict:
-    """Send an email notification."""
-    print(f" Sending email to {recipient}: {subject}")
-    return {"recipient": recipient, "subject": subject, "sent": True}
-
-
-# --- Child Workflows ---
-@workflow(name="child_demo_payment_workflow", tags=["celery", "durable"])
-async def payment_workflow(order_id: str, amount: float) -> dict:
-    """Child workflow for payment processing."""
-    print(f" [PaymentWorkflow] Starting for order {order_id}")
-    result = await process_payment(order_id, amount)
-    print(f" [PaymentWorkflow] Completed for order {order_id}")
-    return result
-
-
-@workflow(name="child_demo_shipping_workflow", tags=["celery", "durable"])
-async def shipping_workflow(order_id: str) -> dict:
-    """Child workflow for shipping."""
-    print(f" [ShippingWorkflow] Starting for order {order_id}")
-    result = await ship_order(order_id)
-    print(f" [ShippingWorkflow] Completed for order {order_id}")
-    return result
-
-
-@workflow(name="child_demo_notification_workflow", tags=["celery", "durable"])
-async def notification_workflow(email: str, order_id: str) -> dict:
-    """Child workflow for sending notifications."""
-    print(f" [NotificationWorkflow] Starting for {email}")
-    result = await send_email(email, f"Order {order_id} update")
-    print(f" [NotificationWorkflow] Completed for {email}")
-    return result
-
-
-# --- Parent Workflow ---
-@workflow(tags=["celery", "durable"])
-async def order_fulfillment_workflow(
-    order_id: str,
-    amount: float,
-    customer_email: str,
-) -> dict:
-    """
-    Parent workflow that orchestrates order fulfillment using child workflows.
-
-    This demonstrates:
-    1. wait_for_completion=True - Wait for child to complete (default)
-    2. wait_for_completion=False - Fire-and-forget with handle
-    """
-    print(f"[OrderFulfillment] Starting for order {order_id}")
-
-    # Step 1: Validate order (regular step)
-    validation = await validate_order(order_id)
-    if not validation["valid"]:
-        return {"order_id": order_id, "status": "invalid"}
-
-    # Step 2: Process payment via child workflow (wait for completion)
-    print("[OrderFulfillment] Starting payment child workflow...")
-    payment_result = await start_child_workflow(
-        payment_workflow,
-        order_id,
-        amount,
-        wait_for_completion=True,  # Default: wait for child to complete
-    )
-    print(f"[OrderFulfillment] Payment completed: {payment_result}")
-
-    # Step 3: Ship order via child workflow (wait for completion)
-    print("[OrderFulfillment] Starting shipping child workflow...")
-    shipping_result = await start_child_workflow(
-        shipping_workflow,
-        order_id,
-        wait_for_completion=True,
-    )
-    print(f"[OrderFulfillment] Shipping completed: {shipping_result}")
-
-    # Step 4: Send notification via fire-and-forget child workflow
-    # This returns immediately with a handle, parent continues
-    print("[OrderFulfillment] Starting notification child workflow (fire-and-forget)...")
-    notification_handle: ChildWorkflowHandle = await start_child_workflow(
-        notification_workflow,
-        customer_email,
-        order_id,
-        wait_for_completion=False,  # Fire-and-forget
-    )
-    print(f"[OrderFulfillment] Notification child started: {notification_handle.child_run_id}")
-
-    result = {
-        "order_id": order_id,
-        "status": "fulfilled",
-        "payment": payment_result,
-        "shipping": shipping_result,
-        "notification_run_id": notification_handle.child_run_id,
-    }
-
-    print(f"[OrderFulfillment] Completed for order {order_id}")
-    return result
-
-
-async def main() -> None:
-    """Run the order fulfillment workflow example."""
-    import argparse
-    import asyncio
-
-    import pyworkflow
-    from pyworkflow import get_workflow_run
-
-    parser = argparse.ArgumentParser(description="Order Fulfillment Workflow with Child Workflows")
-    parser.add_argument("--order-id", default="order-456", help="Order ID to process")
-    parser.add_argument("--amount", type=float, default=149.99, help="Order amount")
-    parser.add_argument("--email", default="customer@example.com", help="Customer email")
-    args = parser.parse_args()
-
-    print("=== Child Workflows - Basic Example ===\n")
-    print("Running order fulfillment workflow with child workflows...\n")
-
-    # Start parent workflow
-    run_id = await pyworkflow.start(
-        order_fulfillment_workflow,
-        args.order_id,
-        args.amount,
-        args.email,
-    )
-
-    print(f"\nWorkflow started with run_id: {run_id}")
-    print("\nCheck status:")
-    print(f" pyworkflow runs status {run_id}")
-    print(f" pyworkflow runs children {run_id}")
-
-    # Poll for completion
-    print("\nWaiting for workflow to complete...")
-    for _ in range(30):
-        await asyncio.sleep(1)
-        run = await get_workflow_run(run_id)
-        if run.status.value in ("completed", "failed", "cancelled"):
-            print(f"\nWorkflow {run.status.value}!")
-            if run.result:
-                print(f"Result: {run.result}")
-            if run.error:
-                print(f"Error: {run.error}")
-            break
-    else:
-        print("\nTimeout waiting for workflow completion")
-
-
-if __name__ == "__main__":
-    import asyncio
-
-    asyncio.run(main())
--- a/examples/celery/durable/workflows/continue_as_new.py
+++ /dev/null
@@ -1,260 +0,0 @@
-"""
-Durable Workflow (Celery) - Continue-As-New
-
-This example demonstrates continue_as_new() with Celery workers:
-- Polling workflows that need fresh event history
-- Batch processing with continuation
-- Tracking workflow chains across distributed workers
-
-Prerequisites:
-1. Redis running: docker run -d -p 6379:6379 redis
-2. Start Celery worker:
-   celery -A pyworkflow.celery.tasks worker -Q pyworkflow.workflows,pyworkflow.steps,pyworkflow.schedules -l info
-
-Run: python examples/celery/durable/11_continue_as_new.py
-"""
-
-import asyncio
-
-from pyworkflow import (
-    configure,
-    continue_as_new,
-    get_workflow_chain,
-    reset_config,
-    start,
-    step,
-    workflow,
-)
-from pyworkflow.storage import FileStorageBackend
-
-
-# --- Steps ---
-@step()
-async def fetch_batch(offset: int, batch_size: int) -> list:
-    """Fetch a batch of items to process."""
-    # Simulate fetching items - returns empty when done
-    total_items = 50  # Simulate 50 total items
-    if offset >= total_items:
-        return []
-    end = min(offset + batch_size, total_items)
-    items = list(range(offset, end))
-    print(f" [Step] Fetched items {offset} to {end - 1}")
-    await asyncio.sleep(0.1)  # Simulate I/O
-    return items
-
-
-@step(name="continue_process_item")
-async def process_item(item: int) -> dict:
-    """Process a single item."""
-    await asyncio.sleep(0.05)  # Simulate work
-    return {"item": item, "processed": True}
-
-
-@step()
-async def poll_for_messages(cursor: str | None) -> tuple[str | None, list]:
-    """Poll message queue for new messages."""
-    # Simulate message queue polling
-    await asyncio.sleep(0.1)
-
-    if cursor is None:
-        return "msg_batch_1", [{"id": 1, "type": "order"}, {"id": 2, "type": "payment"}]
-    elif cursor == "msg_batch_1":
-        return "msg_batch_2", [{"id": 3, "type": "shipment"}]
-    elif cursor == "msg_batch_2":
-        return "msg_batch_3", [{"id": 4, "type": "notification"}]
-    else:
-        return None, []  # No more messages
-
-
-@step()
-async def handle_message(message: dict) -> dict:
-    """Handle a single message."""
-    await asyncio.sleep(0.05)
-    return {"message_id": message["id"], "handled": True}
-
-
-# --- Batch Processing Workflow ---
-@workflow(durable=True, tags=["celery", "durable"])
-async def batch_processor(offset: int = 0, batch_size: int = 10) -> str:
-    """
-    Process items in batches using continue_as_new.
-
-    Each batch runs as a separate workflow execution with fresh
-    event history, preventing unbounded history growth.
-
-    This pattern is ideal for:
-    - ETL pipelines processing millions of records
-    - Data migration jobs
-    - Bulk update operations
-    """
-    print(f"\n [Batch] Starting at offset {offset}")
-
-    items = await fetch_batch(offset, batch_size)
-
-    if not items:
-        return f"Batch processing complete! Total items: {offset}"
-
-    # Process items
-    for item in items:
-        await process_item(item)
-
-    print(f" [Batch] Processed {len(items)} items")
-
-    # Continue with next batch
-    continue_as_new(offset=offset + batch_size, batch_size=batch_size)
-
-
-# --- Message Consumer Workflow ---
-@workflow(durable=True, tags=["celery", "durable"])
-async def message_consumer(cursor: str | None = None, messages_processed: int = 0) -> str:
-    """
-    Consume messages from a queue, continuing as new after each batch.
-
-    This pattern is useful for:
-    - Queue consumers that run indefinitely
-    - Event stream processors
-    - Real-time data ingestion
-    """
-    print(f"\n [Consumer] Polling with cursor: {cursor}")
-
-    # Poll for messages
-    new_cursor, messages = await poll_for_messages(cursor)
-
-    if not messages and new_cursor is None:
-        return f"Consumer complete! Processed {messages_processed} messages"
-
-    # Handle each message
-    count = 0
-    for message in messages:
-        await handle_message(message)
-        count += 1
-
-    total = messages_processed + count
-    print(f" [Consumer] Handled {count} messages (total: {total})")
-
-    # Continue with new cursor
-    continue_as_new(cursor=new_cursor, messages_processed=total)
-
-
-# --- Recurring Task Workflow ---
-@workflow(durable=True, tags=["celery", "durable"])
-async def recurring_report(iteration: int = 1, max_iterations: int = 3) -> str:
-    """
-    Generate reports on a schedule, continuing as new for each iteration.
-
-    This demonstrates a pattern for:
-    - Daily/weekly reports
-    - Scheduled cleanup tasks
-    - Periodic sync operations
-
-    In production, you might add sleep() between iterations.
-    """
-    print(f"\n [Report] Generating report #{iteration}")
-
-    # Simulate report generation
-    await asyncio.sleep(0.1)
-    print(f" [Report] Report #{iteration} complete")
-
-    if iteration >= max_iterations:
-        return f"All {max_iterations} reports generated!"
-
-    # Continue with next iteration
-    continue_as_new(iteration=iteration + 1, max_iterations=max_iterations)
-
-
-async def run_examples():
-    """Run all continue-as-new examples."""
-    print("\n=== Continue-As-New Examples (Celery) ===\n")
-
-    # Example 1: Batch Processing
-    print("--- Example 1: Batch Processing ---")
-    print("Processing 50 items in batches of 10...")
-
-    run_id = await start(batch_processor, offset=0, batch_size=10)
-    print(f"Started workflow: {run_id}")
-
-    # Wait for completion (in production, use webhooks or polling)
-    print("Waiting for completion...")
-    await asyncio.sleep(5)
-
-    # Check the chain
-    from pyworkflow import get_storage
-
-    storage = get_storage()
-    chain = await get_workflow_chain(run_id, storage=storage)
-    print(f"\nWorkflow chain has {len(chain)} runs:")
-    for i, run in enumerate(chain):
-        marker = " <- started here" if run.run_id == run_id else ""
-        print(f" {i + 1}. {run.run_id[:20]}... [{run.status.value}]{marker}")
-
-    # Example 2: Message Consumer
-    print("\n--- Example 2: Message Consumer ---")
-    print("Consuming messages until queue is empty...")
-
-    run_id2 = await start(message_consumer)
-    print(f"Started workflow: {run_id2}")
-
-    await asyncio.sleep(3)
-
-    chain2 = await get_workflow_chain(run_id2, storage=storage)
-    print(f"\nConsumer chain has {len(chain2)} runs")
-    if chain2:
-        final = chain2[-1]
-        if final.result:
-            print(f"Final result: {final.result}")
-
-    # Example 3: Recurring Task
-    print("\n--- Example 3: Recurring Report ---")
-    print("Running 3 report iterations...")
-
-    run_id3 = await start(recurring_report)
-    print(f"Started workflow: {run_id3}")
-
-    await asyncio.sleep(2)
-
-    chain3 = await get_workflow_chain(run_id3, storage=storage)
-    print(f"\nReport chain has {len(chain3)} runs")
-
-    # Summary
-    print("\n=== Summary ===")
-    print(f" Batch processor: {len(chain)} workflow executions")
-    print(f" Message consumer: {len(chain2)} workflow executions")
-    print(f" Recurring report: {len(chain3)} workflow executions")
-
-
-def main():
-    """Configure and run examples."""
-    print("Configuring PyWorkflow with Celery runtime...")
-
-    # Reset any existing config
-    reset_config()
-
-    # Configure storage
-    storage = FileStorageBackend(base_path=".workflow_data")
-
-    # Configure pyworkflow
-    configure(
-        storage=storage,
-        default_runtime="celery",
-        default_durable=True,
-    )
-
-    print("Configuration complete!")
-    print("\nMake sure Celery worker is running:")
-    print(
-        " celery -A pyworkflow.celery.tasks worker -Q pyworkflow.workflows,pyworkflow.steps,pyworkflow.schedules -l info\n"
-    )
-
-    # Run examples
-    asyncio.run(run_examples())
-
-    print("\n=== Key Takeaways ===")
-    print(" - continue_as_new() works across distributed Celery workers")
-    print(" - Each continuation is a new Celery task execution")
-    print(" - Event history is reset, preventing unbounded growth")
-    print(" - Chains can be tracked with get_workflow_chain()")
-    print(" - Useful for long-running polling, batch processing, recurring tasks")
-
-
-if __name__ == "__main__":
-    main()
--- a/examples/celery/durable/workflows/fault_tolerance.py
+++ /dev/null
@@ -1,210 +0,0 @@
-"""
-Celery Durable Workflow - Fault Tolerance Example
-
-This example demonstrates automatic recovery from worker failures for durable workflows.
-
-Key features:
-- Automatic recovery when workers crash mid-execution
-- Event replay continues from the last completed step
-- Configurable recovery attempts limit
-- WORKFLOW_INTERRUPTED events recorded for auditing
-
-When a worker crashes:
-1. The task is automatically requeued (task_reject_on_worker_lost=True)
-2. Another worker detects the RUNNING workflow and initiates recovery
-3. WORKFLOW_INTERRUPTED event is recorded
-4. Events are replayed to restore state
-5. Workflow continues from the last checkpoint
-
-Prerequisites:
-1. Start Redis: docker run -d -p 6379:6379 redis:7-alpine
-2. Start worker: pyworkflow --module examples.celery.durable.06_fault_tolerance worker run
-
-Run with CLI:
-    pyworkflow --module examples.celery.durable.06_fault_tolerance workflows run data_pipeline \
-        --arg data_id=data-123
-
-To test fault tolerance:
-1. Start the workflow
-2. While it's running, kill the worker (Ctrl+C)
-3. Start a new worker
-4. Watch the workflow recover and continue from the last completed step
-
-Check status:
-    pyworkflow runs list
-    pyworkflow runs status <run_id>
-    pyworkflow runs logs <run_id>  # Will show WORKFLOW_INTERRUPTED events
-"""
-
-import asyncio
-
-from pyworkflow import sleep, step, workflow
-
-
-@step()
-async def fetch_data(data_id: str) -> dict:
-    """Fetch data from external source."""
-    print(f"[Step 1] Fetching data for {data_id}...")
-    await asyncio.sleep(2)  # Simulate network delay
-    return {"data_id": data_id, "records": 1000, "fetched": True}
-
-
-@step()
-async def validate_data(data: dict) -> dict:
-    """Validate the fetched data."""
-    print(f"[Step 2] Validating {data['records']} records...")
-    await asyncio.sleep(2)  # Simulate validation time
-    return {**data, "valid_records": 950, "validated": True}
-
-
-@step()
-async def transform_data(data: dict) -> dict:
-    """Transform data for processing."""
-    print(f"[Step 3] Transforming {data['valid_records']} records...")
-    await asyncio.sleep(3)  # Simulate CPU-intensive work
-    return {**data, "transformed_records": 950, "transformed": True}
-
-
-@step()
-async def load_data(data: dict) -> dict:
-    """Load transformed data into destination."""
-    print(f"[Step 4] Loading {data['transformed_records']} records...")
-    await asyncio.sleep(2)  # Simulate database writes
-    return {**data, "loaded": True}
-
-
-@step()
-async def send_notification(data: dict) -> dict:
-    """Send completion notification."""
-    print(f"[Step 5] Sending notification for {data['data_id']}...")
-    return {**data, "notified": True}
-
-
-@workflow(
-    recover_on_worker_loss=True,  # Enable automatic recovery (default for durable)
-    max_recovery_attempts=5,  # Allow up to 5 recovery attempts
-    tags=["celery", "durable"],
-)
-async def data_pipeline(data_id: str) -> dict:
-    """
-    Data processing pipeline with fault tolerance.
-
-    This workflow demonstrates automatic recovery from worker failures:
-
-    1. Fetch data from external source
-    2. Validate the data
-    3. Transform data for processing
-    4. Load into destination
-    5. Send completion notification
-
-    If a worker crashes during any step:
-    - The workflow will be automatically recovered by another worker
-    - Already completed steps will be skipped (results from event replay)
-    - Execution continues from where it left off
-    - Up to 5 recovery attempts are allowed
-
-    Test fault tolerance:
-    - Kill the worker during step 3 (transform_data) which takes longest
-    - Start a new worker and watch it recover
-    """
-    print(f"\n{'=' * 60}")
-    print(f"Starting data pipeline for {data_id}")
-    print(f"{'=' * 60}\n")
-
-    data = await fetch_data(data_id)
-    print(f" -> Fetch complete: {data['records']} records\n")
-
-    print(" [Sleeping 10s before validation - kill worker now to test recovery!]")
-    await sleep("10s")
-
-    data = await validate_data(data)
-    print(f" -> Validation complete: {data['valid_records']} valid records\n")
-
-    print(" [Sleeping 10s before transform - kill worker now to test recovery!]")
-    await sleep("10s")
-
-    data = await transform_data(data)
-    print(f" -> Transform complete: {data['transformed_records']} records\n")
-
-    print(" [Sleeping 10s before load - kill worker now to test recovery!]")
-    await sleep("10s")
-
-    data = await load_data(data)
-    print(" -> Load complete\n")
-
-    data = await send_notification(data)
-    print(" -> Notification sent\n")
-
-    print(f"{'=' * 60}")
-    print("Pipeline completed successfully!")
-    print(f"{'=' * 60}\n")
-
-    return data
-
-
-@workflow(
-    recover_on_worker_loss=False,  # Disable recovery for this workflow
-    max_recovery_attempts=0,
-    tags=["celery", "durable"],
-)
-async def critical_pipeline(data_id: str) -> dict:
-    """
-    Critical pipeline that should NOT auto-recover.
-
-    Some workflows should fail completely on worker loss rather than
-    recover, for example when:
-    - Steps have side effects that can't be safely repeated
-    - Human intervention is required after failures
-    - The workflow interacts with non-idempotent external systems
-
-    If a worker crashes during this workflow:
-    - The workflow will be marked as FAILED
-    - No automatic recovery will be attempted
-    - Manual intervention is required
-
-    Usage:
-        pyworkflow workflows run critical_pipeline --arg data_id=critical-001
-    """
-    print(f"[Critical] Processing {data_id} - NO AUTO-RECOVERY")
-
-    data = await fetch_data(data_id)
-    data = await validate_data(data)
-    data = await transform_data(data)
-
-    print(f"[Critical] Completed {data_id}")
-    return data
-
-
-async def main() -> None:
-    """Run the fault tolerance example."""
-    import argparse
-
-    import pyworkflow
-
-    parser = argparse.ArgumentParser(description="Data Pipeline with Fault Tolerance")
-    parser.add_argument("--data-id", default="data-123", help="Data ID to process")
-    parser.add_argument(
-        "--critical",
-        action="store_true",
-        help="Run the critical pipeline (no auto-recovery)",
-    )
-    args = parser.parse_args()
-
-    if args.critical:
-        print(f"Starting CRITICAL pipeline for {args.data_id}...")
-        print("NOTE: This workflow will NOT auto-recover from worker failures")
-        run_id = await pyworkflow.start(critical_pipeline, args.data_id)
-    else:
-        print(f"Starting data pipeline for {args.data_id}...")
-        print("NOTE: This workflow WILL auto-recover from worker failures")
-        print(" Kill the worker during execution to test recovery")
-        run_id = await pyworkflow.start(data_pipeline, args.data_id)
-
-    print(f"\nWorkflow started with run_id: {run_id}")
-    print("\nMonitor with:")
-    print(f" pyworkflow runs status {run_id}")
-    print(f" pyworkflow runs logs {run_id}")
-
-
-if __name__ == "__main__":
-    asyncio.run(main())