pyworkflow-engine 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyworkflow/__init__.py +10 -1
- pyworkflow/celery/tasks.py +272 -24
- pyworkflow/cli/__init__.py +4 -1
- pyworkflow/cli/commands/runs.py +4 -4
- pyworkflow/cli/commands/setup.py +203 -4
- pyworkflow/cli/utils/config_generator.py +76 -3
- pyworkflow/cli/utils/docker_manager.py +232 -0
- pyworkflow/context/__init__.py +13 -0
- pyworkflow/context/base.py +26 -0
- pyworkflow/context/local.py +80 -0
- pyworkflow/context/step_context.py +295 -0
- pyworkflow/core/registry.py +6 -1
- pyworkflow/core/step.py +141 -0
- pyworkflow/core/workflow.py +56 -0
- pyworkflow/engine/events.py +30 -0
- pyworkflow/engine/replay.py +39 -0
- pyworkflow/primitives/child_workflow.py +1 -1
- pyworkflow/runtime/local.py +1 -1
- pyworkflow/storage/__init__.py +14 -0
- pyworkflow/storage/base.py +35 -0
- pyworkflow/storage/cassandra.py +1747 -0
- pyworkflow/storage/config.py +69 -0
- pyworkflow/storage/dynamodb.py +31 -2
- pyworkflow/storage/file.py +28 -0
- pyworkflow/storage/memory.py +18 -0
- pyworkflow/storage/mysql.py +1159 -0
- pyworkflow/storage/postgres.py +27 -2
- pyworkflow/storage/schemas.py +4 -3
- pyworkflow/storage/sqlite.py +25 -2
- {pyworkflow_engine-0.1.7.dist-info → pyworkflow_engine-0.1.9.dist-info}/METADATA +7 -4
- pyworkflow_engine-0.1.9.dist-info/RECORD +91 -0
- pyworkflow_engine-0.1.9.dist-info/top_level.txt +1 -0
- dashboard/backend/app/__init__.py +0 -1
- dashboard/backend/app/config.py +0 -32
- dashboard/backend/app/controllers/__init__.py +0 -6
- dashboard/backend/app/controllers/run_controller.py +0 -86
- dashboard/backend/app/controllers/workflow_controller.py +0 -33
- dashboard/backend/app/dependencies/__init__.py +0 -5
- dashboard/backend/app/dependencies/storage.py +0 -50
- dashboard/backend/app/repositories/__init__.py +0 -6
- dashboard/backend/app/repositories/run_repository.py +0 -80
- dashboard/backend/app/repositories/workflow_repository.py +0 -27
- dashboard/backend/app/rest/__init__.py +0 -8
- dashboard/backend/app/rest/v1/__init__.py +0 -12
- dashboard/backend/app/rest/v1/health.py +0 -33
- dashboard/backend/app/rest/v1/runs.py +0 -133
- dashboard/backend/app/rest/v1/workflows.py +0 -41
- dashboard/backend/app/schemas/__init__.py +0 -23
- dashboard/backend/app/schemas/common.py +0 -16
- dashboard/backend/app/schemas/event.py +0 -24
- dashboard/backend/app/schemas/hook.py +0 -25
- dashboard/backend/app/schemas/run.py +0 -54
- dashboard/backend/app/schemas/step.py +0 -28
- dashboard/backend/app/schemas/workflow.py +0 -31
- dashboard/backend/app/server.py +0 -87
- dashboard/backend/app/services/__init__.py +0 -6
- dashboard/backend/app/services/run_service.py +0 -240
- dashboard/backend/app/services/workflow_service.py +0 -155
- dashboard/backend/main.py +0 -18
- docs/concepts/cancellation.mdx +0 -362
- docs/concepts/continue-as-new.mdx +0 -434
- docs/concepts/events.mdx +0 -266
- docs/concepts/fault-tolerance.mdx +0 -370
- docs/concepts/hooks.mdx +0 -552
- docs/concepts/limitations.mdx +0 -167
- docs/concepts/schedules.mdx +0 -775
- docs/concepts/sleep.mdx +0 -312
- docs/concepts/steps.mdx +0 -301
- docs/concepts/workflows.mdx +0 -255
- docs/guides/cli.mdx +0 -942
- docs/guides/configuration.mdx +0 -560
- docs/introduction.mdx +0 -155
- docs/quickstart.mdx +0 -279
- examples/__init__.py +0 -1
- examples/celery/__init__.py +0 -1
- examples/celery/durable/docker-compose.yml +0 -55
- examples/celery/durable/pyworkflow.config.yaml +0 -12
- examples/celery/durable/workflows/__init__.py +0 -122
- examples/celery/durable/workflows/basic.py +0 -87
- examples/celery/durable/workflows/batch_processing.py +0 -102
- examples/celery/durable/workflows/cancellation.py +0 -273
- examples/celery/durable/workflows/child_workflow_patterns.py +0 -240
- examples/celery/durable/workflows/child_workflows.py +0 -202
- examples/celery/durable/workflows/continue_as_new.py +0 -260
- examples/celery/durable/workflows/fault_tolerance.py +0 -210
- examples/celery/durable/workflows/hooks.py +0 -211
- examples/celery/durable/workflows/idempotency.py +0 -112
- examples/celery/durable/workflows/long_running.py +0 -99
- examples/celery/durable/workflows/retries.py +0 -101
- examples/celery/durable/workflows/schedules.py +0 -209
- examples/celery/transient/01_basic_workflow.py +0 -91
- examples/celery/transient/02_fault_tolerance.py +0 -257
- examples/celery/transient/__init__.py +0 -20
- examples/celery/transient/pyworkflow.config.yaml +0 -25
- examples/local/__init__.py +0 -1
- examples/local/durable/01_basic_workflow.py +0 -94
- examples/local/durable/02_file_storage.py +0 -132
- examples/local/durable/03_retries.py +0 -169
- examples/local/durable/04_long_running.py +0 -119
- examples/local/durable/05_event_log.py +0 -145
- examples/local/durable/06_idempotency.py +0 -148
- examples/local/durable/07_hooks.py +0 -334
- examples/local/durable/08_cancellation.py +0 -233
- examples/local/durable/09_child_workflows.py +0 -198
- examples/local/durable/10_child_workflow_patterns.py +0 -265
- examples/local/durable/11_continue_as_new.py +0 -249
- examples/local/durable/12_schedules.py +0 -198
- examples/local/durable/__init__.py +0 -1
- examples/local/transient/01_quick_tasks.py +0 -87
- examples/local/transient/02_retries.py +0 -130
- examples/local/transient/03_sleep.py +0 -141
- examples/local/transient/__init__.py +0 -1
- pyworkflow_engine-0.1.7.dist-info/RECORD +0 -196
- pyworkflow_engine-0.1.7.dist-info/top_level.txt +0 -5
- tests/examples/__init__.py +0 -0
- tests/integration/__init__.py +0 -0
- tests/integration/test_cancellation.py +0 -330
- tests/integration/test_child_workflows.py +0 -439
- tests/integration/test_continue_as_new.py +0 -428
- tests/integration/test_dynamodb_storage.py +0 -1146
- tests/integration/test_fault_tolerance.py +0 -369
- tests/integration/test_schedule_storage.py +0 -484
- tests/unit/__init__.py +0 -0
- tests/unit/backends/__init__.py +0 -1
- tests/unit/backends/test_dynamodb_storage.py +0 -1554
- tests/unit/backends/test_postgres_storage.py +0 -1281
- tests/unit/backends/test_sqlite_storage.py +0 -1460
- tests/unit/conftest.py +0 -41
- tests/unit/test_cancellation.py +0 -364
- tests/unit/test_child_workflows.py +0 -680
- tests/unit/test_continue_as_new.py +0 -441
- tests/unit/test_event_limits.py +0 -316
- tests/unit/test_executor.py +0 -320
- tests/unit/test_fault_tolerance.py +0 -334
- tests/unit/test_hooks.py +0 -495
- tests/unit/test_registry.py +0 -261
- tests/unit/test_replay.py +0 -420
- tests/unit/test_schedule_schemas.py +0 -285
- tests/unit/test_schedule_utils.py +0 -286
- tests/unit/test_scheduled_workflow.py +0 -274
- tests/unit/test_step.py +0 -353
- tests/unit/test_workflow.py +0 -243
- {pyworkflow_engine-0.1.7.dist-info → pyworkflow_engine-0.1.9.dist-info}/WHEEL +0 -0
- {pyworkflow_engine-0.1.7.dist-info → pyworkflow_engine-0.1.9.dist-info}/entry_points.txt +0 -0
- {pyworkflow_engine-0.1.7.dist-info → pyworkflow_engine-0.1.9.dist-info}/licenses/LICENSE +0 -0
docs/concepts/continue-as-new.mdx
DELETED
@@ -1,434 +0,0 @@
---
title: 'Continue-As-New'
description: 'Reset event history by continuing workflows as new executions'
---

## Overview

Long-running workflows can accumulate large event histories that impact performance. `continue_as_new()` solves this by completing the current workflow and immediately starting a fresh execution with a clean event history.

<CardGroup cols={2}>
  <Card title="Fresh Event History" icon="rotate">
    Each continuation starts with a clean event log.
  </Card>
  <Card title="Chain Tracking" icon="link">
    Workflow runs are linked via `continued_from_run_id` and `continued_to_run_id`.
  </Card>
  <Card title="State Preservation" icon="database">
    Pass state to the new execution via arguments.
  </Card>
  <Card title="Unlimited Duration" icon="infinity">
    Run workflows indefinitely without unbounded history growth.
  </Card>
</CardGroup>

## When to Use Continue-As-New

`continue_as_new()` is ideal for:

| Use Case | Description |
|----------|-------------|
| **Polling Workflows** | Continuously poll for updates without accumulating events |
| **Batch Processing** | Process large datasets in chunks, resetting history between batches |
| **Recurring Tasks** | Daily/weekly reports or scheduled jobs that run indefinitely |
| **Queue Consumers** | Process messages from a queue without history growth |
| **Long-Running Sync** | Sync data between systems over extended periods |

## Basic Usage

Call `continue_as_new()` with the arguments for the new execution:

```python
from pyworkflow import workflow, step, continue_as_new

@step()
async def fetch_batch(offset: int, batch_size: int) -> list:
    """Fetch a batch of items to process."""
    items = await db.query(offset=offset, limit=batch_size)
    return items

@step()
async def process_item(item: dict) -> dict:
    """Process a single item."""
    return await transform(item)

@workflow()
async def batch_processor(offset: int = 0, batch_size: int = 100) -> str:
    """Process items in batches with fresh event history."""
    items = await fetch_batch(offset, batch_size)

    if not items:
        # No more items - workflow complete
        return f"Processed {offset} total items"

    # Process this batch
    for item in items:
        await process_item(item)

    # Continue with next batch (fresh event history)
    continue_as_new(offset=offset + batch_size, batch_size=batch_size)
```

<Note>
  `continue_as_new()` never returns - it raises an internal signal that the executor catches. Any code after it will not execute.
</Note>

## How It Works

When `continue_as_new()` is called:

```
┌─────────────────────────────────────────────────────────────────────┐
│                        Continue-As-New Flow                         │
│                                                                     │
│  ┌──────────────┐         ┌──────────────┐         ┌──────────────┐ │
│  │    Run #1    │────────▶│    Run #2    │────────▶│    Run #3    │ │
│  │   offset=0   │         │  offset=100  │         │  offset=200  │ │
│  │   ───────    │         │   ───────    │         │   ───────    │ │
│  │  10 events   │         │  10 events   │         │   5 events   │ │
│  │  CONTINUED   │         │  CONTINUED   │         │  COMPLETED   │ │
│  └──────────────┘         └──────────────┘         └──────────────┘ │
│         │                        │                        │         │
│         └────────────────────────┴────────────────────────┘         │
│                           Workflow Chain                            │
└─────────────────────────────────────────────────────────────────────┘
```

1. The current run is marked as `CONTINUED_AS_NEW`
2. A `WORKFLOW_CONTINUED_AS_NEW` event is recorded
3. A new run is created with `continued_from_run_id` set
4. The new run starts executing with the provided arguments
5. The new run has a fresh, empty event history
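
To make these steps concrete, the sketch below checks the linkage invariants with `get_workflow_chain()` (documented in the API reference on this page). `verify_chain` is a hypothetical helper for illustration, not part of PyWorkflow:

```python
from pyworkflow import get_workflow_chain

async def verify_chain(run_id: str) -> None:
    """Illustrative check of the chain invariants described above."""
    chain = await get_workflow_chain(run_id)
    for prev, nxt in zip(chain, chain[1:]):
        # Every run except the last ended as CONTINUED_AS_NEW (step 1)
        assert prev.status.value == "continued_as_new"
        # Adjacent runs are linked in both directions (step 3)
        assert prev.continued_to_run_id == nxt.run_id
        assert nxt.continued_from_run_id == prev.run_id
```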

## Patterns

### Polling Workflow

```python
@workflow()
async def polling_workflow(cursor: str | None = None, poll_count: int = 0):
    """Poll for updates indefinitely."""
    # Check for new data
    new_cursor, updates = await check_for_updates(cursor)

    if updates:
        for update in updates:
            await process_update(update)

    if new_cursor is None:
        return f"Polling complete after {poll_count + 1} polls"

    # Continue polling with new cursor
    continue_as_new(cursor=new_cursor, poll_count=poll_count + 1)
```

### Recurring Task with Sleep

```python
@workflow()
async def daily_report(day: int = 1):
    """Generate daily reports indefinitely."""
    await generate_report(day)

    # Wait until next day
    await sleep("24h")

    # Continue with next day (fresh history)
    continue_as_new(day=day + 1)
```

### Bounded Iterations

```python
@workflow()
async def bounded_workflow(iteration: int = 1, max_iterations: int = 10):
    """Run for a fixed number of iterations."""
    await do_work(iteration)

    if iteration >= max_iterations:
        return f"Completed {max_iterations} iterations"

    continue_as_new(iteration=iteration + 1, max_iterations=max_iterations)
```

## Tracking Workflow Chains

Use `get_workflow_chain()` to retrieve all runs in a continuation chain:

<Tabs>
  <Tab title="Python API">
    ```python
    from pyworkflow import get_workflow_chain

    # Get the full chain from any run in it
    chain = await get_workflow_chain("run_abc123")

    for run in chain:
        print(f"{run.run_id}: {run.status.value}")
        print(f"  From: {run.continued_from_run_id}")
        print(f"  To: {run.continued_to_run_id}")
    ```
  </Tab>
  <Tab title="CLI">
    ```bash
    # View the continuation chain
    pyworkflow runs chain run_abc123

    # Output:
    # Continue-As-New Chain
    # ────────────────────────────────────────────────────────
    # Chain length: 3 run(s)
    #
    # START
    #   Run ID:    run_abc123def456
    #   Workflow:  batch_processor
    #   Status:    continued_as_new
    #   Duration:  2.3s
    #
    #   ↓ continued as new
    #
    # #2
    #   Run ID:    run_789xyz123abc
    #   Workflow:  batch_processor
    #   Status:    continued_as_new
    #   Duration:  1.8s
    #
    #   ↓ continued as new
    #
    # CURRENT
    #   Run ID:    run_456def789xyz
    #   Workflow:  batch_processor
    #   Status:    completed
    #   Duration:  0.5s
    ```
  </Tab>
</Tabs>

## Workflow Run Schema

The `WorkflowRun` schema includes continuation tracking fields:

| Field | Type | Description |
|-------|------|-------------|
| `continued_from_run_id` | `str \| None` | Run ID this execution continued from |
| `continued_to_run_id` | `str \| None` | Run ID this execution continued to |
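
For example, the endpoints of a chain can be located from these two fields alone (hypothetical snippet, building on `get_workflow_chain()` above):

```python
chain = await get_workflow_chain("run_abc123")

# The first run has no predecessor; the latest run has no successor.
first = next(run for run in chain if run.continued_from_run_id is None)
latest = next(run for run in chain if run.continued_to_run_id is None)
```

Since the returned list is ordered first to last, `chain[0]` and `chain[-1]` give the same answer; the point is that the links alone determine the endpoints.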

## Important Behaviors

### Arguments Are Required

`continue_as_new()` requires at least one argument:

```python
# Valid - explicit arguments
continue_as_new(offset=100)
continue_as_new(cursor="abc123", count=5)

# Invalid - will raise ValueError
continue_as_new()  # No arguments provided
```

<Warning>
  Unlike some workflow systems, PyWorkflow does not automatically reuse the original arguments. You must explicitly pass all arguments needed for the next execution.
</Warning>

### Child Workflows Are Cancelled

When a parent workflow continues as new, all running child workflows are cancelled:

```python
@workflow()
async def parent_workflow(iteration: int = 1):
    # Start child workflow
    handle = await start_child_workflow(child_workflow, "data")

    # When we continue as new, the child is cancelled
    continue_as_new(iteration=iteration + 1)
```

### Cancellation Takes Precedence

If a workflow is cancelled, `continue_as_new()` raises `CancellationError` instead:

```python
@workflow()
async def my_workflow(count: int):
    # If cancellation was requested, this raises CancellationError,
    # not ContinueAsNewSignal
    continue_as_new(count=count + 1)
```

### Status Is Terminal

`CONTINUED_AS_NEW` is a terminal status like `COMPLETED` or `FAILED`:

```python
# Cannot cancel a workflow that already continued
result = await cancel_workflow("run_that_continued")
# Returns False
```

## Events

The continuation is recorded as a `WORKFLOW_CONTINUED_AS_NEW` event:

```json
{
  "type": "workflow.continued_as_new",
  "run_id": "run_abc123",
  "timestamp": "2025-01-15T10:30:00Z",
  "data": {
    "new_run_id": "run_def456",
    "args": "[100]",
    "kwargs": "{\"batch_size\": 50}",
    "reason": null
  }
}
```

View continuation events with the CLI:

```bash
pyworkflow runs logs run_abc123 --filter continued
```

## Best Practices

<AccordionGroup>
  <Accordion title="Use for unbounded workflows">
    Any workflow that could run indefinitely (polling, queues, recurring tasks) should use `continue_as_new()` to prevent unbounded event history growth.

    ```python
    # Bad - an unbounded loop grows the event history forever
    @workflow()
    async def message_consumer():
        while True:  # Don't do this!
            msg = await get_next_message()
            await process_message(msg)

    # Better - use continue_as_new
    @workflow()
    async def message_consumer(messages_processed: int = 0):
        msg = await get_next_message()
        if msg:
            await process_message(msg)
            continue_as_new(messages_processed=messages_processed + 1)
        return f"Processed {messages_processed} messages"
    ```
  </Accordion>

  <Accordion title="Pass minimal state">
    Only pass the state needed for the next execution. Large payloads increase storage and serialization costs.

    ```python
    # Good - minimal state
    continue_as_new(cursor="abc123", processed_count=1000)

    # Bad - passing large data
    continue_as_new(all_results=huge_list_of_results)
    ```
  </Accordion>

  <Accordion title="Include progress tracking">
    Include counters or timestamps to track overall progress across the chain:

    ```python
    @workflow()
    async def sync_workflow(
        cursor: str | None = None,
        total_synced: int = 0,
        started_at: str | None = None
    ):
        if started_at is None:
            started_at = datetime.now().isoformat()

        items, new_cursor = await fetch_items(cursor)
        await sync_items(items)

        if new_cursor:
            continue_as_new(
                cursor=new_cursor,
                total_synced=total_synced + len(items),
                started_at=started_at
            )

        return {
            "total_synced": total_synced + len(items),
            "started_at": started_at,
            "completed_at": datetime.now().isoformat()
        }
    ```
  </Accordion>

  <Accordion title="Handle the final iteration">
    Always have a termination condition that returns normally:

    ```python
    @workflow()
    async def batch_workflow(offset: int = 0):
        items = await fetch_items(offset)

        if not items:
            # Terminal condition - return result
            return {"processed": offset, "status": "complete"}

        await process_items(items)
        continue_as_new(offset=offset + len(items))
    ```
  </Accordion>
</AccordionGroup>

## API Reference

### `continue_as_new()`

```python
def continue_as_new(*args: Any, **kwargs: Any) -> NoReturn
```

Complete the current workflow and start a new execution with fresh event history.

| Parameter | Type | Description |
|-----------|------|-------------|
| `*args` | `Any` | Positional arguments for the new execution |
| `**kwargs` | `Any` | Keyword arguments for the new execution |

**Raises:**
- `ContinueAsNewSignal` - Internal signal caught by the executor
- `ValueError` - If no arguments are provided
- `RuntimeError` - If called outside a workflow context
- `CancellationError` - If the workflow is being cancelled

### `get_workflow_chain()`

```python
async def get_workflow_chain(
    run_id: str,
    storage: StorageBackend | None = None,
) -> list[WorkflowRun]
```

Get all workflow runs in a continuation chain.

| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `run_id` | `str` | required | Any run ID in the chain |
| `storage` | `StorageBackend` | `None` | Storage backend (uses configured default) |

**Returns:** List of `WorkflowRun` objects ordered from first to last in the chain.

## Next Steps

<CardGroup cols={2}>
  <Card title="Sleep" icon="clock" href="/concepts/sleep">
    Learn about durable sleep for delays.
  </Card>
  <Card title="Hooks" icon="webhook" href="/concepts/hooks">
    Wait for external events in your workflows.
  </Card>
  <Card title="Fault Tolerance" icon="shield-check" href="/concepts/fault-tolerance">
    Automatic recovery from worker crashes.
  </Card>
  <Card title="CLI Guide" icon="terminal" href="/guides/cli">
    Manage workflows from the command line.
  </Card>
</CardGroup>

docs/concepts/events.mdx
DELETED
@@ -1,266 +0,0 @@
---
title: 'Events'
description: 'Event sourcing provides durability, auditability, and deterministic replay'
---

## What is Event Sourcing?

PyWorkflow uses event sourcing to achieve durable, fault-tolerant execution. Instead of storing just the current state, every state change is recorded as an immutable event in an append-only log. This enables:

- **Durability**: Workflows survive crashes and restarts
- **Replay**: Workflows can resume from any point
- **Auditability**: Complete history of everything that happened

```
Workflow Execution Timeline
────────────────────────────────────────────────────────►

Event 1            Event 2           Event 3         Event 4
workflow_started → step_completed → sleep_started → ...

      │                 │                │
      ▼                 ▼                ▼
┌──────────────────────────────────────────────────┐
│             Event Log (Append-Only)              │
└──────────────────────────────────────────────────┘
```
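
To make the idea concrete, here is a toy illustration in plain Python (not PyWorkflow's internals): state is never stored directly, it is always derived by folding the append-only log.

```python
# Toy event log - illustration only, not PyWorkflow's implementation.
log: list[dict] = []

def append(event_type: str, **data) -> None:
    """Events are immutable and only ever appended."""
    log.append({"type": event_type, "sequence": len(log) + 1, "data": data})

def current_status() -> str:
    """Derive the current state by replaying the log from the start."""
    status = "unknown"
    for event in log:
        if event["type"] == "workflow_started":
            status = "running"
        elif event["type"] == "workflow_suspended":
            status = "suspended"
        elif event["type"] == "workflow_completed":
            status = "completed"
    return status

append("workflow_started", workflow="order_processing")
append("workflow_suspended", reason="sleep")
print(current_status())  # suspended
```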

## How It Works

### Recording Events

As your workflow executes, PyWorkflow automatically records events:

```python
@workflow()
async def order_workflow(order_id: str):
    # Event: workflow_started

    order = await validate_order(order_id)
    # Event: step_completed (validate_order)

    await sleep("1h")
    # Event: sleep_started
    # Workflow suspends here...

    # ... 1 hour later ...
    # Event: workflow_resumed

    await send_confirmation(order)
    # Event: step_completed (send_confirmation)

    return order
    # Event: workflow_completed
```

### Replaying Events

When a workflow resumes after suspension, PyWorkflow replays all recorded events to restore the exact state:

```python
# Replay process:
# 1. Load all events for this run_id
# 2. For each step_completed event, cache the result
# 3. Re-execute the workflow function
# 4. When a step is called, return the cached result instead of executing
# 5. Continue from where we left off
```

<Note>
  During replay, steps are not re-executed. Their cached results from the event log are returned immediately. This ensures deterministic execution.
</Note>
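
The sketch below shows the shape of that replay loop. It is a simplified stand-in, not the engine's actual code: a cache built from `step_completed` events short-circuits steps that already ran.

```python
# Simplified replay sketch - illustration only, not PyWorkflow's engine.
step_cache: dict[str, object] = {}

async def execute_step(step_id: str, fn, *args):
    if step_id in step_cache:
        # Replay: the step completed in a previous execution,
        # so return its recorded result without running it again.
        return step_cache[step_id]
    result = await fn(*args)        # First execution: actually run the step
    step_cache[step_id] = result    # Record, as a step_completed event would
    return result
```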

## Event Types

PyWorkflow records 16 event types. The core ones fall into the four groups below; continuation events such as `workflow.continued_as_new` are covered in [Continue-As-New](/concepts/continue-as-new).

### Workflow Events

| Event | Description |
|-------|-------------|
| `workflow_started` | Workflow execution began |
| `workflow_completed` | Workflow finished successfully |
| `workflow_failed` | Workflow terminated with an error |
| `workflow_suspended` | Workflow paused (sleep, webhook) |
| `workflow_resumed` | Workflow continued after suspension |

### Step Events

| Event | Description |
|-------|-------------|
| `step_started` | Step execution began |
| `step_completed` | Step finished successfully (result cached) |
| `step_failed` | Step failed (may retry) |
| `step_retrying` | Step is being retried |

### Sleep Events

| Event | Description |
|-------|-------------|
| `sleep_started` | Sleep/delay began |
| `sleep_completed` | Sleep finished, workflow resuming |

### Log Events

| Event | Description |
|-------|-------------|
| `log_info` | Info-level log message |
| `log_warning` | Warning-level log message |
| `log_error` | Error-level log message |
| `log_debug` | Debug-level log message |

## Event Structure

Each event contains:

```python
{
    "id": "evt_abc123",                   # Unique event ID
    "run_id": "run_xyz789",               # Workflow run ID
    "type": "step_completed",             # Event type
    "timestamp": "2025-01-15T10:30:45Z",
    "sequence": 3,                        # Order in the event log
    "data": {                             # Event-specific data
        "step_id": "validate_order",
        "step_name": "validate_order",
        "result": {"order_id": "ORD-123", "valid": True},
        "duration_ms": 150
    }
}
```
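
Because completed step results live in `data.result`, the replay cache sketched earlier can be rebuilt directly from stored events. This is a hypothetical snippet that uses `get_events()` from the next section:

```python
# Rebuild the step-result cache from recorded events - illustration only.
events = await storage.get_events("run_xyz789")
step_cache = {
    event.data["step_id"]: event.data["result"]
    for event in events
    if event.type == "step_completed"
}
```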

## Inspecting Events

### Via Storage Backend

```python
from pyworkflow.storage.file import FileStorageBackend

storage = FileStorageBackend()

# Get all events for a workflow run
events = await storage.get_events("run_xyz789")

for event in events:
    print(f"{event.sequence}: {event.type}")
    print(f"  Data: {event.data}")
    print(f"  Time: {event.timestamp}")
```

### Example Event Log

```
Sequence | Type               | Data
---------|--------------------|----------------------------------
1        | workflow_started   | {workflow: "order_processing"}
2        | step_started       | {step_id: "validate_order"}
3        | step_completed     | {step_id: "validate_order", result: {...}}
4        | step_started       | {step_id: "process_payment"}
5        | step_failed        | {step_id: "process_payment", error: "timeout"}
6        | step_retrying      | {step_id: "process_payment", attempt: 2}
7        | step_started       | {step_id: "process_payment"}
8        | step_completed     | {step_id: "process_payment", result: {...}}
9        | sleep_started      | {duration: "1h", wake_time: "..."}
10       | workflow_suspended | {reason: "sleep"}
```

## Deterministic Replay

For replay to work correctly, workflows must be deterministic:

<Warning>
  **Don't do this** - Non-deterministic operations break replay:

  ```python
  @workflow()
  async def bad_workflow():
      # BAD: Random values differ on replay
      order_id = f"ORD-{random.randint(1000, 9999)}"

      # BAD: Current time differs on replay
      if datetime.now().hour < 12:
          await morning_flow()
      else:
          await afternoon_flow()
  ```
</Warning>

<Tip>
  **Do this instead** - Use steps for non-deterministic operations:

  ```python
  @step()
  async def generate_order_id():
      # Results are cached, so same ID on replay
      return f"ORD-{random.randint(1000, 9999)}"

  @step()
  async def get_current_time():
      # Cached, so same time on replay
      return datetime.now()

  @workflow()
  async def good_workflow():
      order_id = await generate_order_id()
      current_time = await get_current_time()

      if current_time.hour < 12:
          await morning_flow()
      else:
          await afternoon_flow()
  ```
</Tip>

## Storage Backends

Events are stored in a pluggable storage backend:

| Backend | Status | Use Case |
|---------|--------|----------|
| **File** | Available | Development, single-machine |
| **Redis** | Planned | Production, distributed |
| **PostgreSQL** | Planned | Enterprise, complex queries |
| **SQLite** | Planned | Embedded applications |

### Configuring Storage

```python
from pyworkflow.storage.file import FileStorageBackend

# File storage (default)
storage = FileStorageBackend(
    base_path="/var/lib/pyworkflow/events"
)

# Configure PyWorkflow to use this storage
from pyworkflow import configure
configure(storage=storage)
```

## Benefits of Event Sourcing

<CardGroup cols={2}>
  <Card title="Complete Audit Trail" icon="scroll">
    Every action is recorded. Know exactly what happened and when.
  </Card>
  <Card title="Time Travel Debugging" icon="clock-rotate-left">
    Replay workflows to debug issues. See the exact state at any point.
  </Card>
  <Card title="Failure Recovery" icon="shield">
    Resume from the last successful point after a crash or restart.
  </Card>
  <Card title="Event-Driven Architecture" icon="bolt">
    Events can trigger other systems, enabling loose coupling.
  </Card>
</CardGroup>

## Next Steps

<CardGroup cols={2}>
  <Card title="Sleep" icon="clock" href="/concepts/sleep">
    Learn how workflows suspend and resume with sleep.
  </Card>
  <Card title="Deployment" icon="rocket" href="/guides/deployment">
    Configure storage backends for production.
  </Card>
</CardGroup>