pyworkflow-engine 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. pyworkflow/__init__.py +10 -1
  2. pyworkflow/celery/tasks.py +272 -24
  3. pyworkflow/cli/__init__.py +4 -1
  4. pyworkflow/cli/commands/runs.py +4 -4
  5. pyworkflow/cli/commands/setup.py +203 -4
  6. pyworkflow/cli/utils/config_generator.py +76 -3
  7. pyworkflow/cli/utils/docker_manager.py +232 -0
  8. pyworkflow/context/__init__.py +13 -0
  9. pyworkflow/context/base.py +26 -0
  10. pyworkflow/context/local.py +80 -0
  11. pyworkflow/context/step_context.py +295 -0
  12. pyworkflow/core/registry.py +6 -1
  13. pyworkflow/core/step.py +141 -0
  14. pyworkflow/core/workflow.py +56 -0
  15. pyworkflow/engine/events.py +30 -0
  16. pyworkflow/engine/replay.py +39 -0
  17. pyworkflow/primitives/child_workflow.py +1 -1
  18. pyworkflow/runtime/local.py +1 -1
  19. pyworkflow/storage/__init__.py +14 -0
  20. pyworkflow/storage/base.py +35 -0
  21. pyworkflow/storage/cassandra.py +1747 -0
  22. pyworkflow/storage/config.py +69 -0
  23. pyworkflow/storage/dynamodb.py +31 -2
  24. pyworkflow/storage/file.py +28 -0
  25. pyworkflow/storage/memory.py +18 -0
  26. pyworkflow/storage/mysql.py +1159 -0
  27. pyworkflow/storage/postgres.py +27 -2
  28. pyworkflow/storage/schemas.py +4 -3
  29. pyworkflow/storage/sqlite.py +25 -2
  30. {pyworkflow_engine-0.1.7.dist-info → pyworkflow_engine-0.1.9.dist-info}/METADATA +7 -4
  31. pyworkflow_engine-0.1.9.dist-info/RECORD +91 -0
  32. pyworkflow_engine-0.1.9.dist-info/top_level.txt +1 -0
  33. dashboard/backend/app/__init__.py +0 -1
  34. dashboard/backend/app/config.py +0 -32
  35. dashboard/backend/app/controllers/__init__.py +0 -6
  36. dashboard/backend/app/controllers/run_controller.py +0 -86
  37. dashboard/backend/app/controllers/workflow_controller.py +0 -33
  38. dashboard/backend/app/dependencies/__init__.py +0 -5
  39. dashboard/backend/app/dependencies/storage.py +0 -50
  40. dashboard/backend/app/repositories/__init__.py +0 -6
  41. dashboard/backend/app/repositories/run_repository.py +0 -80
  42. dashboard/backend/app/repositories/workflow_repository.py +0 -27
  43. dashboard/backend/app/rest/__init__.py +0 -8
  44. dashboard/backend/app/rest/v1/__init__.py +0 -12
  45. dashboard/backend/app/rest/v1/health.py +0 -33
  46. dashboard/backend/app/rest/v1/runs.py +0 -133
  47. dashboard/backend/app/rest/v1/workflows.py +0 -41
  48. dashboard/backend/app/schemas/__init__.py +0 -23
  49. dashboard/backend/app/schemas/common.py +0 -16
  50. dashboard/backend/app/schemas/event.py +0 -24
  51. dashboard/backend/app/schemas/hook.py +0 -25
  52. dashboard/backend/app/schemas/run.py +0 -54
  53. dashboard/backend/app/schemas/step.py +0 -28
  54. dashboard/backend/app/schemas/workflow.py +0 -31
  55. dashboard/backend/app/server.py +0 -87
  56. dashboard/backend/app/services/__init__.py +0 -6
  57. dashboard/backend/app/services/run_service.py +0 -240
  58. dashboard/backend/app/services/workflow_service.py +0 -155
  59. dashboard/backend/main.py +0 -18
  60. docs/concepts/cancellation.mdx +0 -362
  61. docs/concepts/continue-as-new.mdx +0 -434
  62. docs/concepts/events.mdx +0 -266
  63. docs/concepts/fault-tolerance.mdx +0 -370
  64. docs/concepts/hooks.mdx +0 -552
  65. docs/concepts/limitations.mdx +0 -167
  66. docs/concepts/schedules.mdx +0 -775
  67. docs/concepts/sleep.mdx +0 -312
  68. docs/concepts/steps.mdx +0 -301
  69. docs/concepts/workflows.mdx +0 -255
  70. docs/guides/cli.mdx +0 -942
  71. docs/guides/configuration.mdx +0 -560
  72. docs/introduction.mdx +0 -155
  73. docs/quickstart.mdx +0 -279
  74. examples/__init__.py +0 -1
  75. examples/celery/__init__.py +0 -1
  76. examples/celery/durable/docker-compose.yml +0 -55
  77. examples/celery/durable/pyworkflow.config.yaml +0 -12
  78. examples/celery/durable/workflows/__init__.py +0 -122
  79. examples/celery/durable/workflows/basic.py +0 -87
  80. examples/celery/durable/workflows/batch_processing.py +0 -102
  81. examples/celery/durable/workflows/cancellation.py +0 -273
  82. examples/celery/durable/workflows/child_workflow_patterns.py +0 -240
  83. examples/celery/durable/workflows/child_workflows.py +0 -202
  84. examples/celery/durable/workflows/continue_as_new.py +0 -260
  85. examples/celery/durable/workflows/fault_tolerance.py +0 -210
  86. examples/celery/durable/workflows/hooks.py +0 -211
  87. examples/celery/durable/workflows/idempotency.py +0 -112
  88. examples/celery/durable/workflows/long_running.py +0 -99
  89. examples/celery/durable/workflows/retries.py +0 -101
  90. examples/celery/durable/workflows/schedules.py +0 -209
  91. examples/celery/transient/01_basic_workflow.py +0 -91
  92. examples/celery/transient/02_fault_tolerance.py +0 -257
  93. examples/celery/transient/__init__.py +0 -20
  94. examples/celery/transient/pyworkflow.config.yaml +0 -25
  95. examples/local/__init__.py +0 -1
  96. examples/local/durable/01_basic_workflow.py +0 -94
  97. examples/local/durable/02_file_storage.py +0 -132
  98. examples/local/durable/03_retries.py +0 -169
  99. examples/local/durable/04_long_running.py +0 -119
  100. examples/local/durable/05_event_log.py +0 -145
  101. examples/local/durable/06_idempotency.py +0 -148
  102. examples/local/durable/07_hooks.py +0 -334
  103. examples/local/durable/08_cancellation.py +0 -233
  104. examples/local/durable/09_child_workflows.py +0 -198
  105. examples/local/durable/10_child_workflow_patterns.py +0 -265
  106. examples/local/durable/11_continue_as_new.py +0 -249
  107. examples/local/durable/12_schedules.py +0 -198
  108. examples/local/durable/__init__.py +0 -1
  109. examples/local/transient/01_quick_tasks.py +0 -87
  110. examples/local/transient/02_retries.py +0 -130
  111. examples/local/transient/03_sleep.py +0 -141
  112. examples/local/transient/__init__.py +0 -1
  113. pyworkflow_engine-0.1.7.dist-info/RECORD +0 -196
  114. pyworkflow_engine-0.1.7.dist-info/top_level.txt +0 -5
  115. tests/examples/__init__.py +0 -0
  116. tests/integration/__init__.py +0 -0
  117. tests/integration/test_cancellation.py +0 -330
  118. tests/integration/test_child_workflows.py +0 -439
  119. tests/integration/test_continue_as_new.py +0 -428
  120. tests/integration/test_dynamodb_storage.py +0 -1146
  121. tests/integration/test_fault_tolerance.py +0 -369
  122. tests/integration/test_schedule_storage.py +0 -484
  123. tests/unit/__init__.py +0 -0
  124. tests/unit/backends/__init__.py +0 -1
  125. tests/unit/backends/test_dynamodb_storage.py +0 -1554
  126. tests/unit/backends/test_postgres_storage.py +0 -1281
  127. tests/unit/backends/test_sqlite_storage.py +0 -1460
  128. tests/unit/conftest.py +0 -41
  129. tests/unit/test_cancellation.py +0 -364
  130. tests/unit/test_child_workflows.py +0 -680
  131. tests/unit/test_continue_as_new.py +0 -441
  132. tests/unit/test_event_limits.py +0 -316
  133. tests/unit/test_executor.py +0 -320
  134. tests/unit/test_fault_tolerance.py +0 -334
  135. tests/unit/test_hooks.py +0 -495
  136. tests/unit/test_registry.py +0 -261
  137. tests/unit/test_replay.py +0 -420
  138. tests/unit/test_schedule_schemas.py +0 -285
  139. tests/unit/test_schedule_utils.py +0 -286
  140. tests/unit/test_scheduled_workflow.py +0 -274
  141. tests/unit/test_step.py +0 -353
  142. tests/unit/test_workflow.py +0 -243
  143. {pyworkflow_engine-0.1.7.dist-info → pyworkflow_engine-0.1.9.dist-info}/WHEEL +0 -0
  144. {pyworkflow_engine-0.1.7.dist-info → pyworkflow_engine-0.1.9.dist-info}/entry_points.txt +0 -0
  145. {pyworkflow_engine-0.1.7.dist-info → pyworkflow_engine-0.1.9.dist-info}/licenses/LICENSE +0 -0
docs/concepts/continue-as-new.mdx DELETED
@@ -1,434 +0,0 @@
1
- ---
2
- title: 'Continue-As-New'
3
- description: 'Reset event history by continuing workflows as new executions'
4
- ---
5
-
6
- ## Overview
7
-
8
- Long-running workflows can accumulate large event histories that impact performance. `continue_as_new()` solves this by completing the current workflow and immediately starting a fresh execution with clean event history.
9
-
10
- <CardGroup cols={2}>
11
- <Card title="Fresh Event History" icon="rotate">
12
- Each continuation starts with a clean event log.
13
- </Card>
14
- <Card title="Chain Tracking" icon="link">
15
- Workflow runs are linked via `continued_from_run_id` and `continued_to_run_id`.
16
- </Card>
17
- <Card title="State Preservation" icon="database">
18
- Pass state to the new execution via arguments.
19
- </Card>
20
- <Card title="Unlimited Duration" icon="infinity">
21
- Run workflows indefinitely without unbounded history growth.
22
- </Card>
23
- </CardGroup>
24
-
25
- ## When to Use Continue-As-New
26
-
27
- `continue_as_new()` is ideal for:
28
-
29
- | Use Case | Description |
30
- |----------|-------------|
31
- | **Polling Workflows** | Continuously poll for updates without accumulating events |
32
- | **Batch Processing** | Process large datasets in chunks, resetting history between batches |
33
- | **Recurring Tasks** | Daily/weekly reports or scheduled jobs that run indefinitely |
34
- | **Queue Consumers** | Process messages from a queue without history growth |
35
- | **Long-Running Sync** | Sync data between systems over extended periods |
36
-
37
- ## Basic Usage
38
-
39
- Call `continue_as_new()` with the arguments for the new execution:
40
-
41
- ```python
42
- from pyworkflow import workflow, step, continue_as_new
43
-
44
- @step()
45
- async def fetch_batch(offset: int, batch_size: int) -> list:
46
- """Fetch a batch of items to process."""
47
- items = await db.query(offset=offset, limit=batch_size)
48
- return items
49
-
50
- @step()
51
- async def process_item(item: dict) -> dict:
52
- """Process a single item."""
53
- return await transform(item)
54
-
55
- @workflow()
56
- async def batch_processor(offset: int = 0, batch_size: int = 100) -> str:
57
- """Process items in batches with fresh event history."""
58
- items = await fetch_batch(offset, batch_size)
59
-
60
- if not items:
61
- # No more items - workflow complete
62
- return f"Processed {offset} total items"
63
-
64
- # Process this batch
65
- for item in items:
66
- await process_item(item)
67
-
68
- # Continue with next batch (fresh event history)
69
- continue_as_new(offset=offset + batch_size, batch_size=batch_size)
70
- ```
71
-
72
- <Note>
73
- `continue_as_new()` never returns - it raises an internal signal that the executor catches. Any code after it will not execute.
74
- </Note>
75
-
76
- ## How It Works
77
-
78
- When `continue_as_new()` is called:
79
-
80
- ```
81
- ┌─────────────────────────────────────────────────────────────────────┐
82
- │ Continue-As-New Flow │
83
- │ │
84
- │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐│
85
- │ │ Run #1 │────────▶│ Run #2 │────────▶│ Run #3 ││
86
- │ │ offset=0 │ │ offset=100 │ │ offset=200 ││
87
- │ │ ─────── │ │ ─────── │ │ ─────── ││
88
- │ │ 10 events │ │ 10 events │ │ 5 events ││
89
- │ │ CONTINUED │ │ CONTINUED │ │ COMPLETED ││
90
- │ └──────────────┘ └──────────────┘ └──────────────┘│
91
- │ │ │ │ │
92
- │ └────────────────────────┴────────────────────────┘ │
93
- │ Workflow Chain │
94
- └─────────────────────────────────────────────────────────────────────┘
95
- ```
96
-
97
- 1. Current run is marked as `CONTINUED_AS_NEW`
98
- 2. A `WORKFLOW_CONTINUED_AS_NEW` event is recorded
99
- 3. A new run is created with `continued_from_run_id` set
100
- 4. The new run starts executing with the provided arguments
101
- 5. The new run starts with a fresh, empty event history
102
-
103
- ## Patterns
104
-
105
- ### Polling Workflow
106
-
107
- ```python
108
- @workflow()
109
- async def polling_workflow(cursor: str | None = None, poll_count: int = 0):
110
- """Poll for updates indefinitely."""
111
- # Check for new data
112
- new_cursor, updates = await check_for_updates(cursor)
113
-
114
- if updates:
115
- for update in updates:
116
- await process_update(update)
117
-
118
- if new_cursor is None:
119
- return f"Polling complete after {poll_count + 1} polls"
120
-
121
- # Continue polling with new cursor
122
- continue_as_new(cursor=new_cursor, poll_count=poll_count + 1)
123
- ```
124
-
125
- ### Recurring Task with Sleep
126
-
127
- ```python
128
- @workflow()
129
- async def daily_report(day: int = 1):
130
- """Generate daily reports indefinitely."""
131
- await generate_report(day)
132
-
133
- # Wait until next day
134
- await sleep("24h")
135
-
136
- # Continue with next day (fresh history)
137
- continue_as_new(day=day + 1)
138
- ```
139
-
140
- ### Bounded Iterations
141
-
142
- ```python
143
- @workflow()
144
- async def bounded_workflow(iteration: int = 1, max_iterations: int = 10):
145
- """Run for a fixed number of iterations."""
146
- await do_work(iteration)
147
-
148
- if iteration >= max_iterations:
149
- return f"Completed {max_iterations} iterations"
150
-
151
- continue_as_new(iteration=iteration + 1, max_iterations=max_iterations)
152
- ```
153
-
154
- ## Tracking Workflow Chains
155
-
156
- Use `get_workflow_chain()` to retrieve all runs in a continuation chain:
157
-
158
- <Tabs>
159
- <Tab title="Python API">
160
- ```python
161
- from pyworkflow import get_workflow_chain
162
-
163
- # Get the full chain from any run in it
164
- chain = await get_workflow_chain("run_abc123")
165
-
166
- for run in chain:
167
- print(f"{run.run_id}: {run.status.value}")
168
- print(f" From: {run.continued_from_run_id}")
169
- print(f" To: {run.continued_to_run_id}")
170
- ```
171
- </Tab>
172
- <Tab title="CLI">
173
- ```bash
174
- # View the continuation chain
175
- pyworkflow runs chain run_abc123
176
-
177
- # Output:
178
- # Continue-As-New Chain
179
- # ────────────────────────────────────────────────────────
180
- # Chain length: 3 run(s)
181
- #
182
- # START
183
- # Run ID: run_abc123def456
184
- # Workflow: batch_processor
185
- # Status: continued_as_new
186
- # Duration: 2.3s
187
- #
188
- # ↓ continued as new
189
- #
190
- # #2
191
- # Run ID: run_789xyz123abc
192
- # Workflow: batch_processor
193
- # Status: continued_as_new
194
- # Duration: 1.8s
195
- #
196
- # ↓ continued as new
197
- #
198
- # CURRENT
199
- # Run ID: run_456def789xyz
200
- # Workflow: batch_processor
201
- # Status: completed
202
- # Duration: 0.5s
203
- ```
204
- </Tab>
205
- </Tabs>
206
-
207
- ## Workflow Run Schema
208
-
209
- The `WorkflowRun` schema includes continuation tracking fields:
210
-
211
- | Field | Type | Description |
212
- |-------|------|-------------|
213
- | `continued_from_run_id` | `str \| None` | Run ID this execution continued from |
214
- | `continued_to_run_id` | `str \| None` | Run ID this execution continued to |
215
-
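These two fields are enough to walk a chain by hand when `get_workflow_chain()` is not available. The sketch below is illustrative only: it assumes a `storage.get_run(run_id)` accessor that returns a `WorkflowRun`, which may not match the exact storage API.

```python
# Illustrative sketch only: storage.get_run() is an assumed accessor,
# not a confirmed PyWorkflow API; get_workflow_chain() does this for you.
async def walk_chain(storage, run_id: str) -> list:
    current = await storage.get_run(run_id)

    # Rewind to the first run in the chain.
    while current.continued_from_run_id:
        current = await storage.get_run(current.continued_from_run_id)

    # Walk forward along continued_to_run_id links.
    runs = [current]
    while current.continued_to_run_id:
        current = await storage.get_run(current.continued_to_run_id)
        runs.append(current)

    return runs
```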
216
- ## Important Behaviors
217
-
218
- ### Arguments Are Required
219
-
220
- `continue_as_new()` requires at least one argument:
221
-
222
- ```python
223
- # Valid - explicit arguments
224
- continue_as_new(offset=100)
225
- continue_as_new(cursor="abc123", count=5)
226
-
227
- # Invalid - will raise ValueError
228
- continue_as_new() # No arguments provided
229
- ```
230
-
231
- <Warning>
232
- Unlike some workflow systems, PyWorkflow does not automatically use the original arguments. You must explicitly pass all arguments needed for the next execution.
233
- </Warning>
234
-
235
- ### Child Workflows Are Cancelled
236
-
237
- When a parent workflow continues as new, all running child workflows are cancelled:
238
-
239
- ```python
240
- @workflow()
241
- async def parent_workflow(iteration: int = 1):
242
- # Start child workflow
243
- handle = await start_child_workflow(child_workflow, "data")
244
-
245
- # When we continue as new, the child is cancelled
246
- continue_as_new(iteration=iteration + 1)
247
- ```
248
-
249
- ### Cancellation Takes Precedence
250
-
251
- If a workflow is cancelled, `continue_as_new()` will raise `CancellationError` instead:
252
-
253
- ```python
254
- @workflow()
255
- async def my_workflow(count: int):
256
- # If cancellation was requested, this raises CancellationError
257
- # not ContinueAsNewSignal
258
- continue_as_new(count=count + 1)
259
- ```
260
-
261
- ### Status is Terminal
262
-
263
- `CONTINUED_AS_NEW` is a terminal status like `COMPLETED` or `FAILED`:
264
-
265
- ```python
266
- # Cannot cancel a workflow that already continued
267
- result = await cancel_workflow("run_that_continued")
268
- # Returns False
269
- ```
270
-
271
- ## Events
272
-
273
- The continuation is recorded as a `WORKFLOW_CONTINUED_AS_NEW` event:
274
-
275
- ```json
276
- {
277
- "type": "workflow.continued_as_new",
278
- "run_id": "run_abc123",
279
- "timestamp": "2025-01-15T10:30:00Z",
280
- "data": {
281
- "new_run_id": "run_def456",
282
- "args": "[100]",
283
- "kwargs": "{\"batch_size\": 50}",
284
- "reason": null
285
- }
286
- }
287
- ```
288
-
289
- View continuation events with the CLI:
290
-
291
- ```bash
292
- pyworkflow runs logs run_abc123 --filter continued
293
- ```
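The same event can also be located programmatically. The snippet below is a sketch that reuses the `storage.get_events()` accessor shown in the Events concept page; the field access mirrors the JSON payload above.

```python
# Sketch: find continuation events for a run and print the handoff.
events = await storage.get_events("run_abc123")

for event in events:
    if event.type == "workflow.continued_as_new":
        print(f"{event.run_id} continued as {event.data['new_run_id']}")
```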
294
-
295
- ## Best Practices
296
-
297
- <AccordionGroup>
298
- <Accordion title="Use for unbounded workflows">
299
- Any workflow that could run indefinitely (polling, queues, recurring tasks) should use `continue_as_new()` to prevent unbounded event history growth.
300
-
301
- ```python
302
- @workflow()
303
- async def message_consumer():
304
- while True: # Don't do this!
305
- msg = await get_next_message()
306
- await process_message(msg)
307
-
308
- # Better - use continue_as_new
309
- @workflow()
310
- async def message_consumer(messages_processed: int = 0):
311
- msg = await get_next_message()
312
- if msg:
313
- await process_message(msg)
314
- continue_as_new(messages_processed=messages_processed + 1)
315
- return f"Processed {messages_processed} messages"
316
- ```
317
- </Accordion>
318
-
319
- <Accordion title="Pass minimal state">
320
- Only pass the state needed for the next execution. Large payloads increase storage and serialization costs.
321
-
322
- ```python
323
- # Good - minimal state
324
- continue_as_new(cursor="abc123", processed_count=1000)
325
-
326
- # Bad - passing large data
327
- continue_as_new(all_results=huge_list_of_results)
328
- ```
329
- </Accordion>
330
-
331
- <Accordion title="Include progress tracking">
332
- Include counters or timestamps to track overall progress across the chain:
333
-
334
- ```python
335
- @workflow()
336
- async def sync_workflow(
337
- cursor: str | None = None,
338
- total_synced: int = 0,
339
- started_at: str | None = None
340
- ):
341
- if started_at is None:
342
- started_at = datetime.now().isoformat()
343
-
344
- items, new_cursor = await fetch_items(cursor)
345
- await sync_items(items)
346
-
347
- if new_cursor:
348
- continue_as_new(
349
- cursor=new_cursor,
350
- total_synced=total_synced + len(items),
351
- started_at=started_at
352
- )
353
-
354
- return {
355
- "total_synced": total_synced + len(items),
356
- "started_at": started_at,
357
- "completed_at": datetime.now().isoformat()
358
- }
359
- ```
360
- </Accordion>
361
-
362
- <Accordion title="Handle the final iteration">
363
- Always have a termination condition that returns normally:
364
-
365
- ```python
366
- @workflow()
367
- async def batch_workflow(offset: int = 0):
368
- items = await fetch_items(offset)
369
-
370
- if not items:
371
- # Terminal condition - return result
372
- return {"processed": offset, "status": "complete"}
373
-
374
- await process_items(items)
375
- continue_as_new(offset=offset + len(items))
376
- ```
377
- </Accordion>
378
- </AccordionGroup>
379
-
380
- ## API Reference
381
-
382
- ### `continue_as_new()`
383
-
384
- ```python
385
- def continue_as_new(*args: Any, **kwargs: Any) -> NoReturn
386
- ```
387
-
388
- Complete the current workflow and start a new execution with fresh event history.
389
-
390
- | Parameter | Type | Description |
391
- |-----------|------|-------------|
392
- | `*args` | `Any` | Positional arguments for the new execution |
393
- | `**kwargs` | `Any` | Keyword arguments for the new execution |
394
-
395
- **Raises:**
396
- - `ContinueAsNewSignal` - Internal signal caught by the executor
397
- - `ValueError` - If no arguments are provided
398
- - `RuntimeError` - If called outside a workflow context
399
- - `CancellationError` - If workflow is being cancelled
400
-
401
- ### `get_workflow_chain()`
402
-
403
- ```python
404
- async def get_workflow_chain(
405
- run_id: str,
406
- storage: StorageBackend | None = None,
407
- ) -> list[WorkflowRun]
408
- ```
409
-
410
- Get all workflow runs in a continuation chain.
411
-
412
- | Parameter | Type | Default | Description |
413
- |-----------|------|---------|-------------|
414
- | `run_id` | `str` | required | Any run ID in the chain |
415
- | `storage` | `StorageBackend` | `None` | Storage backend (uses configured default) |
416
-
417
- **Returns:** List of `WorkflowRun` objects ordered from first to last in the chain.
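For instance, the last entry of the returned list is the most recent run in the chain, which is handy for checking where a long-lived workflow currently stands:

```python
from pyworkflow import get_workflow_chain

# The chain is ordered first -> last, so the final entry is the
# most recent run (completed, continued, or still running).
chain = await get_workflow_chain("run_abc123")
latest = chain[-1]
print(f"Latest run: {latest.run_id} ({latest.status.value})")
```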
418
-
419
- ## Next Steps
420
-
421
- <CardGroup cols={2}>
422
- <Card title="Sleep" icon="clock" href="/concepts/sleep">
423
- Learn about durable sleep for delays.
424
- </Card>
425
- <Card title="Hooks" icon="webhook" href="/concepts/hooks">
426
- Wait for external events in your workflows.
427
- </Card>
428
- <Card title="Fault Tolerance" icon="shield-check" href="/concepts/fault-tolerance">
429
- Automatic recovery from worker crashes.
430
- </Card>
431
- <Card title="CLI Guide" icon="terminal" href="/guides/cli">
432
- Manage workflows from the command line.
433
- </Card>
434
- </CardGroup>
docs/concepts/events.mdx DELETED
@@ -1,266 +0,0 @@
1
- ---
2
- title: 'Events'
3
- description: 'Event sourcing provides durability, auditability, and deterministic replay'
4
- ---
5
-
6
- ## What is Event Sourcing?
7
-
8
- PyWorkflow uses event sourcing to achieve durable, fault-tolerant execution. Instead of storing just the current state, every state change is recorded as an immutable event in an append-only log. This enables:
9
-
10
- - **Durability**: Workflows survive crashes and restarts
11
- - **Replay**: Workflows can resume from any point
12
- - **Auditability**: Complete history of everything that happened
13
-
14
- ```
15
- Workflow Execution Timeline
16
- ────────────────────────────────────────────────────────►
17
-
18
- Event 1 Event 2 Event 3 Event 4
19
- workflow_started → step_completed → sleep_started → ...
20
-
21
- │ │ │
22
- ▼ ▼ ▼
23
- ┌──────────────────────────────────────────────────┐
24
- │ Event Log (Append-Only) │
25
- └──────────────────────────────────────────────────┘
26
- ```
27
-
28
- ## How It Works
29
-
30
- ### Recording Events
31
-
32
- As your workflow executes, PyWorkflow automatically records events:
33
-
34
- ```python
35
- @workflow()
36
- async def order_workflow(order_id: str):
37
- # Event: workflow_started
38
-
39
- order = await validate_order(order_id)
40
- # Event: step_completed (validate_order)
41
-
42
- await sleep("1h")
43
- # Event: sleep_started
44
- # Workflow suspends here...
45
-
46
- # ... 1 hour later ...
47
- # Event: workflow_resumed
48
-
49
- await send_confirmation(order)
50
- # Event: step_completed (send_confirmation)
51
-
52
- return order
53
- # Event: workflow_completed
54
- ```
55
-
56
- ### Replaying Events
57
-
58
- When a workflow resumes after suspension, PyWorkflow replays all recorded events to restore the exact state:
59
-
60
- ```python
61
- # Replay process:
62
- # 1. Load all events for this run_id
63
- # 2. For each step_completed event, cache the result
64
- # 3. Re-execute the workflow function
65
- # 4. When a step is called, return cached result instead of executing
66
- # 5. Continue from where we left off
67
- ```
68
-
69
- <Note>
70
- During replay, steps are not re-executed. Their cached results from the event log are returned immediately. This ensures deterministic execution.
71
- </Note>
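As a rough illustration of that replay idea (not PyWorkflow's actual executor), completed step results can be indexed from the event log and consulted before executing a step. `storage.record_event()` below is an assumed helper name, not a confirmed API.

```python
# Simplified sketch of replay-by-caching. Real replay also handles
# sequencing, retries, sleeps, and suspension; this only shows the
# "return the cached result instead of re-executing" core.
async def run_step_with_replay(storage, run_id, step_id, step_fn, *args):
    events = await storage.get_events(run_id)
    cache = {
        e.data["step_id"]: e.data["result"]
        for e in events
        if e.type == "step_completed"
    }

    if step_id in cache:
        # Replay: the step already ran in a previous execution.
        return cache[step_id]

    # First execution: run the step and append its result to the log.
    result = await step_fn(*args)
    await storage.record_event(  # assumed helper, not a confirmed API
        run_id, "step_completed", {"step_id": step_id, "result": result}
    )
    return result
```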
72
-
73
- ## Event Types
74
-
75
- PyWorkflow records 16 event types in total; the most commonly encountered are listed below:
76
-
77
- ### Workflow Events
78
-
79
- | Event | Description |
80
- |-------|-------------|
81
- | `workflow_started` | Workflow execution began |
82
- | `workflow_completed` | Workflow finished successfully |
83
- | `workflow_failed` | Workflow terminated with an error |
84
- | `workflow_suspended` | Workflow paused (sleep, webhook) |
85
- | `workflow_resumed` | Workflow continued after suspension |
86
-
87
- ### Step Events
88
-
89
- | Event | Description |
90
- |-------|-------------|
91
- | `step_started` | Step execution began |
92
- | `step_completed` | Step finished successfully (result cached) |
93
- | `step_failed` | Step failed (may retry) |
94
- | `step_retrying` | Step is being retried |
95
-
96
- ### Sleep Events
97
-
98
- | Event | Description |
99
- |-------|-------------|
100
- | `sleep_started` | Sleep/delay began |
101
- | `sleep_completed` | Sleep finished, workflow resuming |
102
-
103
- ### Log Events
104
-
105
- | Event | Description |
106
- |-------|-------------|
107
- | `log_info` | Info-level log message |
108
- | `log_warning` | Warning-level log message |
109
- | `log_error` | Error-level log message |
110
- | `log_debug` | Debug-level log message |
111
-
112
- ## Event Structure
113
-
114
- Each event contains:
115
-
116
- ```python
117
- {
118
- "id": "evt_abc123", # Unique event ID
119
- "run_id": "run_xyz789", # Workflow run ID
120
- "type": "step_completed", # Event type
121
- "timestamp": "2025-01-15T10:30:45Z",
122
- "sequence": 3, # Order in the event log
123
- "data": { # Event-specific data
124
- "step_id": "validate_order",
125
- "step_name": "validate_order",
126
- "result": {"order_id": "ORD-123", "valid": True},
127
- "duration_ms": 150
128
- }
129
- }
130
- ```
131
-
132
- ## Inspecting Events
133
-
134
- ### Via Storage Backend
135
-
136
- ```python
137
- from pyworkflow.storage.file import FileStorageBackend
138
-
139
- storage = FileStorageBackend()
140
-
141
- # Get all events for a workflow run
142
- events = await storage.get_events("run_xyz789")
143
-
144
- for event in events:
145
- print(f"{event.sequence}: {event.type}")
146
- print(f" Data: {event.data}")
147
- print(f" Time: {event.timestamp}")
148
- ```
149
-
150
- ### Example Event Log
151
-
152
- ```
153
- Sequence | Type | Data
154
- ---------|-------------------|----------------------------------
155
- 1 | workflow_started | {workflow: "order_processing"}
156
- 2 | step_started | {step_id: "validate_order"}
157
- 3 | step_completed | {step_id: "validate_order", result: {...}}
158
- 4 | step_started | {step_id: "process_payment"}
159
- 5 | step_failed | {step_id: "process_payment", error: "timeout"}
160
- 6 | step_retrying | {step_id: "process_payment", attempt: 2}
161
- 7 | step_started | {step_id: "process_payment"}
162
- 8 | step_completed | {step_id: "process_payment", result: {...}}
163
- 9 | sleep_started | {duration: "1h", wake_time: "..."}
164
- 10 | workflow_suspended| {reason: "sleep"}
165
- ```
166
-
167
- ## Deterministic Replay
168
-
169
- For replay to work correctly, workflows must be deterministic:
170
-
171
- <Warning>
172
- **Don't do this** - Non-deterministic operations break replay:
173
-
174
- ```python
175
- @workflow()
176
- async def bad_workflow():
177
- # BAD: Random values differ on replay
178
- order_id = f"ORD-{random.randint(1000, 9999)}"
179
-
180
- # BAD: Current time differs on replay
181
- if datetime.now().hour < 12:
182
- await morning_flow()
183
- else:
184
- await afternoon_flow()
185
- ```
186
- </Warning>
187
-
188
- <Tip>
189
- **Do this instead** - Use steps for non-deterministic operations:
190
-
191
- ```python
192
- @step()
193
- async def generate_order_id():
194
- # Results are cached, so same ID on replay
195
- return f"ORD-{random.randint(1000, 9999)}"
196
-
197
- @step()
198
- async def get_current_time():
199
- # Cached, so same time on replay
200
- return datetime.now()
201
-
202
- @workflow()
203
- async def good_workflow():
204
- order_id = await generate_order_id()
205
- current_time = await get_current_time()
206
-
207
- if current_time.hour < 12:
208
- await morning_flow()
209
- else:
210
- await afternoon_flow()
211
- ```
212
- </Tip>
213
-
214
- ## Storage Backends
215
-
216
- Events are stored in a pluggable storage backend:
217
-
218
- | Backend | Status | Use Case |
219
- |---------|--------|----------|
220
- | **File** | Available | Development, single-machine |
221
- | **Redis** | Planned | Production, distributed |
222
- | **PostgreSQL** | Planned | Enterprise, complex queries |
223
- | **SQLite** | Planned | Embedded applications |
224
-
225
- ### Configuring Storage
226
-
227
- ```python
228
- from pyworkflow.storage.file import FileStorageBackend
229
-
230
- # File storage (default)
231
- storage = FileStorageBackend(
232
- base_path="/var/lib/pyworkflow/events"
233
- )
234
-
235
- # Configure PyWorkflow to use this storage
236
- from pyworkflow import configure
237
- configure(storage=storage)
238
- ```
239
-
240
- ## Benefits of Event Sourcing
241
-
242
- <CardGroup cols={2}>
243
- <Card title="Complete Audit Trail" icon="scroll">
244
- Every action is recorded. Know exactly what happened and when.
245
- </Card>
246
- <Card title="Time Travel Debugging" icon="clock-rotate-left">
247
- Replay workflows to debug issues. See the exact state at any point.
248
- </Card>
249
- <Card title="Failure Recovery" icon="shield">
250
- Resume from the last successful point after a crash or restart.
251
- </Card>
252
- <Card title="Event-Driven Architecture" icon="bolt">
253
- Events can trigger other systems, enabling loose coupling.
254
- </Card>
255
- </CardGroup>
256
-
257
- ## Next Steps
258
-
259
- <CardGroup cols={2}>
260
- <Card title="Sleep" icon="clock" href="/concepts/sleep">
261
- Learn how workflows suspend and resume with sleep.
262
- </Card>
263
- <Card title="Deployment" icon="rocket" href="/guides/deployment">
264
- Configure storage backends for production.
265
- </Card>
266
- </CardGroup>