pyworkflow-engine 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. pyworkflow_engine-0.1.7/CLAUDE.md +638 -0
  2. pyworkflow_engine-0.1.7/DISTRIBUTED.md +360 -0
  3. pyworkflow_engine-0.1.7/LICENSE +21 -0
  4. pyworkflow_engine-0.1.7/MANIFEST.in +42 -0
  5. pyworkflow_engine-0.1.7/PKG-INFO +687 -0
  6. pyworkflow_engine-0.1.7/README.md +614 -0
  7. pyworkflow_engine-0.1.7/RELEASING.md +358 -0
  8. pyworkflow_engine-0.1.7/dashboard/backend/app/__init__.py +1 -0
  9. pyworkflow_engine-0.1.7/dashboard/backend/app/config.py +32 -0
  10. pyworkflow_engine-0.1.7/dashboard/backend/app/controllers/__init__.py +6 -0
  11. pyworkflow_engine-0.1.7/dashboard/backend/app/controllers/run_controller.py +86 -0
  12. pyworkflow_engine-0.1.7/dashboard/backend/app/controllers/workflow_controller.py +33 -0
  13. pyworkflow_engine-0.1.7/dashboard/backend/app/dependencies/__init__.py +5 -0
  14. pyworkflow_engine-0.1.7/dashboard/backend/app/dependencies/storage.py +50 -0
  15. pyworkflow_engine-0.1.7/dashboard/backend/app/repositories/__init__.py +6 -0
  16. pyworkflow_engine-0.1.7/dashboard/backend/app/repositories/run_repository.py +80 -0
  17. pyworkflow_engine-0.1.7/dashboard/backend/app/repositories/workflow_repository.py +27 -0
  18. pyworkflow_engine-0.1.7/dashboard/backend/app/rest/__init__.py +8 -0
  19. pyworkflow_engine-0.1.7/dashboard/backend/app/rest/v1/__init__.py +12 -0
  20. pyworkflow_engine-0.1.7/dashboard/backend/app/rest/v1/health.py +33 -0
  21. pyworkflow_engine-0.1.7/dashboard/backend/app/rest/v1/runs.py +133 -0
  22. pyworkflow_engine-0.1.7/dashboard/backend/app/rest/v1/workflows.py +41 -0
  23. pyworkflow_engine-0.1.7/dashboard/backend/app/schemas/__init__.py +23 -0
  24. pyworkflow_engine-0.1.7/dashboard/backend/app/schemas/common.py +16 -0
  25. pyworkflow_engine-0.1.7/dashboard/backend/app/schemas/event.py +24 -0
  26. pyworkflow_engine-0.1.7/dashboard/backend/app/schemas/hook.py +25 -0
  27. pyworkflow_engine-0.1.7/dashboard/backend/app/schemas/run.py +54 -0
  28. pyworkflow_engine-0.1.7/dashboard/backend/app/schemas/step.py +28 -0
  29. pyworkflow_engine-0.1.7/dashboard/backend/app/schemas/workflow.py +31 -0
  30. pyworkflow_engine-0.1.7/dashboard/backend/app/server.py +87 -0
  31. pyworkflow_engine-0.1.7/dashboard/backend/app/services/__init__.py +6 -0
  32. pyworkflow_engine-0.1.7/dashboard/backend/app/services/run_service.py +240 -0
  33. pyworkflow_engine-0.1.7/dashboard/backend/app/services/workflow_service.py +155 -0
  34. pyworkflow_engine-0.1.7/dashboard/backend/main.py +18 -0
  35. pyworkflow_engine-0.1.7/docs/concepts/cancellation.mdx +362 -0
  36. pyworkflow_engine-0.1.7/docs/concepts/continue-as-new.mdx +434 -0
  37. pyworkflow_engine-0.1.7/docs/concepts/events.mdx +266 -0
  38. pyworkflow_engine-0.1.7/docs/concepts/fault-tolerance.mdx +370 -0
  39. pyworkflow_engine-0.1.7/docs/concepts/hooks.mdx +552 -0
  40. pyworkflow_engine-0.1.7/docs/concepts/limitations.mdx +167 -0
  41. pyworkflow_engine-0.1.7/docs/concepts/schedules.mdx +775 -0
  42. pyworkflow_engine-0.1.7/docs/concepts/sleep.mdx +312 -0
  43. pyworkflow_engine-0.1.7/docs/concepts/steps.mdx +301 -0
  44. pyworkflow_engine-0.1.7/docs/concepts/workflows.mdx +255 -0
  45. pyworkflow_engine-0.1.7/docs/guides/cli.mdx +942 -0
  46. pyworkflow_engine-0.1.7/docs/guides/configuration.mdx +560 -0
  47. pyworkflow_engine-0.1.7/docs/introduction.mdx +155 -0
  48. pyworkflow_engine-0.1.7/docs/quickstart.mdx +279 -0
  49. pyworkflow_engine-0.1.7/examples/__init__.py +1 -0
  50. pyworkflow_engine-0.1.7/examples/celery/__init__.py +1 -0
  51. pyworkflow_engine-0.1.7/examples/celery/durable/docker-compose.yml +55 -0
  52. pyworkflow_engine-0.1.7/examples/celery/durable/pyworkflow.config.yaml +12 -0
  53. pyworkflow_engine-0.1.7/examples/celery/durable/workflows/__init__.py +122 -0
  54. pyworkflow_engine-0.1.7/examples/celery/durable/workflows/basic.py +87 -0
  55. pyworkflow_engine-0.1.7/examples/celery/durable/workflows/batch_processing.py +102 -0
  56. pyworkflow_engine-0.1.7/examples/celery/durable/workflows/cancellation.py +273 -0
  57. pyworkflow_engine-0.1.7/examples/celery/durable/workflows/child_workflow_patterns.py +240 -0
  58. pyworkflow_engine-0.1.7/examples/celery/durable/workflows/child_workflows.py +202 -0
  59. pyworkflow_engine-0.1.7/examples/celery/durable/workflows/continue_as_new.py +260 -0
  60. pyworkflow_engine-0.1.7/examples/celery/durable/workflows/fault_tolerance.py +210 -0
  61. pyworkflow_engine-0.1.7/examples/celery/durable/workflows/hooks.py +211 -0
  62. pyworkflow_engine-0.1.7/examples/celery/durable/workflows/idempotency.py +112 -0
  63. pyworkflow_engine-0.1.7/examples/celery/durable/workflows/long_running.py +99 -0
  64. pyworkflow_engine-0.1.7/examples/celery/durable/workflows/retries.py +101 -0
  65. pyworkflow_engine-0.1.7/examples/celery/durable/workflows/schedules.py +209 -0
  66. pyworkflow_engine-0.1.7/examples/celery/transient/01_basic_workflow.py +91 -0
  67. pyworkflow_engine-0.1.7/examples/celery/transient/02_fault_tolerance.py +257 -0
  68. pyworkflow_engine-0.1.7/examples/celery/transient/__init__.py +20 -0
  69. pyworkflow_engine-0.1.7/examples/celery/transient/pyworkflow.config.yaml +25 -0
  70. pyworkflow_engine-0.1.7/examples/local/__init__.py +1 -0
  71. pyworkflow_engine-0.1.7/examples/local/durable/01_basic_workflow.py +94 -0
  72. pyworkflow_engine-0.1.7/examples/local/durable/02_file_storage.py +132 -0
  73. pyworkflow_engine-0.1.7/examples/local/durable/03_retries.py +169 -0
  74. pyworkflow_engine-0.1.7/examples/local/durable/04_long_running.py +119 -0
  75. pyworkflow_engine-0.1.7/examples/local/durable/05_event_log.py +145 -0
  76. pyworkflow_engine-0.1.7/examples/local/durable/06_idempotency.py +148 -0
  77. pyworkflow_engine-0.1.7/examples/local/durable/07_hooks.py +334 -0
  78. pyworkflow_engine-0.1.7/examples/local/durable/08_cancellation.py +233 -0
  79. pyworkflow_engine-0.1.7/examples/local/durable/09_child_workflows.py +198 -0
  80. pyworkflow_engine-0.1.7/examples/local/durable/10_child_workflow_patterns.py +265 -0
  81. pyworkflow_engine-0.1.7/examples/local/durable/11_continue_as_new.py +249 -0
  82. pyworkflow_engine-0.1.7/examples/local/durable/12_schedules.py +198 -0
  83. pyworkflow_engine-0.1.7/examples/local/durable/__init__.py +1 -0
  84. pyworkflow_engine-0.1.7/examples/local/transient/01_quick_tasks.py +87 -0
  85. pyworkflow_engine-0.1.7/examples/local/transient/02_retries.py +130 -0
  86. pyworkflow_engine-0.1.7/examples/local/transient/03_sleep.py +141 -0
  87. pyworkflow_engine-0.1.7/examples/local/transient/__init__.py +1 -0
  88. pyworkflow_engine-0.1.7/pyproject.toml +255 -0
  89. pyworkflow_engine-0.1.7/pyworkflow/__init__.py +256 -0
  90. pyworkflow_engine-0.1.7/pyworkflow/aws/__init__.py +68 -0
  91. pyworkflow_engine-0.1.7/pyworkflow/aws/context.py +234 -0
  92. pyworkflow_engine-0.1.7/pyworkflow/aws/handler.py +184 -0
  93. pyworkflow_engine-0.1.7/pyworkflow/aws/testing.py +310 -0
  94. pyworkflow_engine-0.1.7/pyworkflow/celery/__init__.py +41 -0
  95. pyworkflow_engine-0.1.7/pyworkflow/celery/app.py +198 -0
  96. pyworkflow_engine-0.1.7/pyworkflow/celery/scheduler.py +315 -0
  97. pyworkflow_engine-0.1.7/pyworkflow/celery/tasks.py +1746 -0
  98. pyworkflow_engine-0.1.7/pyworkflow/cli/__init__.py +132 -0
  99. pyworkflow_engine-0.1.7/pyworkflow/cli/__main__.py +6 -0
  100. pyworkflow_engine-0.1.7/pyworkflow/cli/commands/__init__.py +1 -0
  101. pyworkflow_engine-0.1.7/pyworkflow/cli/commands/hooks.py +640 -0
  102. pyworkflow_engine-0.1.7/pyworkflow/cli/commands/quickstart.py +495 -0
  103. pyworkflow_engine-0.1.7/pyworkflow/cli/commands/runs.py +773 -0
  104. pyworkflow_engine-0.1.7/pyworkflow/cli/commands/scheduler.py +130 -0
  105. pyworkflow_engine-0.1.7/pyworkflow/cli/commands/schedules.py +794 -0
  106. pyworkflow_engine-0.1.7/pyworkflow/cli/commands/setup.py +703 -0
  107. pyworkflow_engine-0.1.7/pyworkflow/cli/commands/worker.py +413 -0
  108. pyworkflow_engine-0.1.7/pyworkflow/cli/commands/workflows.py +1257 -0
  109. pyworkflow_engine-0.1.7/pyworkflow/cli/output/__init__.py +1 -0
  110. pyworkflow_engine-0.1.7/pyworkflow/cli/output/formatters.py +321 -0
  111. pyworkflow_engine-0.1.7/pyworkflow/cli/output/styles.py +121 -0
  112. pyworkflow_engine-0.1.7/pyworkflow/cli/utils/__init__.py +1 -0
  113. pyworkflow_engine-0.1.7/pyworkflow/cli/utils/async_helpers.py +30 -0
  114. pyworkflow_engine-0.1.7/pyworkflow/cli/utils/config.py +130 -0
  115. pyworkflow_engine-0.1.7/pyworkflow/cli/utils/config_generator.py +344 -0
  116. pyworkflow_engine-0.1.7/pyworkflow/cli/utils/discovery.py +53 -0
  117. pyworkflow_engine-0.1.7/pyworkflow/cli/utils/docker_manager.py +651 -0
  118. pyworkflow_engine-0.1.7/pyworkflow/cli/utils/interactive.py +364 -0
  119. pyworkflow_engine-0.1.7/pyworkflow/cli/utils/storage.py +115 -0
  120. pyworkflow_engine-0.1.7/pyworkflow/config.py +329 -0
  121. pyworkflow_engine-0.1.7/pyworkflow/context/__init__.py +63 -0
  122. pyworkflow_engine-0.1.7/pyworkflow/context/aws.py +230 -0
  123. pyworkflow_engine-0.1.7/pyworkflow/context/base.py +416 -0
  124. pyworkflow_engine-0.1.7/pyworkflow/context/local.py +930 -0
  125. pyworkflow_engine-0.1.7/pyworkflow/context/mock.py +381 -0
  126. pyworkflow_engine-0.1.7/pyworkflow/core/__init__.py +0 -0
  127. pyworkflow_engine-0.1.7/pyworkflow/core/exceptions.py +353 -0
  128. pyworkflow_engine-0.1.7/pyworkflow/core/registry.py +313 -0
  129. pyworkflow_engine-0.1.7/pyworkflow/core/scheduled.py +328 -0
  130. pyworkflow_engine-0.1.7/pyworkflow/core/step.py +494 -0
  131. pyworkflow_engine-0.1.7/pyworkflow/core/workflow.py +294 -0
  132. pyworkflow_engine-0.1.7/pyworkflow/discovery.py +248 -0
  133. pyworkflow_engine-0.1.7/pyworkflow/engine/__init__.py +0 -0
  134. pyworkflow_engine-0.1.7/pyworkflow/engine/events.py +879 -0
  135. pyworkflow_engine-0.1.7/pyworkflow/engine/executor.py +682 -0
  136. pyworkflow_engine-0.1.7/pyworkflow/engine/replay.py +273 -0
  137. pyworkflow_engine-0.1.7/pyworkflow/observability/__init__.py +19 -0
  138. pyworkflow_engine-0.1.7/pyworkflow/observability/logging.py +234 -0
  139. pyworkflow_engine-0.1.7/pyworkflow/primitives/__init__.py +33 -0
  140. pyworkflow_engine-0.1.7/pyworkflow/primitives/child_handle.py +174 -0
  141. pyworkflow_engine-0.1.7/pyworkflow/primitives/child_workflow.py +372 -0
  142. pyworkflow_engine-0.1.7/pyworkflow/primitives/continue_as_new.py +101 -0
  143. pyworkflow_engine-0.1.7/pyworkflow/primitives/define_hook.py +150 -0
  144. pyworkflow_engine-0.1.7/pyworkflow/primitives/hooks.py +97 -0
  145. pyworkflow_engine-0.1.7/pyworkflow/primitives/resume_hook.py +210 -0
  146. pyworkflow_engine-0.1.7/pyworkflow/primitives/schedule.py +545 -0
  147. pyworkflow_engine-0.1.7/pyworkflow/primitives/shield.py +96 -0
  148. pyworkflow_engine-0.1.7/pyworkflow/primitives/sleep.py +100 -0
  149. pyworkflow_engine-0.1.7/pyworkflow/runtime/__init__.py +21 -0
  150. pyworkflow_engine-0.1.7/pyworkflow/runtime/base.py +179 -0
  151. pyworkflow_engine-0.1.7/pyworkflow/runtime/celery.py +310 -0
  152. pyworkflow_engine-0.1.7/pyworkflow/runtime/factory.py +101 -0
  153. pyworkflow_engine-0.1.7/pyworkflow/runtime/local.py +706 -0
  154. pyworkflow_engine-0.1.7/pyworkflow/scheduler/__init__.py +9 -0
  155. pyworkflow_engine-0.1.7/pyworkflow/scheduler/local.py +248 -0
  156. pyworkflow_engine-0.1.7/pyworkflow/serialization/__init__.py +0 -0
  157. pyworkflow_engine-0.1.7/pyworkflow/serialization/decoder.py +146 -0
  158. pyworkflow_engine-0.1.7/pyworkflow/serialization/encoder.py +162 -0
  159. pyworkflow_engine-0.1.7/pyworkflow/storage/__init__.py +54 -0
  160. pyworkflow_engine-0.1.7/pyworkflow/storage/base.py +612 -0
  161. pyworkflow_engine-0.1.7/pyworkflow/storage/config.py +185 -0
  162. pyworkflow_engine-0.1.7/pyworkflow/storage/dynamodb.py +1315 -0
  163. pyworkflow_engine-0.1.7/pyworkflow/storage/file.py +827 -0
  164. pyworkflow_engine-0.1.7/pyworkflow/storage/memory.py +549 -0
  165. pyworkflow_engine-0.1.7/pyworkflow/storage/postgres.py +1161 -0
  166. pyworkflow_engine-0.1.7/pyworkflow/storage/schemas.py +486 -0
  167. pyworkflow_engine-0.1.7/pyworkflow/storage/sqlite.py +1136 -0
  168. pyworkflow_engine-0.1.7/pyworkflow/utils/__init__.py +0 -0
  169. pyworkflow_engine-0.1.7/pyworkflow/utils/duration.py +177 -0
  170. pyworkflow_engine-0.1.7/pyworkflow/utils/schedule.py +391 -0
  171. pyworkflow_engine-0.1.7/pyworkflow_engine.egg-info/SOURCES.txt +197 -0
  172. pyworkflow_engine-0.1.7/setup.cfg +4 -0
  173. pyworkflow_engine-0.1.7/tests/examples/__init__.py +0 -0
  174. pyworkflow_engine-0.1.7/tests/integration/__init__.py +0 -0
  175. pyworkflow_engine-0.1.7/tests/integration/test_cancellation.py +330 -0
  176. pyworkflow_engine-0.1.7/tests/integration/test_child_workflows.py +439 -0
  177. pyworkflow_engine-0.1.7/tests/integration/test_continue_as_new.py +428 -0
  178. pyworkflow_engine-0.1.7/tests/integration/test_dynamodb_storage.py +1146 -0
  179. pyworkflow_engine-0.1.7/tests/integration/test_fault_tolerance.py +369 -0
  180. pyworkflow_engine-0.1.7/tests/integration/test_schedule_storage.py +484 -0
  181. pyworkflow_engine-0.1.7/tests/unit/__init__.py +0 -0
  182. pyworkflow_engine-0.1.7/tests/unit/backends/__init__.py +1 -0
  183. pyworkflow_engine-0.1.7/tests/unit/backends/test_dynamodb_storage.py +1554 -0
  184. pyworkflow_engine-0.1.7/tests/unit/backends/test_postgres_storage.py +1281 -0
  185. pyworkflow_engine-0.1.7/tests/unit/backends/test_sqlite_storage.py +1460 -0
  186. pyworkflow_engine-0.1.7/tests/unit/conftest.py +41 -0
  187. pyworkflow_engine-0.1.7/tests/unit/test_cancellation.py +364 -0
  188. pyworkflow_engine-0.1.7/tests/unit/test_child_workflows.py +680 -0
  189. pyworkflow_engine-0.1.7/tests/unit/test_continue_as_new.py +441 -0
  190. pyworkflow_engine-0.1.7/tests/unit/test_event_limits.py +316 -0
  191. pyworkflow_engine-0.1.7/tests/unit/test_executor.py +320 -0
  192. pyworkflow_engine-0.1.7/tests/unit/test_fault_tolerance.py +334 -0
  193. pyworkflow_engine-0.1.7/tests/unit/test_hooks.py +495 -0
  194. pyworkflow_engine-0.1.7/tests/unit/test_registry.py +261 -0
  195. pyworkflow_engine-0.1.7/tests/unit/test_replay.py +420 -0
  196. pyworkflow_engine-0.1.7/tests/unit/test_schedule_schemas.py +285 -0
  197. pyworkflow_engine-0.1.7/tests/unit/test_schedule_utils.py +286 -0
  198. pyworkflow_engine-0.1.7/tests/unit/test_scheduled_workflow.py +274 -0
  199. pyworkflow_engine-0.1.7/tests/unit/test_step.py +353 -0
  200. pyworkflow_engine-0.1.7/tests/unit/test_workflow.py +243 -0
@@ -0,0 +1,638 @@
1
+ # PyWorkflow - Claude Code Assistant Guide
2
+
3
+ This guide helps Claude Code (or other AI assistants) understand and effectively work with the PyWorkflow codebase.
4
+
5
+ ## Project Overview
6
+
7
+ PyWorkflow is a Python implementation of durable, event-sourced workflow orchestration inspired by Vercel's Workflow Development Kit. It enables developers to build fault-tolerant, long-running workflows with automatic retry, sleep/delay capabilities, and webhook integration.
8
+
9
+ ### Core Concepts
10
+
11
+ 1. **Workflows**: Orchestration functions that coordinate steps (decorated with `@workflow` or inheriting from `Workflow` class)
12
+ 2. **Steps**: Isolated, retryable units of work (decorated with `@step` or inheriting from `Step` class)
13
+ 3. **Event Sourcing**: All state changes recorded as events for deterministic replay
14
+ 4. **Suspension/Resumption**: Workflows can pause (sleep, webhooks) and resume without holding resources
15
+ 5. **Dual API**: Both functional (decorators) and OOP (classes) interfaces
16
+
17
+ ## Architecture
18
+
19
+ ### High-Level Flow
20
+
21
+ ```
22
+ User Code (Workflow + Steps)
23
+ ↓
24
+ Decorators / Base Classes
25
+ ↓
26
+ Execution Context + Event Log
27
+ ↓
28
+ Celery Tasks (Distributed Execution)
29
+ ↓
30
+ Storage Backend (File/Redis/SQLite/PostgreSQL)
31
+ ```
32
+
33
+ ### Event Sourcing Model
34
+
35
+ ```
36
+ Workflow Execution:
37
+ 1. Create WorkflowRun record
38
+ 2. Record workflow_started event
39
+ 3. Execute workflow function
40
+ 4. When step encountered:
41
+ - Check if step_completed event exists (replay mode)
42
+ - If yes: return cached result
43
+ - If no: execute step, record step_completed event
44
+ 5. When sleep() encountered:
45
+ - Record sleep_started event
46
+ - Raise SuspensionSignal
47
+ - Schedule Celery task for resumption
48
+ 6. When hook encountered:
49
+ - Record hook_created event
50
+ - Raise SuspensionSignal
51
+ - Wait for webhook to trigger resumption
52
+ 7. On resumption:
53
+ - Replay all events
54
+ - Fast-forward to suspension point
55
+ - Continue execution
56
+ ```
57
+
58
+ ## Project Structure
59
+
60
+ ```
61
+ pyworkflow/
62
+ ├── pyworkflow/ # Main package
63
+ │ ├── __init__.py # Public API exports
64
+ │ ├── core/ # Core decorators and classes
65
+ │ │ ├── workflow.py # @workflow decorator
66
+ │ │ ├── workflow_base.py # Workflow base class (OOP)
67
+ │ │ ├── step.py # @step decorator
68
+ │ │ ├── step_base.py # Step base class (OOP)
69
+ │ │ ├── context.py # WorkflowContext, execution state
70
+ │ │ ├── registry.py # Workflow/step registration
71
+ │ │ └── exceptions.py # Error classes
72
+ │ ├── engine/ # Execution engine
73
+ │ │ ├── executor.py # Main execution logic
74
+ │ │ ├── events.py # Event types and schemas
75
+ │ │ ├── replay.py # Event replay mechanism
76
+ │ │ └── state.py # State machine
77
+ │ ├── celery/ # Celery integration
78
+ │ │ ├── tasks.py # Task definitions
79
+ │ │ ├── config.py # Configuration
80
+ │ │ └── integration.py # Workflow-Celery bridge
81
+ │ ├── storage/ # Storage backends
82
+ │ │ ├── base.py # StorageBackend ABC
83
+ │ │ ├── schemas.py # Data models
84
+ │ │ ├── file.py # File backend
85
+ │ │ ├── redis.py # Redis backend
86
+ │ │ ├── sqlite.py # SQLite backend
87
+ │ │ └── postgres.py # PostgreSQL backend
88
+ │ ├── primitives/ # Workflow primitives
89
+ │ │ ├── sleep.py # sleep() function
90
+ │ │ ├── hooks.py # Hooks/webhooks
91
+ │ │ ├── parallel.py # Parallel execution helper
92
+ │ │ └── retry.py # Retry strategies
93
+ │ ├── serialization/ # Serialization layer
94
+ │ │ ├── encoder.py # Encoding complex types
95
+ │ │ └── decoder.py # Decoding complex types
96
+ │ ├── observability/ # Logging and metrics
97
+ │ │ ├── logging.py # Loguru integration
98
+ │ │ └── metrics.py # Metrics collection
99
+ │ └── utils/ # Utilities
100
+ │ ├── duration.py # Duration parsing ("5s", "2m", etc.)
101
+ │ └── helpers.py # General utilities
102
+ ├── tests/ # Test suite
103
+ ├── examples/ # Example workflows
104
+ └── docs/ # Documentation
105
+ ```
106
+
107
+ ## Key Design Patterns
108
+
109
+ ### 1. Decorator Pattern (Functional API)
110
+
111
+ ```python
112
+ @workflow
113
+ async def my_workflow(arg: str):
114
+ result = await my_step(arg)
115
+ return result
116
+
117
+ @step(max_retries=3)
118
+ async def my_step(arg: str):
119
+ return f"processed: {arg}"
120
+ ```
121
+
122
+ **How it works:**
123
+ - `@workflow` wraps the function, registers it, and adds execution context
124
+ - `@step` wraps the function, adds retry logic, and integrates with Celery
125
+
126
+ ### 2. Base Class Pattern (OOP API)
127
+
128
+ ```python
129
+ class MyWorkflow(Workflow):
130
+ async def run(self, arg: str):
131
+ result = await MyStep()(arg)
132
+ return result
133
+
134
+ class MyStep(Step):
135
+ max_retries = 3
136
+
137
+ async def execute(self, arg: str):
138
+ return f"processed: {arg}"
139
+ ```
140
+
141
+ **How it works:**
142
+ - `Workflow.run()` is the entry point (abstract method)
143
+ - `Step.execute()` contains business logic (abstract method)
144
+ - `Step.__call__()` applies the `@step` decorator internally
145
+
146
+ ### 3. Context Pattern
147
+
148
+ ```python
149
+ from pyworkflow.context import get_context
150
+
151
+ def some_function():
152
+ ctx = get_context()
153
+ print(ctx.run_id, ctx.workflow_name)
154
+ ```
155
+
156
+ **How it works:**
157
+ - Context stored in `contextvars.ContextVar`
158
+ - Accessible from any function in the call stack
159
+ - Contains run_id, workflow_name, event_log, step_results, etc.
160
+
161
+ ### 4. Suspension Pattern
162
+
163
+ ```python
164
+ async def sleep(duration):
165
+ # Record event
166
+ await ctx.storage.record_event(Event(type=EventType.SLEEP_STARTED, ...))
167
+
168
+ # Raise signal
169
+ raise SuspensionSignal(reason="sleep", wake_time=...)
170
+
171
+ # Workflow executor catches SuspensionSignal
172
+ try:
173
+ result = await workflow_func(*args)
174
+ except SuspensionSignal as e:
175
+ # Schedule resumption
176
+ schedule_resumption(e)
177
+ ```
178
+
179
+ ### 5. Event Replay Pattern
180
+
181
+ ```python
182
+ async def replay_events(ctx, events):
183
+ for event in sorted(events, key=lambda e: e.sequence):
184
+ if event.type == EventType.STEP_COMPLETED:
185
+ ctx.step_results[event.data["step_id"]] = event.data["result"]
186
+ elif event.type == EventType.HOOK_RECEIVED:
187
+ ctx.hook_results[event.data["hook_id"]] = event.data["payload"]
188
+ ```
189
+
190
+ ## Common Development Tasks
191
+
192
+ ### Adding a New Event Type
193
+
194
+ 1. Add to `EventType` enum in `pyworkflow/engine/events.py`
195
+ 2. Update `EventReplayer._apply_event()` in `pyworkflow/engine/replay.py`
196
+ 3. Record event in relevant code (workflow.py, step.py, primitives/)
197
+ 4. Add test in `tests/unit/test_events.py`
198
+
199
+ ### Adding a New Storage Backend
200
+
201
+ 1. Create `pyworkflow/storage/your_backend.py`
202
+ 2. Inherit from `StorageBackend` in `storage/base.py`
203
+ 3. Implement all abstract methods:
204
+ - `create_run()`, `get_run()`, `update_run_status()`
205
+ - `record_event()`, `get_events()`
206
+ - `create_hook()`, `get_hook()`, `update_hook_payload()`
207
+ 4. Add backend to `__init__.py` exports
208
+ 5. Add tests in `tests/integration/test_storage_backends.py`
209
+
210
+ ### Adding a New Primitive
211
+
212
+ 1. Create `pyworkflow/primitives/your_primitive.py`
213
+ 2. Implement the function/class
214
+ 3. Handle suspension if needed (raise `SuspensionSignal`)
215
+ 4. Record appropriate events
216
+ 5. Add to `pyworkflow/__init__.py` exports
217
+ 6. Add examples in `examples/`
218
+ 7. Add tests in `tests/unit/` and `tests/integration/`
219
+
220
+ ### Debugging Workflows
221
+
222
+ **1. Enable Debug Logging:**
223
+ ```python
224
+ from pyworkflow import configure_logging
225
+ configure_logging(level="DEBUG")
226
+ ```
227
+
228
+ **2. Inspect Event Log:**
229
+ ```python
230
+ from pyworkflow import get_workflow_run
231
+ run = await storage.get_run(run_id)
232
+ events = await storage.get_events(run_id)
233
+ for event in events:
234
+ print(f"{event.sequence}: {event.type} - {event.data}")
235
+ ```
236
+
237
+ **3. Check Workflow Status:**
238
+ ```python
239
+ run = await storage.get_run(run_id)
240
+ print(f"Status: {run.status}")
241
+ print(f"Error: {run.error}")
242
+ ```
243
+
244
+ **4. Test Event Replay:**
245
+ ```python
246
+ # Manually trigger replay
247
+ from pyworkflow.engine.replay import EventReplayer
248
+ replayer = EventReplayer()
249
+ await replayer.replay(ctx, events)
250
+ ```
251
+
252
+ ## Important Implementation Notes
253
+
254
+ ### Serialization
255
+
256
+ **Supported Types:**
257
+ - Primitives: int, str, bool, float, None
258
+ - Collections: list, dict, tuple, set
259
+ - Dates: datetime, date, timedelta
260
+ - Special: Decimal, Enum, Exception, bytes
261
+ - Complex: Any object (via cloudpickle)
262
+
263
+ **Implementation:**
264
+ - Simple types → JSON (human-readable)
265
+ - Complex types → cloudpickle → base64 (fallback)
266
+ - Custom encoders in `serialization/encoder.py`
267
+ - Custom decoders in `serialization/decoder.py`
268
+
269
+ ### Error Handling
270
+
271
+ **Error Hierarchy:**
272
+ ```
273
+ WorkflowError (base)
274
+ ├── FatalError (don't retry)
275
+ └── RetryableError (auto-retry)
276
+ └── retry_after: delay before retry
277
+ ```
278
+
279
+ **Usage:**
280
+ ```python
281
+ # Don't retry
282
+ raise FatalError("Invalid input")
283
+
284
+ # Retry with default delay
285
+ raise RetryableError("Temporary failure")
286
+
287
+ # Retry with specific delay
288
+ raise RetryableError("Rate limited", retry_after="60s")
289
+ ```
290
+
291
+ ### Auto Recovery and Fault Tolerance
292
+
293
+ PyWorkflow automatically recovers workflows from worker crashes using event replay.
294
+
295
+ **Worker Loss Detection:**
296
+ - Celery detects worker loss via `WorkerLostError`
297
+ - Task is requeued; new worker detects `RUNNING` or `INTERRUPTED` status
298
+ - Recovery is triggered if `recover_on_worker_loss=True`
299
+
300
+ **Recovery Flow:**
301
+ 1. Detect workflow in `RUNNING`/`INTERRUPTED` status on task start
302
+ 2. Check `recovery_attempts < max_recovery_attempts`
303
+ 3. Record `WORKFLOW_INTERRUPTED` event
304
+ 4. Complete any pending sleeps (mark `SLEEP_COMPLETED`)
305
+ 5. Replay event log to restore cached step results
306
+ 6. Continue execution from the last checkpoint
307
+
308
+ **Key Functions (`pyworkflow/celery/tasks.py`):**
309
+ - `_handle_workflow_recovery()` - Authorize and initiate recovery (lines 239-318)
310
+ - `_recover_workflow_on_worker()` - Execute recovery with event replay (lines 321-428)
311
+ - `_complete_pending_sleeps()` - Mark pending sleeps as completed (lines 684-731)
312
+
313
+ **Configuration Priority:**
314
+ 1. `@workflow()` decorator parameters (highest)
315
+ 2. `pyworkflow.configure()` global settings
316
+ 3. Defaults: `True` for durable workflows, `False` for transient
317
+
318
+ **Configuration:**
319
+ ```python
320
+ # Per-workflow
321
+ @workflow(
322
+ recover_on_worker_loss=True, # Enable recovery
323
+ max_recovery_attempts=5, # Max attempts before failure
324
+ )
325
+ async def my_workflow():
326
+ pass
327
+
328
+ # Global default
329
+ pyworkflow.configure(
330
+ default_recover_on_worker_loss=True,
331
+ default_max_recovery_attempts=3,
332
+ )
333
+ ```
334
+
335
+ **Durable vs Transient Recovery:**
336
+ - **Durable**: Events are replayed, execution resumes from last checkpoint
337
+ - **Transient**: No events recorded, workflow restarts from the beginning
338
+
339
+ **New Event Type:**
340
+ - `WORKFLOW_INTERRUPTED` - Recorded when worker crash is detected
341
+
342
+ **New Run Status:**
343
+ - `INTERRUPTED` - Workflow awaiting recovery after worker crash
344
+
345
+ ### Cancellation
346
+
347
+ PyWorkflow supports graceful workflow cancellation via `cancel_workflow()`.
348
+
349
+ **Key APIs:**
350
+ - `cancel_workflow(run_id, reason=None, wait=False)` - Request workflow cancellation
351
+ - `CancellationError` - Raised when workflow/step is cancelled
352
+ - `shield()` - Context manager to protect critical sections from cancellation
353
+
354
+ **Cancellation Check Points:**
355
+ Cancellation is checked at these points:
356
+ - Before each step execution
357
+ - Before sleep suspension
358
+ - Before hook suspension
359
+
360
+ **Important Limitation:**
361
+ Cancellation does NOT interrupt a step that is already executing. If a step function takes a long time (e.g., a 10-minute API call), cancellation will only be detected after the step completes. This is by design to avoid leaving operations in an inconsistent state.
362
+
363
+ For long-running operations that need to be cancellable mid-execution, the step should call `ctx.check_cancellation()` periodically so that a pending cancellation request is detected cooperatively:
364
+
365
+ ```python
366
+ @step()
367
+ async def long_running_step():
368
+ ctx = get_context()
369
+ for chunk in process_large_dataset():
370
+ ctx.check_cancellation() # Cooperative cancellation check
371
+ await process_chunk(chunk)
372
+ return result
373
+ ```
374
+
375
+ **Shield for Critical Sections:**
376
+ Use `shield()` to protect cleanup or critical code from cancellation:
377
+
378
+ ```python
379
+ async with shield():
380
+ # This code will complete even if cancellation is requested
381
+ await cleanup_resources()
382
+ ```
383
+
384
+ ### Celery Integration
385
+
386
+ **Two Queue System:**
387
+ - `workflows` queue: Orchestration (lightweight)
388
+ - `steps` queue: Actual work (heavy)
389
+
390
+ **Task Routing:**
391
+ ```python
392
+ task_routes = {
393
+ 'execute_workflow_task': {'queue': 'workflows'},
394
+ 'execute_step_task': {'queue': 'steps'},
395
+ }
396
+ ```
397
+
398
+ **Starting Workers:**
399
+ ```bash
400
+ # Workflow worker
401
+ celery -A pyworkflow.celery.tasks worker -Q workflows -n workflow@%h
402
+
403
+ # Step worker (scalable)
404
+ celery -A pyworkflow.celery.tasks worker -Q steps -n step@%h --concurrency=4
405
+ ```
406
+
407
+ ### Logging with Loguru
408
+
409
+ **Context-Aware Logging:**
410
+ ```python
411
+ from pyworkflow.observability.logging import get_logger
412
+
413
+ logger = get_logger()
414
+ logger.info("Processing order", order_id=order_id, amount=99.99)
415
+ # Output: 2025-01-15 10:30:45 | INFO | run_abc123 | process_order | Processing order
416
+ ```
417
+
418
+ **Configuration:**
419
+ ```python
420
+ from pyworkflow import configure_logging
421
+
422
+ # JSON output for production
423
+ configure_logging(level="INFO", serialize=True)
424
+
425
+ # Pretty output for development
426
+ configure_logging(level="DEBUG", serialize=False)
427
+ ```
428
+
429
+ ## Testing Strategy
430
+
431
+ ### Unit Tests
432
+ - Test individual components in isolation
433
+ - Mock dependencies (storage, Celery)
434
+ - Fast execution (<1s total)
435
+
436
+ **Example:**
437
+ ```python
438
+ def test_event_creation():
439
+ event = Event(
440
+ run_id="test_run",
441
+ type=EventType.STEP_COMPLETED,
442
+ timestamp=datetime.utcnow(),
443
+ data={"step_id": "step_1", "result": 42}
444
+ )
445
+ assert event.type == EventType.STEP_COMPLETED
446
+ assert event.data["result"] == 42
447
+ ```
448
+
449
+ ### Integration Tests
450
+ - Test components working together
451
+ - Use real storage backends (in-memory, temporary files)
452
+ - Test end-to-end workflows
453
+
454
+ **Example:**
455
+ ```python
456
+ @pytest.mark.asyncio
457
+ async def test_workflow_execution():
458
+ @workflow
459
+ async def test_wf():
460
+ return await test_step()
461
+
462
+ @step
463
+ async def test_step():
464
+ return 42
465
+
466
+ run_id = await start(test_wf)
467
+ # Wait for completion
468
+ await asyncio.sleep(2)
469
+
470
+ run = await storage.get_run(run_id)
471
+ assert run.status == RunStatus.COMPLETED
472
+ ```
473
+
474
+ ### Example Tests
475
+ - Full workflow scenarios
476
+ - Test retry behavior
477
+ - Test sleep and webhooks
478
+ - Test error handling
479
+
480
+ ## Code Style Guidelines
481
+
482
+ ### Type Hints
483
+ Always use type hints:
484
+ ```python
485
+ async def process_order(order_id: str) -> dict[str, Any]:
486
+ order: Order = await get_order(order_id)
487
+ return order.to_dict()
488
+ ```
489
+
490
+ ### Async/Await
491
+ Prefer async/await for all I/O operations:
492
+ ```python
493
+ # Good
494
+ async def fetch_data():
495
+ return await client.get(url)  # client is an httpx.AsyncClient; module-level httpx.get() is synchronous and cannot be awaited
496
+
497
+ # Avoid
498
+ def fetch_data():
499
+ return requests.get(url) # Blocking
500
+ ```
501
+
502
+ ### Error Messages
503
+ Provide clear, actionable error messages:
504
+ ```python
505
+ # Good
506
+ raise ValueError(f"Order {order_id} not found. Please check the order ID.")
507
+
508
+ # Avoid
509
+ raise ValueError("Not found")
510
+ ```
511
+
512
+ ### Documentation
513
+ Add docstrings to public APIs:
514
+ ```python
515
+ async def sleep(duration: str | int | timedelta):
516
+ """
517
+ Pause workflow execution for specified duration.
518
+
519
+ Args:
520
+ duration: Sleep duration as string ("5s", "2m"), int (seconds),
521
+ or timedelta object
522
+
523
+ Examples:
524
+ await sleep("5m") # 5 minutes
525
+ await sleep(300) # 300 seconds
526
+
527
+ Raises:
528
+ ValueError: If duration format is invalid
529
+ """
530
+ ```
531
+
532
+ ## Common Pitfalls
533
+
534
+ ### 1. Forgetting to Record Events
535
+ **Wrong:**
536
+ ```python
537
+ async def sleep(duration):
538
+ await asyncio.sleep(duration) # Loses state!
539
+ ```
540
+
541
+ **Right:**
542
+ ```python
543
+ async def sleep(duration):
544
+ await ctx.storage.record_event(Event(...))
545
+ raise SuspensionSignal(...) # Proper suspension
546
+ ```
547
+
548
+ ### 2. Not Using Context
549
+ **Wrong:**
550
+ ```python
551
+ @step
552
+ async def my_step():
553
+ # How do we know which workflow this belongs to?
554
+ pass
555
+ ```
556
+
557
+ **Right:**
558
+ ```python
559
+ @step
560
+ async def my_step():
561
+ ctx = get_context()
562
+ logger.info("Step running", run_id=ctx.run_id)
563
+ ```
564
+
565
+ ### 3. Blocking I/O
566
+ **Wrong:**
567
+ ```python
568
+ @step
569
+ async def fetch_data():
570
+ return requests.get(url) # Blocks event loop!
571
+ ```
572
+
573
+ **Right:**
574
+ ```python
575
+ @step
576
+ async def fetch_data():
577
+ async with httpx.AsyncClient() as client:
578
+ return await client.get(url)
579
+ ```
580
+
581
+ ### 4. Mutating Cached Results
582
+ **Wrong:**
583
+ ```python
584
+ result = ctx.step_results["step_1"] # Cached from replay
585
+ result["modified"] = True # Mutates cached data!
586
+ ```
587
+
588
+ **Right:**
589
+ ```python
590
+ result = copy.deepcopy(ctx.step_results["step_1"])
591
+ result["modified"] = True
592
+ ```
593
+
594
+ ## Performance Considerations
595
+
596
+ ### Event Replay Optimization
597
+ - Long workflows with many events may have replay overhead
598
+ - Future: Implement event compaction/snapshotting
599
+ - For now: Keep workflows reasonably sized
600
+
601
+ ### Storage Backend Choice
602
+ - **Development**: File storage (simple, no dependencies)
603
+ - **Production (small)**: SQLite (embedded, single-file)
604
+ - **Production (medium)**: Redis (fast, in-memory)
605
+ - **Production (large)**: PostgreSQL (scalable, full SQL)
606
+
607
+ ### Celery Concurrency
608
+ - Workflow workers: Low concurrency (lightweight orchestration)
609
+ - Step workers: High concurrency (actual work)
610
+ - Scale step workers horizontally as needed
611
+
612
+ ## References
613
+
614
+ - [Vercel Workflow Docs](https://useworkflow.dev/)
615
+ - [Vercel Workflow GitHub](https://github.com/vercel/workflow)
616
+ - [Celery Documentation](https://docs.celeryq.dev/)
617
+ - [Loguru Documentation](https://loguru.readthedocs.io/)
618
+ - [Pydantic Documentation](https://docs.pydantic.dev/)
619
+
620
+ ## Getting Help
621
+
622
+ When asking for help or reporting issues, provide:
623
+ 1. Workflow code
624
+ 2. Event log (`await storage.get_events(run_id)`)
625
+ 3. Run status (`await storage.get_run(run_id)`)
626
+ 4. Error traceback
627
+ 5. Celery logs (if applicable)
628
+
629
+ ## Version Information
630
+
631
+ - **Python**: 3.11+
632
+ - **Celery**: 5.x
633
+ - **Pydantic**: 2.x
634
+ - **Loguru**: 0.7.x
635
+
636
+ ---
637
+
638
+ **Happy coding with PyWorkflow!** 🚀