pyworkflow-engine 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196)
  1. dashboard/backend/app/__init__.py +1 -0
  2. dashboard/backend/app/config.py +32 -0
  3. dashboard/backend/app/controllers/__init__.py +6 -0
  4. dashboard/backend/app/controllers/run_controller.py +86 -0
  5. dashboard/backend/app/controllers/workflow_controller.py +33 -0
  6. dashboard/backend/app/dependencies/__init__.py +5 -0
  7. dashboard/backend/app/dependencies/storage.py +50 -0
  8. dashboard/backend/app/repositories/__init__.py +6 -0
  9. dashboard/backend/app/repositories/run_repository.py +80 -0
  10. dashboard/backend/app/repositories/workflow_repository.py +27 -0
  11. dashboard/backend/app/rest/__init__.py +8 -0
  12. dashboard/backend/app/rest/v1/__init__.py +12 -0
  13. dashboard/backend/app/rest/v1/health.py +33 -0
  14. dashboard/backend/app/rest/v1/runs.py +133 -0
  15. dashboard/backend/app/rest/v1/workflows.py +41 -0
  16. dashboard/backend/app/schemas/__init__.py +23 -0
  17. dashboard/backend/app/schemas/common.py +16 -0
  18. dashboard/backend/app/schemas/event.py +24 -0
  19. dashboard/backend/app/schemas/hook.py +25 -0
  20. dashboard/backend/app/schemas/run.py +54 -0
  21. dashboard/backend/app/schemas/step.py +28 -0
  22. dashboard/backend/app/schemas/workflow.py +31 -0
  23. dashboard/backend/app/server.py +87 -0
  24. dashboard/backend/app/services/__init__.py +6 -0
  25. dashboard/backend/app/services/run_service.py +240 -0
  26. dashboard/backend/app/services/workflow_service.py +155 -0
  27. dashboard/backend/main.py +18 -0
  28. docs/concepts/cancellation.mdx +362 -0
  29. docs/concepts/continue-as-new.mdx +434 -0
  30. docs/concepts/events.mdx +266 -0
  31. docs/concepts/fault-tolerance.mdx +370 -0
  32. docs/concepts/hooks.mdx +552 -0
  33. docs/concepts/limitations.mdx +167 -0
  34. docs/concepts/schedules.mdx +775 -0
  35. docs/concepts/sleep.mdx +312 -0
  36. docs/concepts/steps.mdx +301 -0
  37. docs/concepts/workflows.mdx +255 -0
  38. docs/guides/cli.mdx +942 -0
  39. docs/guides/configuration.mdx +560 -0
  40. docs/introduction.mdx +155 -0
  41. docs/quickstart.mdx +279 -0
  42. examples/__init__.py +1 -0
  43. examples/celery/__init__.py +1 -0
  44. examples/celery/durable/docker-compose.yml +55 -0
  45. examples/celery/durable/pyworkflow.config.yaml +12 -0
  46. examples/celery/durable/workflows/__init__.py +122 -0
  47. examples/celery/durable/workflows/basic.py +87 -0
  48. examples/celery/durable/workflows/batch_processing.py +102 -0
  49. examples/celery/durable/workflows/cancellation.py +273 -0
  50. examples/celery/durable/workflows/child_workflow_patterns.py +240 -0
  51. examples/celery/durable/workflows/child_workflows.py +202 -0
  52. examples/celery/durable/workflows/continue_as_new.py +260 -0
  53. examples/celery/durable/workflows/fault_tolerance.py +210 -0
  54. examples/celery/durable/workflows/hooks.py +211 -0
  55. examples/celery/durable/workflows/idempotency.py +112 -0
  56. examples/celery/durable/workflows/long_running.py +99 -0
  57. examples/celery/durable/workflows/retries.py +101 -0
  58. examples/celery/durable/workflows/schedules.py +209 -0
  59. examples/celery/transient/01_basic_workflow.py +91 -0
  60. examples/celery/transient/02_fault_tolerance.py +257 -0
  61. examples/celery/transient/__init__.py +20 -0
  62. examples/celery/transient/pyworkflow.config.yaml +25 -0
  63. examples/local/__init__.py +1 -0
  64. examples/local/durable/01_basic_workflow.py +94 -0
  65. examples/local/durable/02_file_storage.py +132 -0
  66. examples/local/durable/03_retries.py +169 -0
  67. examples/local/durable/04_long_running.py +119 -0
  68. examples/local/durable/05_event_log.py +145 -0
  69. examples/local/durable/06_idempotency.py +148 -0
  70. examples/local/durable/07_hooks.py +334 -0
  71. examples/local/durable/08_cancellation.py +233 -0
  72. examples/local/durable/09_child_workflows.py +198 -0
  73. examples/local/durable/10_child_workflow_patterns.py +265 -0
  74. examples/local/durable/11_continue_as_new.py +249 -0
  75. examples/local/durable/12_schedules.py +198 -0
  76. examples/local/durable/__init__.py +1 -0
  77. examples/local/transient/01_quick_tasks.py +87 -0
  78. examples/local/transient/02_retries.py +130 -0
  79. examples/local/transient/03_sleep.py +141 -0
  80. examples/local/transient/__init__.py +1 -0
  81. pyworkflow/__init__.py +256 -0
  82. pyworkflow/aws/__init__.py +68 -0
  83. pyworkflow/aws/context.py +234 -0
  84. pyworkflow/aws/handler.py +184 -0
  85. pyworkflow/aws/testing.py +310 -0
  86. pyworkflow/celery/__init__.py +41 -0
  87. pyworkflow/celery/app.py +198 -0
  88. pyworkflow/celery/scheduler.py +315 -0
  89. pyworkflow/celery/tasks.py +1746 -0
  90. pyworkflow/cli/__init__.py +132 -0
  91. pyworkflow/cli/__main__.py +6 -0
  92. pyworkflow/cli/commands/__init__.py +1 -0
  93. pyworkflow/cli/commands/hooks.py +640 -0
  94. pyworkflow/cli/commands/quickstart.py +495 -0
  95. pyworkflow/cli/commands/runs.py +773 -0
  96. pyworkflow/cli/commands/scheduler.py +130 -0
  97. pyworkflow/cli/commands/schedules.py +794 -0
  98. pyworkflow/cli/commands/setup.py +703 -0
  99. pyworkflow/cli/commands/worker.py +413 -0
  100. pyworkflow/cli/commands/workflows.py +1257 -0
  101. pyworkflow/cli/output/__init__.py +1 -0
  102. pyworkflow/cli/output/formatters.py +321 -0
  103. pyworkflow/cli/output/styles.py +121 -0
  104. pyworkflow/cli/utils/__init__.py +1 -0
  105. pyworkflow/cli/utils/async_helpers.py +30 -0
  106. pyworkflow/cli/utils/config.py +130 -0
  107. pyworkflow/cli/utils/config_generator.py +344 -0
  108. pyworkflow/cli/utils/discovery.py +53 -0
  109. pyworkflow/cli/utils/docker_manager.py +651 -0
  110. pyworkflow/cli/utils/interactive.py +364 -0
  111. pyworkflow/cli/utils/storage.py +115 -0
  112. pyworkflow/config.py +329 -0
  113. pyworkflow/context/__init__.py +63 -0
  114. pyworkflow/context/aws.py +230 -0
  115. pyworkflow/context/base.py +416 -0
  116. pyworkflow/context/local.py +930 -0
  117. pyworkflow/context/mock.py +381 -0
  118. pyworkflow/core/__init__.py +0 -0
  119. pyworkflow/core/exceptions.py +353 -0
  120. pyworkflow/core/registry.py +313 -0
  121. pyworkflow/core/scheduled.py +328 -0
  122. pyworkflow/core/step.py +494 -0
  123. pyworkflow/core/workflow.py +294 -0
  124. pyworkflow/discovery.py +248 -0
  125. pyworkflow/engine/__init__.py +0 -0
  126. pyworkflow/engine/events.py +879 -0
  127. pyworkflow/engine/executor.py +682 -0
  128. pyworkflow/engine/replay.py +273 -0
  129. pyworkflow/observability/__init__.py +19 -0
  130. pyworkflow/observability/logging.py +234 -0
  131. pyworkflow/primitives/__init__.py +33 -0
  132. pyworkflow/primitives/child_handle.py +174 -0
  133. pyworkflow/primitives/child_workflow.py +372 -0
  134. pyworkflow/primitives/continue_as_new.py +101 -0
  135. pyworkflow/primitives/define_hook.py +150 -0
  136. pyworkflow/primitives/hooks.py +97 -0
  137. pyworkflow/primitives/resume_hook.py +210 -0
  138. pyworkflow/primitives/schedule.py +545 -0
  139. pyworkflow/primitives/shield.py +96 -0
  140. pyworkflow/primitives/sleep.py +100 -0
  141. pyworkflow/runtime/__init__.py +21 -0
  142. pyworkflow/runtime/base.py +179 -0
  143. pyworkflow/runtime/celery.py +310 -0
  144. pyworkflow/runtime/factory.py +101 -0
  145. pyworkflow/runtime/local.py +706 -0
  146. pyworkflow/scheduler/__init__.py +9 -0
  147. pyworkflow/scheduler/local.py +248 -0
  148. pyworkflow/serialization/__init__.py +0 -0
  149. pyworkflow/serialization/decoder.py +146 -0
  150. pyworkflow/serialization/encoder.py +162 -0
  151. pyworkflow/storage/__init__.py +54 -0
  152. pyworkflow/storage/base.py +612 -0
  153. pyworkflow/storage/config.py +185 -0
  154. pyworkflow/storage/dynamodb.py +1315 -0
  155. pyworkflow/storage/file.py +827 -0
  156. pyworkflow/storage/memory.py +549 -0
  157. pyworkflow/storage/postgres.py +1161 -0
  158. pyworkflow/storage/schemas.py +486 -0
  159. pyworkflow/storage/sqlite.py +1136 -0
  160. pyworkflow/utils/__init__.py +0 -0
  161. pyworkflow/utils/duration.py +177 -0
  162. pyworkflow/utils/schedule.py +391 -0
  163. pyworkflow_engine-0.1.7.dist-info/METADATA +687 -0
  164. pyworkflow_engine-0.1.7.dist-info/RECORD +196 -0
  165. pyworkflow_engine-0.1.7.dist-info/WHEEL +5 -0
  166. pyworkflow_engine-0.1.7.dist-info/entry_points.txt +2 -0
  167. pyworkflow_engine-0.1.7.dist-info/licenses/LICENSE +21 -0
  168. pyworkflow_engine-0.1.7.dist-info/top_level.txt +5 -0
  169. tests/examples/__init__.py +0 -0
  170. tests/integration/__init__.py +0 -0
  171. tests/integration/test_cancellation.py +330 -0
  172. tests/integration/test_child_workflows.py +439 -0
  173. tests/integration/test_continue_as_new.py +428 -0
  174. tests/integration/test_dynamodb_storage.py +1146 -0
  175. tests/integration/test_fault_tolerance.py +369 -0
  176. tests/integration/test_schedule_storage.py +484 -0
  177. tests/unit/__init__.py +0 -0
  178. tests/unit/backends/__init__.py +1 -0
  179. tests/unit/backends/test_dynamodb_storage.py +1554 -0
  180. tests/unit/backends/test_postgres_storage.py +1281 -0
  181. tests/unit/backends/test_sqlite_storage.py +1460 -0
  182. tests/unit/conftest.py +41 -0
  183. tests/unit/test_cancellation.py +364 -0
  184. tests/unit/test_child_workflows.py +680 -0
  185. tests/unit/test_continue_as_new.py +441 -0
  186. tests/unit/test_event_limits.py +316 -0
  187. tests/unit/test_executor.py +320 -0
  188. tests/unit/test_fault_tolerance.py +334 -0
  189. tests/unit/test_hooks.py +495 -0
  190. tests/unit/test_registry.py +261 -0
  191. tests/unit/test_replay.py +420 -0
  192. tests/unit/test_schedule_schemas.py +285 -0
  193. tests/unit/test_schedule_utils.py +286 -0
  194. tests/unit/test_scheduled_workflow.py +274 -0
  195. tests/unit/test_step.py +353 -0
  196. tests/unit/test_workflow.py +243 -0
@@ -0,0 +1,202 @@
+ """
+ Celery Durable Workflow - Child Workflows Basic Example
+
+ This example demonstrates how to spawn child workflows from a parent workflow.
+ - Parent workflow spawns child workflows using start_child_workflow()
+ - Children have their own run_id and event history
+ - wait_for_completion=True (default) waits for child to complete
+ - wait_for_completion=False returns a ChildWorkflowHandle immediately
+ - TERMINATE policy: when parent completes/fails/cancels, children are cancelled
+
+ Prerequisites:
+ 1. Start Redis: docker run -d -p 6379:6379 redis:7-alpine
+ 2. Start worker: pyworkflow --module examples.celery.durable.09_child_workflows worker run
+
+ Run with CLI:
+ pyworkflow --module examples.celery.durable.09_child_workflows workflows run order_fulfillment_workflow \
+ --arg order_id=order-456 --arg amount=149.99 --arg customer_email=customer@example.com
+
+ Check status:
+ pyworkflow runs list
+ pyworkflow runs status <run_id>
+ pyworkflow runs children <run_id>
+ """
+
+ from pyworkflow import (
+     ChildWorkflowHandle,
+     start_child_workflow,
+     step,
+     workflow,
+ )
+
+
+ # --- Steps ---
+ @step(name="child_demo_validate_order")
+ async def validate_order(order_id: str) -> dict:
+     """Validate order details."""
+     print(f" Validating order {order_id}...")
+     return {"order_id": order_id, "valid": True}
+
+
+ @step(name="child_demo_process_payment")
+ async def process_payment(order_id: str, amount: float) -> dict:
+     """Process payment for order."""
+     print(f" Processing payment ${amount:.2f} for {order_id}...")
+     return {"order_id": order_id, "amount": amount, "paid": True}
+
+
+ @step(name="child_demo_ship_order")
+ async def ship_order(order_id: str) -> dict:
+     """Ship the order."""
+     print(f" Shipping order {order_id}...")
+     return {"order_id": order_id, "shipped": True}
+
+
+ @step(name="child_demo_send_email")
+ async def send_email(recipient: str, subject: str) -> dict:
+     """Send an email notification."""
+     print(f" Sending email to {recipient}: {subject}")
+     return {"recipient": recipient, "subject": subject, "sent": True}
+
+
+ # --- Child Workflows ---
+ @workflow(name="child_demo_payment_workflow", tags=["celery", "durable"])
+ async def payment_workflow(order_id: str, amount: float) -> dict:
+     """Child workflow for payment processing."""
+     print(f" [PaymentWorkflow] Starting for order {order_id}")
+     result = await process_payment(order_id, amount)
+     print(f" [PaymentWorkflow] Completed for order {order_id}")
+     return result
+
+
+ @workflow(name="child_demo_shipping_workflow", tags=["celery", "durable"])
+ async def shipping_workflow(order_id: str) -> dict:
+     """Child workflow for shipping."""
+     print(f" [ShippingWorkflow] Starting for order {order_id}")
+     result = await ship_order(order_id)
+     print(f" [ShippingWorkflow] Completed for order {order_id}")
+     return result
+
+
+ @workflow(name="child_demo_notification_workflow", tags=["celery", "durable"])
+ async def notification_workflow(email: str, order_id: str) -> dict:
+     """Child workflow for sending notifications."""
+     print(f" [NotificationWorkflow] Starting for {email}")
+     result = await send_email(email, f"Order {order_id} update")
+     print(f" [NotificationWorkflow] Completed for {email}")
+     return result
+
+
+ # --- Parent Workflow ---
+ @workflow(tags=["celery", "durable"])
+ async def order_fulfillment_workflow(
+     order_id: str,
+     amount: float,
+     customer_email: str,
+ ) -> dict:
+     """
+     Parent workflow that orchestrates order fulfillment using child workflows.
+
+     This demonstrates:
+     1. wait_for_completion=True - Wait for child to complete (default)
+     2. wait_for_completion=False - Fire-and-forget with handle
+     """
+     print(f"[OrderFulfillment] Starting for order {order_id}")
+
+     # Step 1: Validate order (regular step)
+     validation = await validate_order(order_id)
+     if not validation["valid"]:
+         return {"order_id": order_id, "status": "invalid"}
+
+     # Step 2: Process payment via child workflow (wait for completion)
+     print("[OrderFulfillment] Starting payment child workflow...")
+     payment_result = await start_child_workflow(
+         payment_workflow,
+         order_id,
+         amount,
+         wait_for_completion=True,  # Default: wait for child to complete
+     )
+     print(f"[OrderFulfillment] Payment completed: {payment_result}")
+
+     # Step 3: Ship order via child workflow (wait for completion)
+     print("[OrderFulfillment] Starting shipping child workflow...")
+     shipping_result = await start_child_workflow(
+         shipping_workflow,
+         order_id,
+         wait_for_completion=True,
+     )
+     print(f"[OrderFulfillment] Shipping completed: {shipping_result}")
+
+     # Step 4: Send notification via fire-and-forget child workflow
+     # This returns immediately with a handle, parent continues
+     print("[OrderFulfillment] Starting notification child workflow (fire-and-forget)...")
+     notification_handle: ChildWorkflowHandle = await start_child_workflow(
+         notification_workflow,
+         customer_email,
+         order_id,
+         wait_for_completion=False,  # Fire-and-forget
+     )
+     print(f"[OrderFulfillment] Notification child started: {notification_handle.child_run_id}")
+
+     result = {
+         "order_id": order_id,
+         "status": "fulfilled",
+         "payment": payment_result,
+         "shipping": shipping_result,
+         "notification_run_id": notification_handle.child_run_id,
+     }
+
+     print(f"[OrderFulfillment] Completed for order {order_id}")
+     return result
+
+
+ async def main() -> None:
+     """Run the order fulfillment workflow example."""
+     import argparse
+     import asyncio
+
+     import pyworkflow
+     from pyworkflow import get_workflow_run
+
+     parser = argparse.ArgumentParser(description="Order Fulfillment Workflow with Child Workflows")
+     parser.add_argument("--order-id", default="order-456", help="Order ID to process")
+     parser.add_argument("--amount", type=float, default=149.99, help="Order amount")
+     parser.add_argument("--email", default="customer@example.com", help="Customer email")
+     args = parser.parse_args()
+
+     print("=== Child Workflows - Basic Example ===\n")
+     print("Running order fulfillment workflow with child workflows...\n")
+
+     # Start parent workflow
+     run_id = await pyworkflow.start(
+         order_fulfillment_workflow,
+         args.order_id,
+         args.amount,
+         args.email,
+     )
+
+     print(f"\nWorkflow started with run_id: {run_id}")
+     print("\nCheck status:")
+     print(f" pyworkflow runs status {run_id}")
+     print(f" pyworkflow runs children {run_id}")
+
+     # Poll for completion
+     print("\nWaiting for workflow to complete...")
+     for _ in range(30):
+         await asyncio.sleep(1)
+         run = await get_workflow_run(run_id)
+         if run.status.value in ("completed", "failed", "cancelled"):
+             print(f"\nWorkflow {run.status.value}!")
+             if run.result:
+                 print(f"Result: {run.result}")
+             if run.error:
+                 print(f"Error: {run.error}")
+             break
+     else:
+         print("\nTimeout waiting for workflow completion")
+
+
+ if __name__ == "__main__":
+     import asyncio
+
+     asyncio.run(main())
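In the order-fulfillment example above, the fire-and-forget notification child only returns a handle, so the parent result carries notification_run_id rather than the child's outcome. A minimal sketch for observing that child afterwards, modeled on the polling loop already in main(), could look like the following; wait_for_child is a hypothetical helper, and it assumes get_workflow_run() resolves a child's run_id the same way it resolves a parent's.

import asyncio

from pyworkflow import get_workflow_run


async def wait_for_child(child_run_id: str, timeout_s: int = 30) -> None:
    """Poll a child run until it reaches a terminal state (sketch, not part of the package)."""
    for _ in range(timeout_s):
        run = await get_workflow_run(child_run_id)
        if run.status.value in ("completed", "failed", "cancelled"):
            print(f"Child {child_run_id} finished: {run.status.value}")
            return
        await asyncio.sleep(1)
    print(f"Timed out waiting for child {child_run_id}")


# Usage sketch: after the parent completes, its result dict holds the child's run_id.
# asyncio.run(wait_for_child(parent_result["notification_run_id"]))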
@@ -0,0 +1,260 @@
+ """
+ Durable Workflow (Celery) - Continue-As-New
+
+ This example demonstrates continue_as_new() with Celery workers:
+ - Polling workflows that need fresh event history
+ - Batch processing with continuation
+ - Tracking workflow chains across distributed workers
+
+ Prerequisites:
+ 1. Redis running: docker run -d -p 6379:6379 redis
+ 2. Start Celery worker:
+ celery -A pyworkflow.celery.tasks worker -Q pyworkflow.workflows,pyworkflow.steps,pyworkflow.schedules -l info
+
+ Run: python examples/celery/durable/11_continue_as_new.py
+ """
+
+ import asyncio
+
+ from pyworkflow import (
+     configure,
+     continue_as_new,
+     get_workflow_chain,
+     reset_config,
+     start,
+     step,
+     workflow,
+ )
+ from pyworkflow.storage import FileStorageBackend
+
+
+ # --- Steps ---
+ @step()
+ async def fetch_batch(offset: int, batch_size: int) -> list:
+     """Fetch a batch of items to process."""
+     # Simulate fetching items - returns empty when done
+     total_items = 50  # Simulate 50 total items
+     if offset >= total_items:
+         return []
+     end = min(offset + batch_size, total_items)
+     items = list(range(offset, end))
+     print(f" [Step] Fetched items {offset} to {end - 1}")
+     await asyncio.sleep(0.1)  # Simulate I/O
+     return items
+
+
+ @step(name="continue_process_item")
+ async def process_item(item: int) -> dict:
+     """Process a single item."""
+     await asyncio.sleep(0.05)  # Simulate work
+     return {"item": item, "processed": True}
+
+
+ @step()
+ async def poll_for_messages(cursor: str | None) -> tuple[str | None, list]:
+     """Poll message queue for new messages."""
+     # Simulate message queue polling
+     await asyncio.sleep(0.1)
+
+     if cursor is None:
+         return "msg_batch_1", [{"id": 1, "type": "order"}, {"id": 2, "type": "payment"}]
+     elif cursor == "msg_batch_1":
+         return "msg_batch_2", [{"id": 3, "type": "shipment"}]
+     elif cursor == "msg_batch_2":
+         return "msg_batch_3", [{"id": 4, "type": "notification"}]
+     else:
+         return None, []  # No more messages
+
+
+ @step()
+ async def handle_message(message: dict) -> dict:
+     """Handle a single message."""
+     await asyncio.sleep(0.05)
+     return {"message_id": message["id"], "handled": True}
+
+
+ # --- Batch Processing Workflow ---
+ @workflow(durable=True, tags=["celery", "durable"])
+ async def batch_processor(offset: int = 0, batch_size: int = 10) -> str:
+     """
+     Process items in batches using continue_as_new.
+
+     Each batch runs as a separate workflow execution with fresh
+     event history, preventing unbounded history growth.
+
+     This pattern is ideal for:
+     - ETL pipelines processing millions of records
+     - Data migration jobs
+     - Bulk update operations
+     """
+     print(f"\n [Batch] Starting at offset {offset}")
+
+     items = await fetch_batch(offset, batch_size)
+
+     if not items:
+         return f"Batch processing complete! Total items: {offset}"
+
+     # Process items
+     for item in items:
+         await process_item(item)
+
+     print(f" [Batch] Processed {len(items)} items")
+
+     # Continue with next batch
+     continue_as_new(offset=offset + batch_size, batch_size=batch_size)
+
+
+ # --- Message Consumer Workflow ---
+ @workflow(durable=True, tags=["celery", "durable"])
+ async def message_consumer(cursor: str | None = None, messages_processed: int = 0) -> str:
+     """
+     Consume messages from a queue, continuing as new after each batch.
+
+     This pattern is useful for:
+     - Queue consumers that run indefinitely
+     - Event stream processors
+     - Real-time data ingestion
+     """
+     print(f"\n [Consumer] Polling with cursor: {cursor}")
+
+     # Poll for messages
+     new_cursor, messages = await poll_for_messages(cursor)
+
+     if not messages and new_cursor is None:
+         return f"Consumer complete! Processed {messages_processed} messages"
+
+     # Handle each message
+     count = 0
+     for message in messages:
+         await handle_message(message)
+         count += 1
+
+     total = messages_processed + count
+     print(f" [Consumer] Handled {count} messages (total: {total})")
+
+     # Continue with new cursor
+     continue_as_new(cursor=new_cursor, messages_processed=total)
+
+
+ # --- Recurring Task Workflow ---
+ @workflow(durable=True, tags=["celery", "durable"])
+ async def recurring_report(iteration: int = 1, max_iterations: int = 3) -> str:
+     """
+     Generate reports on a schedule, continuing as new for each iteration.
+
+     This demonstrates a pattern for:
+     - Daily/weekly reports
+     - Scheduled cleanup tasks
+     - Periodic sync operations
+
+     In production, you might add sleep() between iterations.
+     """
+     print(f"\n [Report] Generating report #{iteration}")
+
+     # Simulate report generation
+     await asyncio.sleep(0.1)
+     print(f" [Report] Report #{iteration} complete")
+
+     if iteration >= max_iterations:
+         return f"All {max_iterations} reports generated!"
+
+     # Continue with next iteration
+     continue_as_new(iteration=iteration + 1, max_iterations=max_iterations)
+
+
+ async def run_examples():
+     """Run all continue-as-new examples."""
+     print("\n=== Continue-As-New Examples (Celery) ===\n")
+
+     # Example 1: Batch Processing
+     print("--- Example 1: Batch Processing ---")
+     print("Processing 50 items in batches of 10...")
+
+     run_id = await start(batch_processor, offset=0, batch_size=10)
+     print(f"Started workflow: {run_id}")
+
+     # Wait for completion (in production, use webhooks or polling)
+     print("Waiting for completion...")
+     await asyncio.sleep(5)
+
+     # Check the chain
+     from pyworkflow import get_storage
+
+     storage = get_storage()
+     chain = await get_workflow_chain(run_id, storage=storage)
+     print(f"\nWorkflow chain has {len(chain)} runs:")
+     for i, run in enumerate(chain):
+         marker = " <- started here" if run.run_id == run_id else ""
+         print(f" {i + 1}. {run.run_id[:20]}... [{run.status.value}]{marker}")
+
+     # Example 2: Message Consumer
+     print("\n--- Example 2: Message Consumer ---")
+     print("Consuming messages until queue is empty...")
+
+     run_id2 = await start(message_consumer)
+     print(f"Started workflow: {run_id2}")
+
+     await asyncio.sleep(3)
+
+     chain2 = await get_workflow_chain(run_id2, storage=storage)
+     print(f"\nConsumer chain has {len(chain2)} runs")
+     if chain2:
+         final = chain2[-1]
+         if final.result:
+             print(f"Final result: {final.result}")
+
+     # Example 3: Recurring Task
+     print("\n--- Example 3: Recurring Report ---")
+     print("Running 3 report iterations...")
+
+     run_id3 = await start(recurring_report)
+     print(f"Started workflow: {run_id3}")
+
+     await asyncio.sleep(2)
+
+     chain3 = await get_workflow_chain(run_id3, storage=storage)
+     print(f"\nReport chain has {len(chain3)} runs")
+
+     # Summary
+     print("\n=== Summary ===")
+     print(f" Batch processor: {len(chain)} workflow executions")
+     print(f" Message consumer: {len(chain2)} workflow executions")
+     print(f" Recurring report: {len(chain3)} workflow executions")
+
+
+ def main():
+     """Configure and run examples."""
+     print("Configuring PyWorkflow with Celery runtime...")
+
+     # Reset any existing config
+     reset_config()
+
+     # Configure storage
+     storage = FileStorageBackend(base_path=".workflow_data")
+
+     # Configure pyworkflow
+     configure(
+         storage=storage,
+         default_runtime="celery",
+         default_durable=True,
+     )
+
+     print("Configuration complete!")
+     print("\nMake sure Celery worker is running:")
+     print(
+         " celery -A pyworkflow.celery.tasks worker -Q pyworkflow.workflows,pyworkflow.steps,pyworkflow.schedules -l info\n"
+     )
+
+     # Run examples
+     asyncio.run(run_examples())
+
+     print("\n=== Key Takeaways ===")
+     print(" - continue_as_new() works across distributed Celery workers")
+     print(" - Each continuation is a new Celery task execution")
+     print(" - Event history is reset, preventing unbounded growth")
+     print(" - Chains can be tracked with get_workflow_chain()")
+     print(" - Useful for long-running polling, batch processing, recurring tasks")
+
+
+ if __name__ == "__main__":
+     main()
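run_examples() above waits a fixed number of seconds before inspecting each chain, which can under- or over-shoot depending on broker and worker latency. A sketch of a polling variant, built only from the get_workflow_chain() and get_storage() calls already used in this file, is shown below; wait_for_chain is a hypothetical helper, not part of the package.

import asyncio

from pyworkflow import get_storage, get_workflow_chain


async def wait_for_chain(run_id: str, timeout_s: int = 60) -> list:
    """Poll a continue-as-new chain until its latest run reaches a terminal state (sketch)."""
    storage = get_storage()
    for _ in range(timeout_s):
        chain = await get_workflow_chain(run_id, storage=storage)
        if chain and chain[-1].status.value in ("completed", "failed", "cancelled"):
            return chain
        await asyncio.sleep(1)
    raise TimeoutError(f"Chain starting at {run_id} did not finish within {timeout_s}s")


# Usage sketch: replaces the fixed asyncio.sleep(5) after start(batch_processor, ...).
# chain = await wait_for_chain(run_id)
# print(f"Final result: {chain[-1].result}")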
@@ -0,0 +1,210 @@
+ """
+ Celery Durable Workflow - Fault Tolerance Example
+
+ This example demonstrates automatic recovery from worker failures for durable workflows.
+
+ Key features:
+ - Automatic recovery when workers crash mid-execution
+ - Event replay continues from the last completed step
+ - Configurable recovery attempts limit
+ - WORKFLOW_INTERRUPTED events recorded for auditing
+
+ When a worker crashes:
+ 1. The task is automatically requeued (task_reject_on_worker_lost=True)
+ 2. Another worker detects the RUNNING workflow and initiates recovery
+ 3. WORKFLOW_INTERRUPTED event is recorded
+ 4. Events are replayed to restore state
+ 5. Workflow continues from the last checkpoint
+
+ Prerequisites:
+ 1. Start Redis: docker run -d -p 6379:6379 redis:7-alpine
+ 2. Start worker: pyworkflow --module examples.celery.durable.06_fault_tolerance worker run
+
+ Run with CLI:
+ pyworkflow --module examples.celery.durable.06_fault_tolerance workflows run data_pipeline \
+ --arg data_id=data-123
+
+ To test fault tolerance:
+ 1. Start the workflow
+ 2. While it's running, kill the worker (Ctrl+C)
+ 3. Start a new worker
+ 4. Watch the workflow recover and continue from the last completed step
+
+ Check status:
+ pyworkflow runs list
+ pyworkflow runs status <run_id>
+ pyworkflow runs logs <run_id>  # Will show WORKFLOW_INTERRUPTED events
+ """
+
+ import asyncio
+
+ from pyworkflow import sleep, step, workflow
+
+
+ @step()
+ async def fetch_data(data_id: str) -> dict:
+     """Fetch data from external source."""
+     print(f"[Step 1] Fetching data for {data_id}...")
+     await asyncio.sleep(2)  # Simulate network delay
+     return {"data_id": data_id, "records": 1000, "fetched": True}
+
+
+ @step()
+ async def validate_data(data: dict) -> dict:
+     """Validate the fetched data."""
+     print(f"[Step 2] Validating {data['records']} records...")
+     await asyncio.sleep(2)  # Simulate validation time
+     return {**data, "valid_records": 950, "validated": True}
+
+
+ @step()
+ async def transform_data(data: dict) -> dict:
+     """Transform data for processing."""
+     print(f"[Step 3] Transforming {data['valid_records']} records...")
+     await asyncio.sleep(3)  # Simulate CPU-intensive work
+     return {**data, "transformed_records": 950, "transformed": True}
+
+
+ @step()
+ async def load_data(data: dict) -> dict:
+     """Load transformed data into destination."""
+     print(f"[Step 4] Loading {data['transformed_records']} records...")
+     await asyncio.sleep(2)  # Simulate database writes
+     return {**data, "loaded": True}
+
+
+ @step()
+ async def send_notification(data: dict) -> dict:
+     """Send completion notification."""
+     print(f"[Step 5] Sending notification for {data['data_id']}...")
+     return {**data, "notified": True}
+
+
+ @workflow(
+     recover_on_worker_loss=True,  # Enable automatic recovery (default for durable)
+     max_recovery_attempts=5,  # Allow up to 5 recovery attempts
+     tags=["celery", "durable"],
+ )
+ async def data_pipeline(data_id: str) -> dict:
+     """
+     Data processing pipeline with fault tolerance.
+
+     This workflow demonstrates automatic recovery from worker failures:
+
+     1. Fetch data from external source
+     2. Validate the data
+     3. Transform data for processing
+     4. Load into destination
+     5. Send completion notification
+
+     If a worker crashes during any step:
+     - The workflow will be automatically recovered by another worker
+     - Already completed steps will be skipped (results from event replay)
+     - Execution continues from where it left off
+     - Up to 5 recovery attempts are allowed
+
+     Test fault tolerance:
+     - Kill the worker during step 3 (transform_data) which takes longest
+     - Start a new worker and watch it recover
+     """
+     print(f"\n{'=' * 60}")
+     print(f"Starting data pipeline for {data_id}")
+     print(f"{'=' * 60}\n")
+
+     data = await fetch_data(data_id)
+     print(f" -> Fetch complete: {data['records']} records\n")
+
+     print(" [Sleeping 10s before validation - kill worker now to test recovery!]")
+     await sleep("10s")
+
+     data = await validate_data(data)
+     print(f" -> Validation complete: {data['valid_records']} valid records\n")
+
+     print(" [Sleeping 10s before transform - kill worker now to test recovery!]")
+     await sleep("10s")
+
+     data = await transform_data(data)
+     print(f" -> Transform complete: {data['transformed_records']} records\n")
+
+     print(" [Sleeping 10s before load - kill worker now to test recovery!]")
+     await sleep("10s")
+
+     data = await load_data(data)
+     print(" -> Load complete\n")
+
+     data = await send_notification(data)
+     print(" -> Notification sent\n")
+
+     print(f"{'=' * 60}")
+     print("Pipeline completed successfully!")
+     print(f"{'=' * 60}\n")
+
+     return data
+
+
+ @workflow(
+     recover_on_worker_loss=False,  # Disable recovery for this workflow
+     max_recovery_attempts=0,
+     tags=["celery", "durable"],
+ )
+ async def critical_pipeline(data_id: str) -> dict:
+     """
+     Critical pipeline that should NOT auto-recover.
+
+     Some workflows should fail completely on worker loss rather than
+     recover, for example when:
+     - Steps have side effects that can't be safely repeated
+     - Human intervention is required after failures
+     - The workflow interacts with non-idempotent external systems
+
+     If a worker crashes during this workflow:
+     - The workflow will be marked as FAILED
+     - No automatic recovery will be attempted
+     - Manual intervention is required
+
+     Usage:
+         pyworkflow workflows run critical_pipeline --arg data_id=critical-001
+     """
+     print(f"[Critical] Processing {data_id} - NO AUTO-RECOVERY")
+
+     data = await fetch_data(data_id)
+     data = await validate_data(data)
+     data = await transform_data(data)
+
+     print(f"[Critical] Completed {data_id}")
+     return data
+
+
+ async def main() -> None:
+     """Run the fault tolerance example."""
+     import argparse
+
+     import pyworkflow
+
+     parser = argparse.ArgumentParser(description="Data Pipeline with Fault Tolerance")
+     parser.add_argument("--data-id", default="data-123", help="Data ID to process")
+     parser.add_argument(
+         "--critical",
+         action="store_true",
+         help="Run the critical pipeline (no auto-recovery)",
+     )
+     args = parser.parse_args()
+
+     if args.critical:
+         print(f"Starting CRITICAL pipeline for {args.data_id}...")
+         print("NOTE: This workflow will NOT auto-recover from worker failures")
+         run_id = await pyworkflow.start(critical_pipeline, args.data_id)
+     else:
+         print(f"Starting data pipeline for {args.data_id}...")
+         print("NOTE: This workflow WILL auto-recover from worker failures")
+         print(" Kill the worker during execution to test recovery")
+         run_id = await pyworkflow.start(data_pipeline, args.data_id)
+
+     print(f"\nWorkflow started with run_id: {run_id}")
+     print("\nMonitor with:")
+     print(f" pyworkflow runs status {run_id}")
+     print(f" pyworkflow runs logs {run_id}")
+
+
+ if __name__ == "__main__":
+     asyncio.run(main())
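Seeing a recovery in action requires killing and restarting the worker while data_pipeline is mid-flight, as the module docstring above describes. A small client-side status watcher, sketched from the pyworkflow.start() and get_workflow_run() calls used elsewhere in these examples (watch_run is a hypothetical helper and the printed transitions depend on the run statuses the backend actually reports), makes the interruption and resumption visible without digging through worker logs:

import asyncio

from pyworkflow import get_workflow_run


async def watch_run(run_id: str, poll_s: float = 2.0) -> None:
    """Print each status transition of a run until it reaches a terminal state (sketch)."""
    last = None
    while True:
        run = await get_workflow_run(run_id)
        if run.status.value != last:
            print(f"[watch] {run_id}: {last} -> {run.status.value}")
            last = run.status.value
        if run.status.value in ("completed", "failed", "cancelled"):
            return
        await asyncio.sleep(poll_s)


# Usage sketch: start data_pipeline via main() above or the CLI, then watch its run_id
# while killing and restarting the worker:
# asyncio.run(watch_run(run_id))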