pyworkflow-engine 0.1.7-py3-none-any.whl → 0.1.10-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. pyworkflow/__init__.py +10 -1
  2. pyworkflow/celery/tasks.py +272 -24
  3. pyworkflow/cli/__init__.py +4 -1
  4. pyworkflow/cli/commands/runs.py +4 -4
  5. pyworkflow/cli/commands/setup.py +203 -4
  6. pyworkflow/cli/utils/config_generator.py +76 -3
  7. pyworkflow/cli/utils/docker_manager.py +232 -0
  8. pyworkflow/config.py +94 -17
  9. pyworkflow/context/__init__.py +13 -0
  10. pyworkflow/context/base.py +26 -0
  11. pyworkflow/context/local.py +80 -0
  12. pyworkflow/context/step_context.py +295 -0
  13. pyworkflow/core/registry.py +6 -1
  14. pyworkflow/core/step.py +141 -0
  15. pyworkflow/core/workflow.py +56 -0
  16. pyworkflow/engine/events.py +30 -0
  17. pyworkflow/engine/replay.py +39 -0
  18. pyworkflow/primitives/child_workflow.py +1 -1
  19. pyworkflow/runtime/local.py +1 -1
  20. pyworkflow/storage/__init__.py +14 -0
  21. pyworkflow/storage/base.py +35 -0
  22. pyworkflow/storage/cassandra.py +1747 -0
  23. pyworkflow/storage/config.py +69 -0
  24. pyworkflow/storage/dynamodb.py +31 -2
  25. pyworkflow/storage/file.py +28 -0
  26. pyworkflow/storage/memory.py +18 -0
  27. pyworkflow/storage/mysql.py +1159 -0
  28. pyworkflow/storage/postgres.py +27 -2
  29. pyworkflow/storage/schemas.py +4 -3
  30. pyworkflow/storage/sqlite.py +25 -2
  31. {pyworkflow_engine-0.1.7.dist-info → pyworkflow_engine-0.1.10.dist-info}/METADATA +7 -4
  32. pyworkflow_engine-0.1.10.dist-info/RECORD +91 -0
  33. pyworkflow_engine-0.1.10.dist-info/top_level.txt +1 -0
  34. dashboard/backend/app/__init__.py +0 -1
  35. dashboard/backend/app/config.py +0 -32
  36. dashboard/backend/app/controllers/__init__.py +0 -6
  37. dashboard/backend/app/controllers/run_controller.py +0 -86
  38. dashboard/backend/app/controllers/workflow_controller.py +0 -33
  39. dashboard/backend/app/dependencies/__init__.py +0 -5
  40. dashboard/backend/app/dependencies/storage.py +0 -50
  41. dashboard/backend/app/repositories/__init__.py +0 -6
  42. dashboard/backend/app/repositories/run_repository.py +0 -80
  43. dashboard/backend/app/repositories/workflow_repository.py +0 -27
  44. dashboard/backend/app/rest/__init__.py +0 -8
  45. dashboard/backend/app/rest/v1/__init__.py +0 -12
  46. dashboard/backend/app/rest/v1/health.py +0 -33
  47. dashboard/backend/app/rest/v1/runs.py +0 -133
  48. dashboard/backend/app/rest/v1/workflows.py +0 -41
  49. dashboard/backend/app/schemas/__init__.py +0 -23
  50. dashboard/backend/app/schemas/common.py +0 -16
  51. dashboard/backend/app/schemas/event.py +0 -24
  52. dashboard/backend/app/schemas/hook.py +0 -25
  53. dashboard/backend/app/schemas/run.py +0 -54
  54. dashboard/backend/app/schemas/step.py +0 -28
  55. dashboard/backend/app/schemas/workflow.py +0 -31
  56. dashboard/backend/app/server.py +0 -87
  57. dashboard/backend/app/services/__init__.py +0 -6
  58. dashboard/backend/app/services/run_service.py +0 -240
  59. dashboard/backend/app/services/workflow_service.py +0 -155
  60. dashboard/backend/main.py +0 -18
  61. docs/concepts/cancellation.mdx +0 -362
  62. docs/concepts/continue-as-new.mdx +0 -434
  63. docs/concepts/events.mdx +0 -266
  64. docs/concepts/fault-tolerance.mdx +0 -370
  65. docs/concepts/hooks.mdx +0 -552
  66. docs/concepts/limitations.mdx +0 -167
  67. docs/concepts/schedules.mdx +0 -775
  68. docs/concepts/sleep.mdx +0 -312
  69. docs/concepts/steps.mdx +0 -301
  70. docs/concepts/workflows.mdx +0 -255
  71. docs/guides/cli.mdx +0 -942
  72. docs/guides/configuration.mdx +0 -560
  73. docs/introduction.mdx +0 -155
  74. docs/quickstart.mdx +0 -279
  75. examples/__init__.py +0 -1
  76. examples/celery/__init__.py +0 -1
  77. examples/celery/durable/docker-compose.yml +0 -55
  78. examples/celery/durable/pyworkflow.config.yaml +0 -12
  79. examples/celery/durable/workflows/__init__.py +0 -122
  80. examples/celery/durable/workflows/basic.py +0 -87
  81. examples/celery/durable/workflows/batch_processing.py +0 -102
  82. examples/celery/durable/workflows/cancellation.py +0 -273
  83. examples/celery/durable/workflows/child_workflow_patterns.py +0 -240
  84. examples/celery/durable/workflows/child_workflows.py +0 -202
  85. examples/celery/durable/workflows/continue_as_new.py +0 -260
  86. examples/celery/durable/workflows/fault_tolerance.py +0 -210
  87. examples/celery/durable/workflows/hooks.py +0 -211
  88. examples/celery/durable/workflows/idempotency.py +0 -112
  89. examples/celery/durable/workflows/long_running.py +0 -99
  90. examples/celery/durable/workflows/retries.py +0 -101
  91. examples/celery/durable/workflows/schedules.py +0 -209
  92. examples/celery/transient/01_basic_workflow.py +0 -91
  93. examples/celery/transient/02_fault_tolerance.py +0 -257
  94. examples/celery/transient/__init__.py +0 -20
  95. examples/celery/transient/pyworkflow.config.yaml +0 -25
  96. examples/local/__init__.py +0 -1
  97. examples/local/durable/01_basic_workflow.py +0 -94
  98. examples/local/durable/02_file_storage.py +0 -132
  99. examples/local/durable/03_retries.py +0 -169
  100. examples/local/durable/04_long_running.py +0 -119
  101. examples/local/durable/05_event_log.py +0 -145
  102. examples/local/durable/06_idempotency.py +0 -148
  103. examples/local/durable/07_hooks.py +0 -334
  104. examples/local/durable/08_cancellation.py +0 -233
  105. examples/local/durable/09_child_workflows.py +0 -198
  106. examples/local/durable/10_child_workflow_patterns.py +0 -265
  107. examples/local/durable/11_continue_as_new.py +0 -249
  108. examples/local/durable/12_schedules.py +0 -198
  109. examples/local/durable/__init__.py +0 -1
  110. examples/local/transient/01_quick_tasks.py +0 -87
  111. examples/local/transient/02_retries.py +0 -130
  112. examples/local/transient/03_sleep.py +0 -141
  113. examples/local/transient/__init__.py +0 -1
  114. pyworkflow_engine-0.1.7.dist-info/RECORD +0 -196
  115. pyworkflow_engine-0.1.7.dist-info/top_level.txt +0 -5
  116. tests/examples/__init__.py +0 -0
  117. tests/integration/__init__.py +0 -0
  118. tests/integration/test_cancellation.py +0 -330
  119. tests/integration/test_child_workflows.py +0 -439
  120. tests/integration/test_continue_as_new.py +0 -428
  121. tests/integration/test_dynamodb_storage.py +0 -1146
  122. tests/integration/test_fault_tolerance.py +0 -369
  123. tests/integration/test_schedule_storage.py +0 -484
  124. tests/unit/__init__.py +0 -0
  125. tests/unit/backends/__init__.py +0 -1
  126. tests/unit/backends/test_dynamodb_storage.py +0 -1554
  127. tests/unit/backends/test_postgres_storage.py +0 -1281
  128. tests/unit/backends/test_sqlite_storage.py +0 -1460
  129. tests/unit/conftest.py +0 -41
  130. tests/unit/test_cancellation.py +0 -364
  131. tests/unit/test_child_workflows.py +0 -680
  132. tests/unit/test_continue_as_new.py +0 -441
  133. tests/unit/test_event_limits.py +0 -316
  134. tests/unit/test_executor.py +0 -320
  135. tests/unit/test_fault_tolerance.py +0 -334
  136. tests/unit/test_hooks.py +0 -495
  137. tests/unit/test_registry.py +0 -261
  138. tests/unit/test_replay.py +0 -420
  139. tests/unit/test_schedule_schemas.py +0 -285
  140. tests/unit/test_schedule_utils.py +0 -286
  141. tests/unit/test_scheduled_workflow.py +0 -274
  142. tests/unit/test_step.py +0 -353
  143. tests/unit/test_workflow.py +0 -243
  144. {pyworkflow_engine-0.1.7.dist-info → pyworkflow_engine-0.1.10.dist-info}/WHEEL +0 -0
  145. {pyworkflow_engine-0.1.7.dist-info → pyworkflow_engine-0.1.10.dist-info}/entry_points.txt +0 -0
  146. {pyworkflow_engine-0.1.7.dist-info → pyworkflow_engine-0.1.10.dist-info}/licenses/LICENSE +0 -0
examples/celery/durable/workflows/schedules.py
@@ -1,209 +0,0 @@
- """
- Celery Durable Workflow - Schedules Example
-
- This example demonstrates scheduled workflow execution with Celery Beat.
- - Cron-based scheduling (every minute)
- - Interval-based scheduling (every 30 seconds)
- - Overlap policies to control concurrent executions
- - Schedule management (pause, resume, delete)
-
- Prerequisites:
- 1. Start Redis: docker run -d -p 6379:6379 redis:7-alpine
- 2. Start worker: pyworkflow --module examples.celery.durable.12_schedules worker run
- 3. Start beat: pyworkflow --module examples.celery.durable.12_schedules beat run
-
- CLI Commands:
- # Create a schedule via CLI
- pyworkflow schedules create metrics_workflow --cron "* * * * *" --overlap skip
-
- # List all schedules
- pyworkflow schedules list
-
- # Pause/resume a schedule
- pyworkflow schedules pause <schedule_id>
- pyworkflow schedules resume <schedule_id>
-
- # Trigger immediately (bypass schedule)
- pyworkflow schedules trigger <schedule_id>
-
- # View schedule details
- pyworkflow schedules show <schedule_id>
- """
-
- from datetime import datetime
-
- from pyworkflow import (
-     OverlapPolicy,
-     scheduled_workflow,
-     step,
-     workflow,
- )
-
-
- # --- Steps ---
- @step()
- async def collect_metrics() -> dict:
-     """Collect system metrics."""
-     timestamp = datetime.now().isoformat()
-     print(f"[Step] Collecting metrics at {timestamp}...")
-     return {
-         "timestamp": timestamp,
-         "cpu_usage": 45.2,
-         "memory_usage": 62.8,
-         "disk_usage": 78.1,
-     }
-
-
- @step()
- async def store_metrics(metrics: dict) -> dict:
-     """Store metrics in database (simulated)."""
-     print(f"[Step] Storing metrics: {metrics}")
-     return {**metrics, "stored": True}
-
-
- @step()
- async def check_alerts(metrics: dict) -> dict:
-     """Check if any metrics exceed thresholds."""
-     alerts = []
-     if metrics.get("cpu_usage", 0) > 80:
-         alerts.append("High CPU usage")
-     if metrics.get("memory_usage", 0) > 90:
-         alerts.append("High memory usage")
-     if metrics.get("disk_usage", 0) > 85:
-         alerts.append("High disk usage")
-
-     print(f"[Step] Alert check complete. Alerts: {alerts or 'None'}")
-     return {**metrics, "alerts": alerts}
-
-
- # --- Scheduled Workflow (using decorator) ---
- @scheduled_workflow(
-     cron="* * * * *", # Every minute
-     overlap_policy=OverlapPolicy.SKIP, # Skip if previous run still active
-     timezone="UTC",
- )
- async def metrics_workflow() -> dict:
-     """
-     Scheduled metrics collection workflow.
-
-     Runs every minute via Celery Beat.
-     Uses SKIP overlap policy - if a previous run is still active,
-     new runs are skipped to prevent resource exhaustion.
-
-     Steps:
-     1. Collect current system metrics
-     2. Store metrics in database
-     3. Check for threshold alerts
-     """
-     metrics = await collect_metrics()
-     metrics = await store_metrics(metrics)
-     metrics = await check_alerts(metrics)
-     return metrics
-
-
- # --- Regular Workflow (for programmatic scheduling) ---
- @workflow()
- async def cleanup_workflow(days_old: int = 30) -> dict:
-     """
-     Cleanup old data workflow.
-
-     This workflow is scheduled programmatically in main().
-     """
-     print(f"[Workflow] Cleaning up data older than {days_old} days...")
-     return {"cleaned": True, "days_old": days_old}
-
-
- async def main() -> None:
-     """
-     Create schedules programmatically.
-
-     The @scheduled_workflow decorator automatically creates a schedule
-     when activate_scheduled_workflows() is called (done by Beat).
-
-     For regular @workflow functions, use create_schedule() to create
-     schedules programmatically.
-     """
-     import argparse
-
-     from pyworkflow import (
-         OverlapPolicy,
-         ScheduleSpec,
-         create_schedule,
-         delete_schedule,
-         list_schedules,
-         pause_schedule,
-         resume_schedule,
-     )
-
-     parser = argparse.ArgumentParser(description="Schedule Management Example")
-     parser.add_argument(
-         "--action",
-         choices=["create", "list", "pause", "resume", "delete"],
-         default="create",
-         help="Action to perform",
-     )
-     parser.add_argument("--schedule-id", help="Schedule ID for pause/resume/delete")
-     args = parser.parse_args()
-
-     print("=== Celery Schedules Example ===\n")
-
-     if args.action == "create":
-         # Create a schedule for the cleanup workflow
-         print("Creating cleanup schedule (runs every 2 minutes)...")
-         spec = ScheduleSpec(cron="*/2 * * * *", timezone="UTC")
-
-         schedule = await create_schedule(
-             workflow_name="cleanup_workflow",
-             spec=spec,
-             overlap_policy=OverlapPolicy.SKIP,
-             schedule_id="cleanup-hourly",
-             days_old=7, # kwargs passed to workflow
-         )
-         print(f"Schedule created: {schedule.schedule_id}")
-         print(f" Workflow: {schedule.workflow_name}")
-         print(f" Cron: {schedule.spec.cron}")
-         print(f" Next run: {schedule.next_run_time}")
-
-         # Also show the decorated workflow schedule
-         print("\nThe @scheduled_workflow decorator creates:")
-         print(" - metrics_workflow: runs every minute")
-         print(" - Activated automatically when Beat starts")
-
-     elif args.action == "list":
-         schedules = await list_schedules()
-         print(f"Found {len(schedules)} schedule(s):\n")
-         for sched in schedules:
-             print(f" {sched.schedule_id}")
-             print(f" Workflow: {sched.workflow_name}")
-             print(f" Status: {sched.status.value}")
-             print(f" Spec: cron={sched.spec.cron}, interval={sched.spec.interval}")
-             print(f" Total runs: {sched.total_runs}")
-             print()
-
-     elif args.action == "pause" and args.schedule_id:
-         schedule = await pause_schedule(args.schedule_id)
-         print(f"Paused schedule: {schedule.schedule_id}")
-         print(f"Status: {schedule.status.value}")
-
-     elif args.action == "resume" and args.schedule_id:
-         schedule = await resume_schedule(args.schedule_id)
-         print(f"Resumed schedule: {schedule.schedule_id}")
-         print(f"Status: {schedule.status.value}")
-
-     elif args.action == "delete" and args.schedule_id:
-         await delete_schedule(args.schedule_id)
-         print(f"Deleted schedule: {args.schedule_id}")
-
-     else:
-         print("Invalid action or missing schedule-id")
-
-     print("\n=== How to Run ===")
-     print("1. Start worker: pyworkflow --module examples.celery.durable.12_schedules worker run")
-     print("2. Start beat: pyworkflow --module examples.celery.durable.12_schedules beat run")
-     print("3. Watch logs to see scheduled executions!")
-
-
- if __name__ == "__main__":
-     import asyncio
-
-     asyncio.run(main())
examples/celery/transient/01_basic_workflow.py
@@ -1,91 +0,0 @@
- """
- Celery Transient Workflow - Basic Example
-
- This example demonstrates a simple transient workflow running on Celery workers.
-
- Transient workflows:
- - Do NOT record events
- - Do NOT persist state
- - Are simpler and faster
- - Best for short-lived, stateless tasks
-
- Prerequisites:
- 1. Start Redis: docker run -d -p 6379:6379 redis:7-alpine
- 2. Start worker: pyworkflow --module examples.celery.transient.01_basic_workflow worker run
-
- Run with CLI:
- pyworkflow --module examples.celery.transient.01_basic_workflow workflows run quick_task \
- --arg item_id=item-123
-
- Note: Since this is transient, runs list and runs status won't show this workflow.
- """
-
- import asyncio
-
- from pyworkflow import step, workflow
-
-
- @step(name="transient_process_item")
- async def process_item(item_id: str) -> dict:
-     """Process a single item."""
-     print(f"[Step] Processing item {item_id}...")
-     await asyncio.sleep(0.5) # Simulate quick processing
-     return {"item_id": item_id, "processed": True}
-
-
- @step(name="transient_enrich_item")
- async def enrich_item(item: dict) -> dict:
-     """Enrich item with additional data."""
-     print(f"[Step] Enriching item {item['item_id']}...")
-     await asyncio.sleep(0.3)
-     return {**item, "enriched": True, "score": 0.95}
-
-
- @step(name="transient_store_result")
- async def store_result(item: dict) -> dict:
-     """Store the processed result."""
-     print(f"[Step] Storing result for {item['item_id']}...")
-     await asyncio.sleep(0.2)
-     return {**item, "stored": True}
-
-
- @workflow(durable=False, tags=["celery", "transient"]) # Transient workflow - no event recording
- async def quick_task(item_id: str) -> dict:
-     """
-     Quick processing task (transient).
-
-     This workflow runs without event recording for maximum performance.
-     Ideal for:
-     - High-throughput processing
-     - Stateless transformations
-     - Quick API calls
-     - Tasks that can be safely retried from scratch
-     """
-     print(f"\n[Workflow] Quick task for {item_id}")
-
-     item = await process_item(item_id)
-     item = await enrich_item(item)
-     item = await store_result(item)
-
-     print(f"[Workflow] Completed: {item}\n")
-     return item
-
-
- async def main() -> None:
-     """Run the transient workflow example."""
-     import argparse
-
-     import pyworkflow
-
-     parser = argparse.ArgumentParser(description="Quick Processing Task (Transient)")
-     parser.add_argument("--item-id", default="item-123", help="Item ID to process")
-     args = parser.parse_args()
-
-     print(f"Starting quick task for {args.item_id}...")
-     print("NOTE: This is a transient workflow - no events are recorded")
-     run_id = await pyworkflow.start(quick_task, args.item_id)
-     print(f"Task dispatched with run_id: {run_id}")
-
-
- if __name__ == "__main__":
-     asyncio.run(main())
examples/celery/transient/02_fault_tolerance.py
@@ -1,257 +0,0 @@
- """
- Celery Transient Workflow - Fault Tolerance Example
-
- This example demonstrates fault tolerance options for transient workflows.
-
- Key difference from durable workflows:
- - Transient workflows do NOT record events
- - On worker failure, there's no state to recover from
- - By default, failed transient workflows stay FAILED
- - Optionally, they can be rescheduled to run from scratch
-
- Configuration options:
- 1. recover_on_worker_loss=False (DEFAULT for transient)
- - On worker crash: workflow is marked as FAILED
- - No automatic retry
- - Use when: tasks have side effects or can't be safely repeated
-
- 2. recover_on_worker_loss=True
- - On worker crash: workflow is rescheduled from scratch
- - All steps run again (no event replay - there are no events!)
- - Use when: tasks are idempotent and can be safely restarted
-
- Prerequisites:
- 1. Start Redis: docker run -d -p 6379:6379 redis:7-alpine
- 2. Start worker: pyworkflow --module examples.celery.transient.02_fault_tolerance worker run
-
- Run with CLI:
- # Default behavior (no recovery)
- pyworkflow --module examples.celery.transient.02_fault_tolerance workflows run image_processor \
- --arg image_id=img-123
-
- # With recovery enabled
- pyworkflow --module examples.celery.transient.02_fault_tolerance workflows run batch_processor \
- --arg batch_id=batch-456
-
- To test fault tolerance:
- 1. Start the workflow
- 2. Kill the worker during execution
- 3. Start a new worker
- 4. Observe the difference between recover_on_worker_loss=True/False
- """
-
- import asyncio
-
- from pyworkflow import step, workflow
-
-
- @step(name="transient_download_image")
- async def download_image(image_id: str) -> dict:
-     """Download image from storage."""
-     print(f"[Step] Downloading image {image_id}...")
-     await asyncio.sleep(2)
-     return {"image_id": image_id, "size_mb": 5.2, "downloaded": True}
-
-
- @step(name="transient_resize_image")
- async def resize_image(image: dict) -> dict:
-     """Resize image to standard dimensions."""
-     print(f"[Step] Resizing image {image['image_id']}...")
-     print(" (taking 8 seconds - kill worker now to test!)")
-     await asyncio.sleep(8) # Long operation - good time to kill worker
-     return {**image, "resized": True, "new_size_mb": 1.2}
-
-
- @step(name="transient_apply_filters")
- async def apply_filters(image: dict) -> dict:
-     """Apply visual filters to image."""
-     print(f"[Step] Applying filters to {image['image_id']}...")
-     print(" (taking 6 seconds - kill worker now to test!)")
-     await asyncio.sleep(6) # Another good time to kill worker
-     return {**image, "filtered": True}
-
-
- @step(name="transient_upload_result")
- async def upload_result(image: dict) -> dict:
-     """Upload processed image."""
-     print(f"[Step] Uploading processed {image['image_id']}...")
-     await asyncio.sleep(2)
-     return {**image, "uploaded": True, "url": f"https://cdn.example.com/{image['image_id']}"}
-
-
- # ============================================================================
- # Workflow 1: No Recovery (Default for Transient)
- # ============================================================================
-
-
- @workflow(
-     durable=False,
-     recover_on_worker_loss=False, # DEFAULT for transient - no auto-recovery
-     tags=["celery", "transient"],
- )
- async def image_processor(image_id: str) -> dict:
-     """
-     Image processing workflow - NO AUTO-RECOVERY.
-
-     This is the default behavior for transient workflows.
-
-     If a worker crashes during execution:
-     - The workflow is marked as FAILED
-     - No automatic retry occurs
-     - A new workflow must be manually started
-
-     Why use this:
-     - The upload step has side effects (can't safely repeat)
-     - Need manual review of failures
-     - Each image should only be processed once
-     """
-     print(f"\n{'=' * 60}")
-     print(f"Image Processor (NO RECOVERY): {image_id}")
-     print("If worker crashes, workflow will FAIL permanently")
-     print(f"{'=' * 60}\n")
-
-     image = await download_image(image_id)
-     image = await resize_image(image)
-     image = await apply_filters(image)
-     image = await upload_result(image)
-
-     print(f"\n[Complete] Image available at: {image['url']}\n")
-     return image
-
-
- # ============================================================================
- # Workflow 2: With Recovery (Restart from Scratch)
- # ============================================================================
-
-
- @step(name="transient_fetch_batch_items")
- async def fetch_batch_items(batch_id: str) -> dict:
-     """Fetch items in a batch."""
-     print(f"[Step] Fetching batch {batch_id}...")
-     await asyncio.sleep(2)
-     return {"batch_id": batch_id, "items": ["a", "b", "c", "d", "e"], "fetched": True}
-
-
- @step(name="transient_process_batch_items")
- async def process_batch_items(batch: dict) -> dict:
-     """Process all items in batch (idempotent)."""
-     print(f"[Step] Processing {len(batch['items'])} items (kill worker during this step!)...")
-     for i, item in enumerate(batch["items"]):
-         print(f" Processing item {item} ({i + 1}/{len(batch['items'])})...")
-         await asyncio.sleep(3) # 3 seconds per item - plenty of time to kill worker
-     return {**batch, "processed": True, "processed_count": len(batch["items"])}
-
-
- @step(name="transient_generate_report")
- async def generate_report(batch: dict) -> dict:
-     """Generate processing report (idempotent)."""
-     print(f"[Step] Generating report for batch {batch['batch_id']}...")
-     await asyncio.sleep(0.5)
-     return {
-         **batch,
-         "report": f"Processed {batch['processed_count']} items successfully",
-         "reported": True,
-     }
-
-
- @workflow(
-     durable=False,
-     recover_on_worker_loss=True, # Enable recovery - restarts from scratch
-     max_recovery_attempts=3, # Allow up to 3 restarts
-     tags=["celery", "transient"],
- )
- async def batch_processor(batch_id: str) -> dict:
-     """
-     Batch processing workflow - WITH AUTO-RECOVERY.
-
-     This transient workflow will restart from scratch on worker failure.
-
-     If a worker crashes during execution:
-     - A WORKFLOW_INTERRUPTED event is recorded (even for transient!)
-     - The workflow restarts from the beginning
-     - All steps run again (no event replay for transient)
-     - Up to 3 recovery attempts allowed
-
-     Why use this:
-     - All steps are idempotent (safe to repeat)
-     - Processing can be safely restarted
-     - Better reliability for batch jobs
-     - Items are processed atomically (all or nothing)
-
-     Note: For transient workflows, recovery means RESTART, not RESUME.
-     Unlike durable workflows, there are no events to replay.
-     """
-     print(f"\n{'=' * 60}")
-     print(f"Batch Processor (WITH RECOVERY): {batch_id}")
-     print("If worker crashes, workflow will RESTART from scratch")
-     print(f"{'=' * 60}\n")
-
-     batch = await fetch_batch_items(batch_id)
-     batch = await process_batch_items(batch)
-     batch = await generate_report(batch)
-
-     print(f"\n[Complete] {batch['report']}\n")
-     return batch
-
-
- # ============================================================================
- # Comparison Helper
- # ============================================================================
-
-
- async def main() -> None:
-     """Run the transient fault tolerance examples."""
-     import argparse
-
-     import pyworkflow
-
-     parser = argparse.ArgumentParser(
-         description="Transient Workflow Fault Tolerance Examples",
-         formatter_class=argparse.RawDescriptionHelpFormatter,
-         epilog="""
- Examples:
- # Run image processor (no recovery on failure)
- python 02_fault_tolerance.py --workflow image --id img-123
-
- # Run batch processor (restarts on failure)
- python 02_fault_tolerance.py --workflow batch --id batch-456
-
- To test:
- 1. Start the workflow
- 2. Kill the worker (Ctrl+C) during processing
- 3. Start a new worker
- 4. Observe: image_processor stays FAILED, batch_processor restarts
- """,
-     )
-     parser.add_argument(
-         "--workflow",
-         choices=["image", "batch"],
-         default="batch",
-         help="Which workflow to run",
-     )
-     parser.add_argument("--id", default="test-001", help="ID for the workflow")
-     args = parser.parse_args()
-
-     print("\n" + "=" * 60)
-     print("TRANSIENT WORKFLOW FAULT TOLERANCE DEMO")
-     print("=" * 60)
-
-     if args.workflow == "image":
-         print("\nRunning: image_processor (recover_on_worker_loss=False)")
-         print("Behavior: On worker crash -> FAILED (no recovery)")
-         run_id = await pyworkflow.start(image_processor, args.id)
-     else:
-         print("\nRunning: batch_processor (recover_on_worker_loss=True)")
-         print("Behavior: On worker crash -> RESTART from scratch")
-         run_id = await pyworkflow.start(batch_processor, args.id)
-
-     print(f"\nWorkflow dispatched with run_id: {run_id}")
-     print("\nTo test fault tolerance:")
-     print(" 1. Watch the worker output")
-     print(" 2. Kill the worker during processing (Ctrl+C)")
-     print(" 3. Start a new worker")
-     print(" 4. Observe the recovery behavior")
-
-
- if __name__ == "__main__":
-     asyncio.run(main())
examples/celery/transient/__init__.py
@@ -1,20 +0,0 @@
- """
- Celery Transient Workflow Examples
-
- These examples demonstrate transient (non-durable) workflows running on Celery workers.
-
- Transient workflows:
- - Do NOT persist state to storage
- - Do NOT record events
- - Cannot be resumed after suspension
- - Are simpler and faster for short-lived tasks
-
- Key differences from durable workflows:
- | Feature               | Durable           | Transient         |
- |-----------------------|-------------------|-------------------|
- | Event recording       | Yes               | No                |
- | State persistence     | Yes               | No                |
- | Resumable after crash | Yes (from events) | No (starts fresh) |
- | Sleep behavior        | Suspends workflow | Blocks inline     |
- | Best for              | Long-running      | Quick tasks       |
- """
examples/celery/transient/pyworkflow.config.yaml
@@ -1,25 +0,0 @@
- # PyWorkflow Configuration for Celery Transient Examples
- #
- # This file configures the pyworkflow CLI when running from this directory.
- # Simply run: pyworkflow worker run
- #
- # Priority order:
- # 1. --module CLI argument
- # 2. PYWORKFLOW_DISCOVER environment variable
- # 3. This config file (pyworkflow.config.yaml)
-
- # Module containing workflow definitions
- module: examples.celery.transient
-
- # Runtime configuration
- runtime: celery
-
- # Storage is minimal for transient workflows
- # (only used for tracking run IDs, not for event sourcing)
- storage:
-   type: memory
-
- # Celery broker and result backend
- celery:
-   broker: redis://localhost:6379/0
-   result_backend: redis://localhost:6379/1
examples/local/__init__.py
@@ -1 +0,0 @@
- # PyWorkflow Local Examples Package