pyworkflow-engine 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. dashboard/backend/app/__init__.py +1 -0
  2. dashboard/backend/app/config.py +32 -0
  3. dashboard/backend/app/controllers/__init__.py +6 -0
  4. dashboard/backend/app/controllers/run_controller.py +86 -0
  5. dashboard/backend/app/controllers/workflow_controller.py +33 -0
  6. dashboard/backend/app/dependencies/__init__.py +5 -0
  7. dashboard/backend/app/dependencies/storage.py +50 -0
  8. dashboard/backend/app/repositories/__init__.py +6 -0
  9. dashboard/backend/app/repositories/run_repository.py +80 -0
  10. dashboard/backend/app/repositories/workflow_repository.py +27 -0
  11. dashboard/backend/app/rest/__init__.py +8 -0
  12. dashboard/backend/app/rest/v1/__init__.py +12 -0
  13. dashboard/backend/app/rest/v1/health.py +33 -0
  14. dashboard/backend/app/rest/v1/runs.py +133 -0
  15. dashboard/backend/app/rest/v1/workflows.py +41 -0
  16. dashboard/backend/app/schemas/__init__.py +23 -0
  17. dashboard/backend/app/schemas/common.py +16 -0
  18. dashboard/backend/app/schemas/event.py +24 -0
  19. dashboard/backend/app/schemas/hook.py +25 -0
  20. dashboard/backend/app/schemas/run.py +54 -0
  21. dashboard/backend/app/schemas/step.py +28 -0
  22. dashboard/backend/app/schemas/workflow.py +31 -0
  23. dashboard/backend/app/server.py +87 -0
  24. dashboard/backend/app/services/__init__.py +6 -0
  25. dashboard/backend/app/services/run_service.py +240 -0
  26. dashboard/backend/app/services/workflow_service.py +155 -0
  27. dashboard/backend/main.py +18 -0
  28. docs/concepts/cancellation.mdx +362 -0
  29. docs/concepts/continue-as-new.mdx +434 -0
  30. docs/concepts/events.mdx +266 -0
  31. docs/concepts/fault-tolerance.mdx +370 -0
  32. docs/concepts/hooks.mdx +552 -0
  33. docs/concepts/limitations.mdx +167 -0
  34. docs/concepts/schedules.mdx +775 -0
  35. docs/concepts/sleep.mdx +312 -0
  36. docs/concepts/steps.mdx +301 -0
  37. docs/concepts/workflows.mdx +255 -0
  38. docs/guides/cli.mdx +942 -0
  39. docs/guides/configuration.mdx +560 -0
  40. docs/introduction.mdx +155 -0
  41. docs/quickstart.mdx +279 -0
  42. examples/__init__.py +1 -0
  43. examples/celery/__init__.py +1 -0
  44. examples/celery/durable/docker-compose.yml +55 -0
  45. examples/celery/durable/pyworkflow.config.yaml +12 -0
  46. examples/celery/durable/workflows/__init__.py +122 -0
  47. examples/celery/durable/workflows/basic.py +87 -0
  48. examples/celery/durable/workflows/batch_processing.py +102 -0
  49. examples/celery/durable/workflows/cancellation.py +273 -0
  50. examples/celery/durable/workflows/child_workflow_patterns.py +240 -0
  51. examples/celery/durable/workflows/child_workflows.py +202 -0
  52. examples/celery/durable/workflows/continue_as_new.py +260 -0
  53. examples/celery/durable/workflows/fault_tolerance.py +210 -0
  54. examples/celery/durable/workflows/hooks.py +211 -0
  55. examples/celery/durable/workflows/idempotency.py +112 -0
  56. examples/celery/durable/workflows/long_running.py +99 -0
  57. examples/celery/durable/workflows/retries.py +101 -0
  58. examples/celery/durable/workflows/schedules.py +209 -0
  59. examples/celery/transient/01_basic_workflow.py +91 -0
  60. examples/celery/transient/02_fault_tolerance.py +257 -0
  61. examples/celery/transient/__init__.py +20 -0
  62. examples/celery/transient/pyworkflow.config.yaml +25 -0
  63. examples/local/__init__.py +1 -0
  64. examples/local/durable/01_basic_workflow.py +94 -0
  65. examples/local/durable/02_file_storage.py +132 -0
  66. examples/local/durable/03_retries.py +169 -0
  67. examples/local/durable/04_long_running.py +119 -0
  68. examples/local/durable/05_event_log.py +145 -0
  69. examples/local/durable/06_idempotency.py +148 -0
  70. examples/local/durable/07_hooks.py +334 -0
  71. examples/local/durable/08_cancellation.py +233 -0
  72. examples/local/durable/09_child_workflows.py +198 -0
  73. examples/local/durable/10_child_workflow_patterns.py +265 -0
  74. examples/local/durable/11_continue_as_new.py +249 -0
  75. examples/local/durable/12_schedules.py +198 -0
  76. examples/local/durable/__init__.py +1 -0
  77. examples/local/transient/01_quick_tasks.py +87 -0
  78. examples/local/transient/02_retries.py +130 -0
  79. examples/local/transient/03_sleep.py +141 -0
  80. examples/local/transient/__init__.py +1 -0
  81. pyworkflow/__init__.py +256 -0
  82. pyworkflow/aws/__init__.py +68 -0
  83. pyworkflow/aws/context.py +234 -0
  84. pyworkflow/aws/handler.py +184 -0
  85. pyworkflow/aws/testing.py +310 -0
  86. pyworkflow/celery/__init__.py +41 -0
  87. pyworkflow/celery/app.py +198 -0
  88. pyworkflow/celery/scheduler.py +315 -0
  89. pyworkflow/celery/tasks.py +1746 -0
  90. pyworkflow/cli/__init__.py +132 -0
  91. pyworkflow/cli/__main__.py +6 -0
  92. pyworkflow/cli/commands/__init__.py +1 -0
  93. pyworkflow/cli/commands/hooks.py +640 -0
  94. pyworkflow/cli/commands/quickstart.py +495 -0
  95. pyworkflow/cli/commands/runs.py +773 -0
  96. pyworkflow/cli/commands/scheduler.py +130 -0
  97. pyworkflow/cli/commands/schedules.py +794 -0
  98. pyworkflow/cli/commands/setup.py +703 -0
  99. pyworkflow/cli/commands/worker.py +413 -0
  100. pyworkflow/cli/commands/workflows.py +1257 -0
  101. pyworkflow/cli/output/__init__.py +1 -0
  102. pyworkflow/cli/output/formatters.py +321 -0
  103. pyworkflow/cli/output/styles.py +121 -0
  104. pyworkflow/cli/utils/__init__.py +1 -0
  105. pyworkflow/cli/utils/async_helpers.py +30 -0
  106. pyworkflow/cli/utils/config.py +130 -0
  107. pyworkflow/cli/utils/config_generator.py +344 -0
  108. pyworkflow/cli/utils/discovery.py +53 -0
  109. pyworkflow/cli/utils/docker_manager.py +651 -0
  110. pyworkflow/cli/utils/interactive.py +364 -0
  111. pyworkflow/cli/utils/storage.py +115 -0
  112. pyworkflow/config.py +329 -0
  113. pyworkflow/context/__init__.py +63 -0
  114. pyworkflow/context/aws.py +230 -0
  115. pyworkflow/context/base.py +416 -0
  116. pyworkflow/context/local.py +930 -0
  117. pyworkflow/context/mock.py +381 -0
  118. pyworkflow/core/__init__.py +0 -0
  119. pyworkflow/core/exceptions.py +353 -0
  120. pyworkflow/core/registry.py +313 -0
  121. pyworkflow/core/scheduled.py +328 -0
  122. pyworkflow/core/step.py +494 -0
  123. pyworkflow/core/workflow.py +294 -0
  124. pyworkflow/discovery.py +248 -0
  125. pyworkflow/engine/__init__.py +0 -0
  126. pyworkflow/engine/events.py +879 -0
  127. pyworkflow/engine/executor.py +682 -0
  128. pyworkflow/engine/replay.py +273 -0
  129. pyworkflow/observability/__init__.py +19 -0
  130. pyworkflow/observability/logging.py +234 -0
  131. pyworkflow/primitives/__init__.py +33 -0
  132. pyworkflow/primitives/child_handle.py +174 -0
  133. pyworkflow/primitives/child_workflow.py +372 -0
  134. pyworkflow/primitives/continue_as_new.py +101 -0
  135. pyworkflow/primitives/define_hook.py +150 -0
  136. pyworkflow/primitives/hooks.py +97 -0
  137. pyworkflow/primitives/resume_hook.py +210 -0
  138. pyworkflow/primitives/schedule.py +545 -0
  139. pyworkflow/primitives/shield.py +96 -0
  140. pyworkflow/primitives/sleep.py +100 -0
  141. pyworkflow/runtime/__init__.py +21 -0
  142. pyworkflow/runtime/base.py +179 -0
  143. pyworkflow/runtime/celery.py +310 -0
  144. pyworkflow/runtime/factory.py +101 -0
  145. pyworkflow/runtime/local.py +706 -0
  146. pyworkflow/scheduler/__init__.py +9 -0
  147. pyworkflow/scheduler/local.py +248 -0
  148. pyworkflow/serialization/__init__.py +0 -0
  149. pyworkflow/serialization/decoder.py +146 -0
  150. pyworkflow/serialization/encoder.py +162 -0
  151. pyworkflow/storage/__init__.py +54 -0
  152. pyworkflow/storage/base.py +612 -0
  153. pyworkflow/storage/config.py +185 -0
  154. pyworkflow/storage/dynamodb.py +1315 -0
  155. pyworkflow/storage/file.py +827 -0
  156. pyworkflow/storage/memory.py +549 -0
  157. pyworkflow/storage/postgres.py +1161 -0
  158. pyworkflow/storage/schemas.py +486 -0
  159. pyworkflow/storage/sqlite.py +1136 -0
  160. pyworkflow/utils/__init__.py +0 -0
  161. pyworkflow/utils/duration.py +177 -0
  162. pyworkflow/utils/schedule.py +391 -0
  163. pyworkflow_engine-0.1.7.dist-info/METADATA +687 -0
  164. pyworkflow_engine-0.1.7.dist-info/RECORD +196 -0
  165. pyworkflow_engine-0.1.7.dist-info/WHEEL +5 -0
  166. pyworkflow_engine-0.1.7.dist-info/entry_points.txt +2 -0
  167. pyworkflow_engine-0.1.7.dist-info/licenses/LICENSE +21 -0
  168. pyworkflow_engine-0.1.7.dist-info/top_level.txt +5 -0
  169. tests/examples/__init__.py +0 -0
  170. tests/integration/__init__.py +0 -0
  171. tests/integration/test_cancellation.py +330 -0
  172. tests/integration/test_child_workflows.py +439 -0
  173. tests/integration/test_continue_as_new.py +428 -0
  174. tests/integration/test_dynamodb_storage.py +1146 -0
  175. tests/integration/test_fault_tolerance.py +369 -0
  176. tests/integration/test_schedule_storage.py +484 -0
  177. tests/unit/__init__.py +0 -0
  178. tests/unit/backends/__init__.py +1 -0
  179. tests/unit/backends/test_dynamodb_storage.py +1554 -0
  180. tests/unit/backends/test_postgres_storage.py +1281 -0
  181. tests/unit/backends/test_sqlite_storage.py +1460 -0
  182. tests/unit/conftest.py +41 -0
  183. tests/unit/test_cancellation.py +364 -0
  184. tests/unit/test_child_workflows.py +680 -0
  185. tests/unit/test_continue_as_new.py +441 -0
  186. tests/unit/test_event_limits.py +316 -0
  187. tests/unit/test_executor.py +320 -0
  188. tests/unit/test_fault_tolerance.py +334 -0
  189. tests/unit/test_hooks.py +495 -0
  190. tests/unit/test_registry.py +261 -0
  191. tests/unit/test_replay.py +420 -0
  192. tests/unit/test_schedule_schemas.py +285 -0
  193. tests/unit/test_schedule_utils.py +286 -0
  194. tests/unit/test_scheduled_workflow.py +274 -0
  195. tests/unit/test_step.py +353 -0
  196. tests/unit/test_workflow.py +243 -0
@@ -0,0 +1,827 @@
1
+ """
2
+ File-based storage backend using JSON files.
3
+
4
+ This backend stores workflow data in local JSON files, suitable for:
5
+ - Development and testing
6
+ - Single-machine deployments
7
+ - Low-volume production use
8
+
9
+ Data is stored in a directory structure:
10
+ base_path/
11
+ runs/
12
+ {run_id}.json
13
+ events/
14
+ {run_id}.jsonl (append-only)
15
+ steps/
16
+ {step_id}.json
17
+ hooks/
18
+ {hook_id}.json
19
+ schedules/
20
+ {schedule_id}.json
21
+ _token_index.json (token -> hook_id mapping)
22
+ """
23
+
24
+ import asyncio
25
+ import json
26
+ from datetime import UTC, datetime
27
+ from pathlib import Path
28
+
29
+ from filelock import FileLock
30
+
31
+ from pyworkflow.engine.events import Event, EventType
32
+ from pyworkflow.storage.base import StorageBackend
33
+ from pyworkflow.storage.schemas import (
34
+ Hook,
35
+ HookStatus,
36
+ RunStatus,
37
+ Schedule,
38
+ ScheduleStatus,
39
+ StepExecution,
40
+ WorkflowRun,
41
+ )
42
+
43
+
44
class FileStorageBackend(StorageBackend):
    """
    File-based storage backend using JSON files.

    Runs, steps, hooks, and schedules are stored one-JSON-file-per-record
    under subdirectories of the base path; events are append-only JSON
    Lines files. Thread-safe using per-record file locks (kept under a
    ``.locks`` directory) for concurrent access, with blocking file I/O
    dispatched off the event loop via ``asyncio.to_thread``.
    """
50
+
51
+ def __init__(self, base_path: str = "./pyworkflow_data"):
52
+ """
53
+ Initialize file storage backend.
54
+
55
+ Args:
56
+ base_path: Base directory for storing workflow data
57
+ """
58
+ self.base_path = Path(base_path)
59
+ self.runs_dir = self.base_path / "runs"
60
+ self.events_dir = self.base_path / "events"
61
+ self.steps_dir = self.base_path / "steps"
62
+ self.hooks_dir = self.base_path / "hooks"
63
+ self.schedules_dir = self.base_path / "schedules"
64
+ self.locks_dir = self.base_path / ".locks"
65
+ self._token_index_file = self.base_path / "_token_index.json"
66
+
67
+ # Create directories
68
+ for dir_path in [
69
+ self.runs_dir,
70
+ self.events_dir,
71
+ self.steps_dir,
72
+ self.hooks_dir,
73
+ self.schedules_dir,
74
+ self.locks_dir,
75
+ ]:
76
+ dir_path.mkdir(parents=True, exist_ok=True)
77
+
78
+ # Workflow Run Operations
79
+
80
+ async def create_run(self, run: WorkflowRun) -> None:
81
+ """Create a new workflow run record."""
82
+ run_file = self.runs_dir / f"{run.run_id}.json"
83
+
84
+ if run_file.exists():
85
+ raise ValueError(f"Workflow run {run.run_id} already exists")
86
+
87
+ data = run.to_dict()
88
+
89
+ # Use file lock for thread safety
90
+ lock_file = self.locks_dir / f"{run.run_id}.lock"
91
+ lock = FileLock(str(lock_file))
92
+
93
+ def _write() -> None:
94
+ with lock:
95
+ run_file.write_text(json.dumps(data, indent=2))
96
+
97
+ await asyncio.to_thread(_write)
98
+
99
+ async def get_run(self, run_id: str) -> WorkflowRun | None:
100
+ """Retrieve a workflow run by ID."""
101
+ run_file = self.runs_dir / f"{run_id}.json"
102
+
103
+ if not run_file.exists():
104
+ return None
105
+
106
+ lock_file = self.locks_dir / f"{run_id}.lock"
107
+ lock = FileLock(str(lock_file))
108
+
109
+ def _read() -> dict | None:
110
+ with lock:
111
+ if not run_file.exists():
112
+ return None
113
+ content = run_file.read_text()
114
+ if not content.strip():
115
+ # File exists but is empty (race condition) - treat as not found
116
+ return None
117
+ return json.loads(content)
118
+
119
+ data = await asyncio.to_thread(_read)
120
+ return WorkflowRun.from_dict(data) if data else None
121
+
122
+ async def get_run_by_idempotency_key(self, key: str) -> WorkflowRun | None:
123
+ """Retrieve a workflow run by idempotency key."""
124
+
125
+ def _search() -> dict | None:
126
+ for run_file in self.runs_dir.glob("*.json"):
127
+ data = json.loads(run_file.read_text())
128
+ if data.get("idempotency_key") == key:
129
+ return data
130
+ return None
131
+
132
+ data = await asyncio.to_thread(_search)
133
+ return WorkflowRun.from_dict(data) if data else None
134
+
135
+ async def update_run_status(
136
+ self,
137
+ run_id: str,
138
+ status: RunStatus,
139
+ result: str | None = None,
140
+ error: str | None = None,
141
+ ) -> None:
142
+ """Update workflow run status."""
143
+ run_file = self.runs_dir / f"{run_id}.json"
144
+
145
+ if not run_file.exists():
146
+ raise ValueError(f"Workflow run {run_id} not found")
147
+
148
+ lock_file = self.locks_dir / f"{run_id}.lock"
149
+ lock = FileLock(str(lock_file))
150
+
151
+ def _update() -> None:
152
+ with lock:
153
+ data = json.loads(run_file.read_text())
154
+ data["status"] = status.value
155
+ data["updated_at"] = datetime.now(UTC).isoformat()
156
+
157
+ if result is not None:
158
+ data["result"] = result
159
+
160
+ if error is not None:
161
+ data["error"] = error
162
+
163
+ if status == RunStatus.COMPLETED:
164
+ data["completed_at"] = datetime.now(UTC).isoformat()
165
+
166
+ run_file.write_text(json.dumps(data, indent=2))
167
+
168
+ await asyncio.to_thread(_update)
169
+
170
+ async def update_run_recovery_attempts(
171
+ self,
172
+ run_id: str,
173
+ recovery_attempts: int,
174
+ ) -> None:
175
+ """Update the recovery attempts counter for a workflow run."""
176
+ run_file = self.runs_dir / f"{run_id}.json"
177
+
178
+ if not run_file.exists():
179
+ raise ValueError(f"Workflow run {run_id} not found")
180
+
181
+ lock_file = self.locks_dir / f"{run_id}.lock"
182
+ lock = FileLock(str(lock_file))
183
+
184
+ def _update() -> None:
185
+ with lock:
186
+ data = json.loads(run_file.read_text())
187
+ data["recovery_attempts"] = recovery_attempts
188
+ data["updated_at"] = datetime.now(UTC).isoformat()
189
+ run_file.write_text(json.dumps(data, indent=2))
190
+
191
+ await asyncio.to_thread(_update)
192
+
193
+ async def list_runs(
194
+ self,
195
+ query: str | None = None,
196
+ status: RunStatus | None = None,
197
+ start_time: datetime | None = None,
198
+ end_time: datetime | None = None,
199
+ limit: int = 100,
200
+ cursor: str | None = None,
201
+ ) -> tuple[list[WorkflowRun], str | None]:
202
+ """List workflow runs with optional filtering and cursor-based pagination."""
203
+
204
+ def _list() -> tuple[list[dict], str | None]:
205
+ runs = []
206
+ query_lower = query.lower() if query else None
207
+
208
+ for run_file in self.runs_dir.glob("*.json"):
209
+ data = json.loads(run_file.read_text())
210
+
211
+ # Apply query filter (case-insensitive substring in workflow_name or input_kwargs)
212
+ if query_lower:
213
+ workflow_name = data.get("workflow_name", "").lower()
214
+ input_kwargs = json.dumps(data.get("input_kwargs", {})).lower()
215
+ if query_lower not in workflow_name and query_lower not in input_kwargs:
216
+ continue
217
+
218
+ # Apply status filter
219
+ if status and data.get("status") != status.value:
220
+ continue
221
+
222
+ # Apply time filters (based on started_at)
223
+ started_at_str = data.get("started_at")
224
+ if started_at_str:
225
+ started_at = datetime.fromisoformat(started_at_str)
226
+ # Make timezone-aware comparison if needed
227
+ if start_time and started_at < start_time:
228
+ continue
229
+ if end_time and started_at >= end_time:
230
+ continue
231
+ elif start_time or end_time:
232
+ # If run hasn't started yet and we have time filters, skip it
233
+ continue
234
+
235
+ runs.append(data)
236
+
237
+ # Sort by (created_at DESC, run_id DESC) for deterministic ordering
238
+ runs.sort(key=lambda r: (r.get("created_at", ""), r.get("run_id", "")), reverse=True)
239
+
240
+ # Apply cursor-based pagination
241
+ if cursor:
242
+ # Find the cursor position and start after it
243
+ cursor_found = False
244
+ filtered_runs = []
245
+ for run in runs:
246
+ if cursor_found:
247
+ filtered_runs.append(run)
248
+ elif run.get("run_id") == cursor:
249
+ cursor_found = True
250
+ runs = filtered_runs
251
+
252
+ # Apply limit and determine next_cursor
253
+ if len(runs) > limit:
254
+ result_runs = runs[:limit]
255
+ next_cursor = result_runs[-1].get("run_id") if result_runs else None
256
+ else:
257
+ result_runs = runs[:limit]
258
+ next_cursor = None
259
+
260
+ return result_runs, next_cursor
261
+
262
+ run_data_list, next_cursor = await asyncio.to_thread(_list)
263
+ return [WorkflowRun.from_dict(data) for data in run_data_list], next_cursor
264
+
265
+ # Event Log Operations
266
+
267
    async def record_event(self, event: Event) -> None:
        """
        Record an event to the append-only event log.

        Side effect: mutates ``event.sequence`` in place before writing, so
        the caller's Event object reflects the assigned log position.

        NOTE(review): the next sequence number is derived by counting the
        existing non-blank lines on every append, making each write O(n)
        in the log length - presumably acceptable for the low-volume use
        this backend targets.
        """
        events_file = self.events_dir / f"{event.run_id}.jsonl"
        lock_file = self.locks_dir / f"events_{event.run_id}.lock"
        lock = FileLock(str(lock_file))

        def _append() -> None:
            with lock:
                # Get next sequence number (1 + count of non-blank lines).
                sequence = 1
                if events_file.exists():
                    with events_file.open("r") as f:
                        for line in f:
                            if line.strip():
                                sequence += 1

                event.sequence = sequence

                # Append event as a single JSON Lines record.
                event_data = {
                    "event_id": event.event_id,
                    "run_id": event.run_id,
                    "type": event.type.value,
                    "sequence": event.sequence,
                    "timestamp": event.timestamp.isoformat(),
                    "data": event.data,
                }

                with events_file.open("a") as f:
                    f.write(json.dumps(event_data) + "\n")

        await asyncio.to_thread(_append)
299
+
300
+ async def get_events(
301
+ self,
302
+ run_id: str,
303
+ event_types: list[str] | None = None,
304
+ ) -> list[Event]:
305
+ """Retrieve all events for a workflow run."""
306
+ events_file = self.events_dir / f"{run_id}.jsonl"
307
+
308
+ if not events_file.exists():
309
+ return []
310
+
311
+ def _read() -> list[Event]:
312
+ events = []
313
+ with events_file.open("r") as f:
314
+ for line in f:
315
+ if not line.strip():
316
+ continue
317
+
318
+ data = json.loads(line)
319
+
320
+ # Apply type filter
321
+ if event_types and data["type"] not in event_types:
322
+ continue
323
+
324
+ events.append(
325
+ Event(
326
+ event_id=data["event_id"],
327
+ run_id=data["run_id"],
328
+ type=EventType(data["type"]),
329
+ sequence=data["sequence"],
330
+ timestamp=datetime.fromisoformat(data["timestamp"]),
331
+ data=data["data"],
332
+ )
333
+ )
334
+
335
+ return sorted(events, key=lambda e: e.sequence or 0)
336
+
337
+ return await asyncio.to_thread(_read)
338
+
339
+ async def get_latest_event(
340
+ self,
341
+ run_id: str,
342
+ event_type: str | None = None,
343
+ ) -> Event | None:
344
+ """Get the latest event for a run."""
345
+ events = await self.get_events(run_id, event_types=[event_type] if event_type else None)
346
+ return events[-1] if events else None
347
+
348
+ # Step Operations
349
+
350
+ async def create_step(self, step: StepExecution) -> None:
351
+ """Create a step execution record."""
352
+ step_file = self.steps_dir / f"{step.step_id}.json"
353
+
354
+ if step_file.exists():
355
+ raise ValueError(f"Step {step.step_id} already exists")
356
+
357
+ data = step.to_dict()
358
+
359
+ def _write() -> None:
360
+ step_file.write_text(json.dumps(data, indent=2))
361
+
362
+ await asyncio.to_thread(_write)
363
+
364
+ async def get_step(self, step_id: str) -> StepExecution | None:
365
+ """Retrieve a step execution by ID."""
366
+ step_file = self.steps_dir / f"{step_id}.json"
367
+
368
+ if not step_file.exists():
369
+ return None
370
+
371
+ def _read() -> dict:
372
+ return json.loads(step_file.read_text())
373
+
374
+ data = await asyncio.to_thread(_read)
375
+ return StepExecution.from_dict(data)
376
+
377
+ async def update_step_status(
378
+ self,
379
+ step_id: str,
380
+ status: str,
381
+ result: str | None = None,
382
+ error: str | None = None,
383
+ ) -> None:
384
+ """Update step execution status."""
385
+ step_file = self.steps_dir / f"{step_id}.json"
386
+
387
+ if not step_file.exists():
388
+ raise ValueError(f"Step {step_id} not found")
389
+
390
+ def _update() -> None:
391
+ data = json.loads(step_file.read_text())
392
+ data["status"] = status
393
+ data["updated_at"] = datetime.utcnow().isoformat()
394
+
395
+ if result is not None:
396
+ data["result"] = result
397
+
398
+ if error is not None:
399
+ data["error"] = error
400
+
401
+ if status == "completed":
402
+ data["completed_at"] = datetime.utcnow().isoformat()
403
+
404
+ step_file.write_text(json.dumps(data, indent=2))
405
+
406
+ await asyncio.to_thread(_update)
407
+
408
+ async def list_steps(self, run_id: str) -> list[StepExecution]:
409
+ """List all steps for a workflow run."""
410
+
411
+ def _list() -> list[dict]:
412
+ steps = []
413
+ for step_file in self.steps_dir.glob("*.json"):
414
+ data = json.loads(step_file.read_text())
415
+ if data.get("run_id") == run_id:
416
+ steps.append(data)
417
+
418
+ # Sort by created_at
419
+ steps.sort(key=lambda s: s.get("created_at", ""))
420
+ return steps
421
+
422
+ step_data_list = await asyncio.to_thread(_list)
423
+ return [StepExecution.from_dict(data) for data in step_data_list]
424
+
425
+ # Hook Operations
426
+
427
+ def _load_token_index(self) -> dict:
428
+ """Load the token -> hook_id index."""
429
+ if self._token_index_file.exists():
430
+ return json.loads(self._token_index_file.read_text())
431
+ return {}
432
+
433
+ def _save_token_index(self, index: dict) -> None:
434
+ """Save the token -> hook_id index."""
435
+ self._token_index_file.write_text(json.dumps(index, indent=2))
436
+
437
+ async def create_hook(self, hook: Hook) -> None:
438
+ """Create a hook record."""
439
+ hook_file = self.hooks_dir / f"{hook.hook_id}.json"
440
+ lock_file = self.locks_dir / "token_index.lock"
441
+ lock = FileLock(str(lock_file))
442
+
443
+ data = hook.to_dict()
444
+
445
+ def _write() -> None:
446
+ with lock:
447
+ hook_file.write_text(json.dumps(data, indent=2))
448
+ # Update token index
449
+ index = self._load_token_index()
450
+ index[hook.token] = hook.hook_id
451
+ self._save_token_index(index)
452
+
453
+ await asyncio.to_thread(_write)
454
+
455
+ async def get_hook(self, hook_id: str) -> Hook | None:
456
+ """Retrieve a hook by ID."""
457
+ hook_file = self.hooks_dir / f"{hook_id}.json"
458
+
459
+ if not hook_file.exists():
460
+ return None
461
+
462
+ def _read() -> dict:
463
+ return json.loads(hook_file.read_text())
464
+
465
+ data = await asyncio.to_thread(_read)
466
+ return Hook.from_dict(data)
467
+
468
+ async def get_hook_by_token(self, token: str) -> Hook | None:
469
+ """Retrieve a hook by its token."""
470
+
471
+ def _lookup() -> str | None:
472
+ index = self._load_token_index()
473
+ return index.get(token)
474
+
475
+ hook_id = await asyncio.to_thread(_lookup)
476
+ if hook_id:
477
+ return await self.get_hook(hook_id)
478
+ return None
479
+
480
+ async def update_hook_status(
481
+ self,
482
+ hook_id: str,
483
+ status: HookStatus,
484
+ payload: str | None = None,
485
+ ) -> None:
486
+ """Update hook status and optionally payload."""
487
+ hook_file = self.hooks_dir / f"{hook_id}.json"
488
+
489
+ if not hook_file.exists():
490
+ raise ValueError(f"Hook {hook_id} not found")
491
+
492
+ lock_file = self.locks_dir / f"hook_{hook_id}.lock"
493
+ lock = FileLock(str(lock_file))
494
+
495
+ def _update() -> None:
496
+ with lock:
497
+ data = json.loads(hook_file.read_text())
498
+ data["status"] = status.value
499
+
500
+ if payload is not None:
501
+ data["payload"] = payload
502
+
503
+ if status == HookStatus.RECEIVED:
504
+ data["received_at"] = datetime.now(UTC).isoformat()
505
+
506
+ hook_file.write_text(json.dumps(data, indent=2))
507
+
508
+ await asyncio.to_thread(_update)
509
+
510
+ async def list_hooks(
511
+ self,
512
+ run_id: str | None = None,
513
+ status: HookStatus | None = None,
514
+ limit: int = 100,
515
+ offset: int = 0,
516
+ ) -> list[Hook]:
517
+ """List hooks with optional filtering."""
518
+
519
+ def _list() -> list[dict]:
520
+ hooks = []
521
+ for hook_file in self.hooks_dir.glob("*.json"):
522
+ data = json.loads(hook_file.read_text())
523
+
524
+ # Apply filters
525
+ if run_id and data.get("run_id") != run_id:
526
+ continue
527
+ if status and data.get("status") != status.value:
528
+ continue
529
+
530
+ hooks.append(data)
531
+
532
+ # Sort by created_at descending
533
+ hooks.sort(key=lambda h: h.get("created_at", ""), reverse=True)
534
+
535
+ # Apply pagination
536
+ return hooks[offset : offset + limit]
537
+
538
+ hook_data_list = await asyncio.to_thread(_list)
539
+ return [Hook.from_dict(data) for data in hook_data_list]
540
+
541
+ # Cancellation Flag Operations
542
+
543
+ async def set_cancellation_flag(self, run_id: str) -> None:
544
+ """Set a cancellation flag for a workflow run."""
545
+ cancel_file = self.runs_dir / f"{run_id}.cancel"
546
+ lock_file = self.locks_dir / f"{run_id}_cancel.lock"
547
+ lock = FileLock(str(lock_file))
548
+
549
+ def _write() -> None:
550
+ with lock:
551
+ cancel_file.write_text(datetime.now(UTC).isoformat())
552
+
553
+ await asyncio.to_thread(_write)
554
+
555
+ async def check_cancellation_flag(self, run_id: str) -> bool:
556
+ """Check if a cancellation flag is set for a workflow run."""
557
+ cancel_file = self.runs_dir / f"{run_id}.cancel"
558
+
559
+ def _check() -> bool:
560
+ return cancel_file.exists()
561
+
562
+ return await asyncio.to_thread(_check)
563
+
564
+ async def clear_cancellation_flag(self, run_id: str) -> None:
565
+ """Clear the cancellation flag for a workflow run."""
566
+ cancel_file = self.runs_dir / f"{run_id}.cancel"
567
+ lock_file = self.locks_dir / f"{run_id}_cancel.lock"
568
+ lock = FileLock(str(lock_file))
569
+
570
+ def _clear() -> None:
571
+ with lock:
572
+ if cancel_file.exists():
573
+ cancel_file.unlink()
574
+
575
+ await asyncio.to_thread(_clear)
576
+
577
+ # Continue-As-New Chain Operations
578
+
579
+ async def update_run_continuation(
580
+ self,
581
+ run_id: str,
582
+ continued_to_run_id: str,
583
+ ) -> None:
584
+ """Update the continuation link for a workflow run."""
585
+ run_file = self.runs_dir / f"{run_id}.json"
586
+
587
+ if not run_file.exists():
588
+ raise ValueError(f"Workflow run {run_id} not found")
589
+
590
+ lock_file = self.locks_dir / f"{run_id}.lock"
591
+ lock = FileLock(str(lock_file))
592
+
593
+ def _update() -> None:
594
+ with lock:
595
+ data = json.loads(run_file.read_text())
596
+ data["continued_to_run_id"] = continued_to_run_id
597
+ data["updated_at"] = datetime.now(UTC).isoformat()
598
+ run_file.write_text(json.dumps(data, indent=2))
599
+
600
+ await asyncio.to_thread(_update)
601
+
602
+ async def get_workflow_chain(
603
+ self,
604
+ run_id: str,
605
+ ) -> list[WorkflowRun]:
606
+ """Get all runs in a continue-as-new chain."""
607
+ run = await self.get_run(run_id)
608
+ if not run:
609
+ return []
610
+
611
+ # Walk backwards to find the start of the chain
612
+ current = run
613
+ while current.continued_from_run_id:
614
+ prev = await self.get_run(current.continued_from_run_id)
615
+ if not prev:
616
+ break
617
+ current = prev
618
+
619
+ # Build chain from start to end
620
+ chain = [current]
621
+ while current.continued_to_run_id:
622
+ next_run = await self.get_run(current.continued_to_run_id)
623
+ if not next_run:
624
+ break
625
+ chain.append(next_run)
626
+ current = next_run
627
+
628
+ return chain
629
+
630
+ # Child Workflow Operations
631
+
632
+ async def get_children(
633
+ self,
634
+ parent_run_id: str,
635
+ status: RunStatus | None = None,
636
+ ) -> list[WorkflowRun]:
637
+ """Get all child workflow runs for a parent workflow."""
638
+
639
+ def _list() -> list[dict]:
640
+ children = []
641
+ for run_file in self.runs_dir.glob("*.json"):
642
+ data = json.loads(run_file.read_text())
643
+
644
+ # Filter by parent_run_id
645
+ if data.get("parent_run_id") != parent_run_id:
646
+ continue
647
+
648
+ # Filter by status if provided
649
+ if status and data.get("status") != status.value:
650
+ continue
651
+
652
+ children.append(data)
653
+
654
+ # Sort by created_at
655
+ children.sort(key=lambda r: r.get("created_at", ""))
656
+ return children
657
+
658
+ child_data_list = await asyncio.to_thread(_list)
659
+ return [WorkflowRun.from_dict(data) for data in child_data_list]
660
+
661
+ async def get_parent(self, run_id: str) -> WorkflowRun | None:
662
+ """Get the parent workflow run for a child workflow."""
663
+ run = await self.get_run(run_id)
664
+ if run and run.parent_run_id:
665
+ return await self.get_run(run.parent_run_id)
666
+ return None
667
+
668
+ async def get_nesting_depth(self, run_id: str) -> int:
669
+ """Get the nesting depth for a workflow."""
670
+ run = await self.get_run(run_id)
671
+ return run.nesting_depth if run else 0
672
+
673
+ # Schedule Operations
674
+
675
+ async def create_schedule(self, schedule: Schedule) -> None:
676
+ """Create a new schedule record."""
677
+ schedule_file = self.schedules_dir / f"{schedule.schedule_id}.json"
678
+
679
+ if schedule_file.exists():
680
+ raise ValueError(f"Schedule {schedule.schedule_id} already exists")
681
+
682
+ data = schedule.to_dict()
683
+
684
+ lock_file = self.locks_dir / f"schedule_{schedule.schedule_id}.lock"
685
+ lock = FileLock(str(lock_file))
686
+
687
+ def _write() -> None:
688
+ with lock:
689
+ schedule_file.write_text(json.dumps(data, indent=2))
690
+
691
+ await asyncio.to_thread(_write)
692
+
693
+ async def get_schedule(self, schedule_id: str) -> Schedule | None:
694
+ """Retrieve a schedule by ID."""
695
+ schedule_file = self.schedules_dir / f"{schedule_id}.json"
696
+
697
+ if not schedule_file.exists():
698
+ return None
699
+
700
+ lock_file = self.locks_dir / f"schedule_{schedule_id}.lock"
701
+ lock = FileLock(str(lock_file))
702
+
703
+ def _read() -> dict | None:
704
+ with lock:
705
+ if not schedule_file.exists():
706
+ return None
707
+ return json.loads(schedule_file.read_text())
708
+
709
+ data = await asyncio.to_thread(_read)
710
+ return Schedule.from_dict(data) if data else None
711
+
712
+ async def update_schedule(self, schedule: Schedule) -> None:
713
+ """Update an existing schedule."""
714
+ schedule_file = self.schedules_dir / f"{schedule.schedule_id}.json"
715
+
716
+ if not schedule_file.exists():
717
+ raise ValueError(f"Schedule {schedule.schedule_id} does not exist")
718
+
719
+ data = schedule.to_dict()
720
+
721
+ lock_file = self.locks_dir / f"schedule_{schedule.schedule_id}.lock"
722
+ lock = FileLock(str(lock_file))
723
+
724
+ def _write() -> None:
725
+ with lock:
726
+ schedule_file.write_text(json.dumps(data, indent=2))
727
+
728
+ await asyncio.to_thread(_write)
729
+
730
+ async def delete_schedule(self, schedule_id: str) -> None:
731
+ """Mark a schedule as deleted (soft delete)."""
732
+ schedule = await self.get_schedule(schedule_id)
733
+
734
+ if not schedule:
735
+ raise ValueError(f"Schedule {schedule_id} does not exist")
736
+
737
+ schedule.status = ScheduleStatus.DELETED
738
+ schedule.updated_at = datetime.now(UTC)
739
+ await self.update_schedule(schedule)
740
+
741
+ async def list_schedules(
742
+ self,
743
+ workflow_name: str | None = None,
744
+ status: ScheduleStatus | None = None,
745
+ limit: int = 100,
746
+ offset: int = 0,
747
+ ) -> list[Schedule]:
748
+ """List schedules with optional filtering."""
749
+
750
+ def _list() -> list[dict]:
751
+ schedules = []
752
+ for schedule_file in self.schedules_dir.glob("*.json"):
753
+ try:
754
+ data = json.loads(schedule_file.read_text())
755
+
756
+ # Apply filters
757
+ if workflow_name and data.get("workflow_name") != workflow_name:
758
+ continue
759
+ if status and data.get("status") != status.value:
760
+ continue
761
+
762
+ schedules.append(data)
763
+ except (json.JSONDecodeError, KeyError):
764
+ continue
765
+
766
+ # Sort by created_at descending
767
+ schedules.sort(key=lambda x: x.get("created_at", ""), reverse=True)
768
+
769
+ # Apply pagination
770
+ return schedules[offset : offset + limit]
771
+
772
+ schedule_data_list = await asyncio.to_thread(_list)
773
+ return [Schedule.from_dict(data) for data in schedule_data_list]
774
+
775
    async def get_due_schedules(self, now: datetime) -> list[Schedule]:
        """
        Get all schedules that are due to run.

        A schedule is due when it is ACTIVE and its ``next_run_time`` is at
        or before *now*. Results are sorted by ``next_run_time`` ascending.

        NOTE(review): due-ness is decided by comparing ISO-8601 strings
        lexicographically, which is only correct if every stored
        ``next_run_time`` and *now* use the same timezone/offset format -
        confirm all writers stamp timestamps consistently.
        """
        now_iso = now.isoformat()

        def _list_due() -> list[dict]:
            due_schedules = []
            for schedule_file in self.schedules_dir.glob("*.json"):
                try:
                    data = json.loads(schedule_file.read_text())

                    # Check criteria: ACTIVE, has a next_run_time, and due.
                    if data.get("status") != ScheduleStatus.ACTIVE.value:
                        continue
                    next_run = data.get("next_run_time")
                    if not next_run:
                        continue
                    if next_run > now_iso:
                        continue

                    due_schedules.append(data)
                except (json.JSONDecodeError, KeyError):
                    # Unreadable record - skip it rather than fail the scan.
                    continue

            # Sort by next_run_time ascending
            due_schedules.sort(key=lambda x: x.get("next_run_time", ""))
            return due_schedules

        schedule_data_list = await asyncio.to_thread(_list_due)
        return [Schedule.from_dict(data) for data in schedule_data_list]
804
+
805
+ async def add_running_run(self, schedule_id: str, run_id: str) -> None:
806
+ """Add a run_id to the schedule's running_run_ids list."""
807
+ schedule = await self.get_schedule(schedule_id)
808
+
809
+ if not schedule:
810
+ raise ValueError(f"Schedule {schedule_id} does not exist")
811
+
812
+ if run_id not in schedule.running_run_ids:
813
+ schedule.running_run_ids.append(run_id)
814
+ schedule.updated_at = datetime.now(UTC)
815
+ await self.update_schedule(schedule)
816
+
817
+ async def remove_running_run(self, schedule_id: str, run_id: str) -> None:
818
+ """Remove a run_id from the schedule's running_run_ids list."""
819
+ schedule = await self.get_schedule(schedule_id)
820
+
821
+ if not schedule:
822
+ raise ValueError(f"Schedule {schedule_id} does not exist")
823
+
824
+ if run_id in schedule.running_run_ids:
825
+ schedule.running_run_ids.remove(run_id)
826
+ schedule.updated_at = datetime.now(UTC)
827
+ await self.update_schedule(schedule)