atomicguard 0.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atomicguard/__init__.py +8 -3
- atomicguard/application/action_pair.py +7 -1
- atomicguard/application/agent.py +46 -6
- atomicguard/application/workflow.py +494 -11
- atomicguard/domain/__init__.py +4 -1
- atomicguard/domain/exceptions.py +19 -0
- atomicguard/domain/interfaces.py +137 -6
- atomicguard/domain/models.py +120 -6
- atomicguard/guards/__init__.py +16 -5
- atomicguard/guards/composite/__init__.py +11 -0
- atomicguard/guards/dynamic/__init__.py +13 -0
- atomicguard/guards/dynamic/test_runner.py +207 -0
- atomicguard/guards/interactive/__init__.py +11 -0
- atomicguard/guards/static/__init__.py +13 -0
- atomicguard/guards/static/imports.py +177 -0
- atomicguard/infrastructure/__init__.py +4 -1
- atomicguard/infrastructure/llm/__init__.py +3 -1
- atomicguard/infrastructure/llm/huggingface.py +180 -0
- atomicguard/infrastructure/llm/mock.py +32 -6
- atomicguard/infrastructure/llm/ollama.py +40 -17
- atomicguard/infrastructure/persistence/__init__.py +7 -1
- atomicguard/infrastructure/persistence/checkpoint.py +361 -0
- atomicguard/infrastructure/persistence/filesystem.py +69 -5
- atomicguard/infrastructure/persistence/memory.py +25 -3
- atomicguard/infrastructure/registry.py +126 -0
- atomicguard/schemas/__init__.py +142 -0
- {atomicguard-0.1.0.dist-info → atomicguard-1.2.0.dist-info}/METADATA +75 -13
- atomicguard-1.2.0.dist-info/RECORD +37 -0
- {atomicguard-0.1.0.dist-info → atomicguard-1.2.0.dist-info}/WHEEL +1 -1
- atomicguard-1.2.0.dist-info/entry_points.txt +4 -0
- atomicguard/guards/test_runner.py +0 -176
- atomicguard-0.1.0.dist-info/RECORD +0 -27
- /atomicguard/guards/{base.py → composite/base.py} +0 -0
- /atomicguard/guards/{human.py → interactive/human.py} +0 -0
- /atomicguard/guards/{syntax.py → static/syntax.py} +0 -0
- {atomicguard-0.1.0.dist-info → atomicguard-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {atomicguard-0.1.0.dist-info → atomicguard-1.2.0.dist-info}/top_level.txt +0 -0

atomicguard/infrastructure/persistence/checkpoint.py (new file)
@@ -0,0 +1,361 @@
+"""
+Filesystem implementation of the Checkpoint DAG.
+
+Provides persistent storage for workflow checkpoints and human amendments,
+enabling resumable workflows after failure/escalation.
+"""
+
+import json
+from pathlib import Path
+from typing import Any
+
+from atomicguard.domain.interfaces import CheckpointDAGInterface
+from atomicguard.domain.models import (
+    AmendmentType,
+    FailureType,
+    HumanAmendment,
+    WorkflowCheckpoint,
+)
+
+
+class FilesystemCheckpointDAG(CheckpointDAGInterface):
+    """
+    Persistent storage for workflow checkpoints and amendments.
+
+    Directory structure:
+        {base_dir}/
+            checkpoints/
+                {prefix}/{checkpoint_id}.json
+            amendments/
+                {prefix}/{amendment_id}.json
+            checkpoint_index.json  # Maps workflow_id -> checkpoint_ids
+    """
+
+    def __init__(self, base_dir: str):
+        self._base_dir = Path(base_dir)
+        self._checkpoints_dir = self._base_dir / "checkpoints"
+        self._amendments_dir = self._base_dir / "amendments"
+        self._index_path = self._base_dir / "checkpoint_index.json"
+        self._cache_checkpoints: dict[str, WorkflowCheckpoint] = {}
+        self._cache_amendments: dict[str, HumanAmendment] = {}
+        self._index: dict[str, Any] = self._load_or_create_index()
+
+    def _load_or_create_index(self) -> dict[str, Any]:
+        """Load existing index or create new one."""
+        self._base_dir.mkdir(parents=True, exist_ok=True)
+        self._checkpoints_dir.mkdir(parents=True, exist_ok=True)
+        self._amendments_dir.mkdir(parents=True, exist_ok=True)
+
+        if self._index_path.exists():
+            with open(self._index_path) as f:
+                result: dict[str, Any] = json.load(f)
+            return result
+
+        return {
+            "version": "1.0",
+            "checkpoints": {},  # checkpoint_id -> metadata
+            "amendments": {},  # amendment_id -> metadata
+            "by_workflow": {},  # workflow_id -> [checkpoint_ids]
+            "by_checkpoint": {},  # checkpoint_id -> [amendment_ids]
+        }
+
+    def _update_index_atomic(self) -> None:
+        """Atomically update index.json using write-to-temp + rename."""
+        temp_path = self._index_path.with_suffix(".tmp")
+        with open(temp_path, "w") as f:
+            json.dump(self._index, f, indent=2)
+        temp_path.rename(self._index_path)
+
+    def _get_checkpoint_path(self, checkpoint_id: str) -> Path:
+        """Get filesystem path for checkpoint (using prefix directories)."""
+        prefix = checkpoint_id[:2]
+        return self._checkpoints_dir / prefix / f"{checkpoint_id}.json"
+
+    def _get_amendment_path(self, amendment_id: str) -> Path:
+        """Get filesystem path for amendment (using prefix directories)."""
+        prefix = amendment_id[:2]
+        return self._amendments_dir / prefix / f"{amendment_id}.json"
+
+    def _checkpoint_to_dict(self, checkpoint: WorkflowCheckpoint) -> dict:
+        """Serialize checkpoint to JSON-compatible dict."""
+        return {
+            "checkpoint_id": checkpoint.checkpoint_id,
+            "workflow_id": checkpoint.workflow_id,
+            "created_at": checkpoint.created_at,
+            "specification": checkpoint.specification,
+            "constraints": checkpoint.constraints,
+            "rmax": checkpoint.rmax,
+            "completed_steps": list(checkpoint.completed_steps),
+            "artifact_ids": dict(checkpoint.artifact_ids),
+            "failure_type": checkpoint.failure_type.value,
+            "failed_step": checkpoint.failed_step,
+            "failed_artifact_id": checkpoint.failed_artifact_id,
+            "failure_feedback": checkpoint.failure_feedback,
+            "provenance_ids": list(checkpoint.provenance_ids),
+        }
+
+    def _dict_to_checkpoint(self, data: dict) -> WorkflowCheckpoint:
+        """Deserialize checkpoint from JSON dict."""
+        return WorkflowCheckpoint(
+            checkpoint_id=data["checkpoint_id"],
+            workflow_id=data["workflow_id"],
+            created_at=data["created_at"],
+            specification=data["specification"],
+            constraints=data["constraints"],
+            rmax=data["rmax"],
+            completed_steps=tuple(data["completed_steps"]),
+            artifact_ids=tuple(data["artifact_ids"].items()),
+            failure_type=FailureType(data["failure_type"]),
+            failed_step=data["failed_step"],
+            failed_artifact_id=data["failed_artifact_id"],
+            failure_feedback=data["failure_feedback"],
+            provenance_ids=tuple(data["provenance_ids"]),
+        )
+
+    def _amendment_to_dict(self, amendment: HumanAmendment) -> dict:
+        """Serialize amendment to JSON-compatible dict."""
+        return {
+            "amendment_id": amendment.amendment_id,
+            "checkpoint_id": amendment.checkpoint_id,
+            "amendment_type": amendment.amendment_type.value,
+            "created_at": amendment.created_at,
+            "created_by": amendment.created_by,
+            "content": amendment.content,
+            "context": amendment.context,
+            "parent_artifact_id": amendment.parent_artifact_id,
+            "additional_rmax": amendment.additional_rmax,
+        }
+
+    def _dict_to_amendment(self, data: dict) -> HumanAmendment:
+        """Deserialize amendment from JSON dict."""
+        return HumanAmendment(
+            amendment_id=data["amendment_id"],
+            checkpoint_id=data["checkpoint_id"],
+            amendment_type=AmendmentType(data["amendment_type"]),
+            created_at=data["created_at"],
+            created_by=data["created_by"],
+            content=data["content"],
+            context=data.get("context", ""),
+            parent_artifact_id=data.get("parent_artifact_id"),
+            additional_rmax=data.get("additional_rmax", 0),
+        )
+
+    def store_checkpoint(self, checkpoint: WorkflowCheckpoint) -> str:
+        """
+        Store a checkpoint and return its ID.
+
+        Args:
+            checkpoint: The checkpoint to store
+
+        Returns:
+            The checkpoint_id
+        """
+        # 1. Serialize to JSON
+        checkpoint_dict = self._checkpoint_to_dict(checkpoint)
+
+        # 2. Write to checkpoints/{prefix}/{checkpoint_id}.json
+        object_path = self._get_checkpoint_path(checkpoint.checkpoint_id)
+        object_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(object_path, "w") as f:
+            json.dump(checkpoint_dict, f, indent=2)
+
+        # 3. Update index
+        self._index["checkpoints"][checkpoint.checkpoint_id] = {
+            "path": str(object_path.relative_to(self._base_dir)),
+            "workflow_id": checkpoint.workflow_id,
+            "failed_step": checkpoint.failed_step,
+            "failure_type": checkpoint.failure_type.value,
+            "created_at": checkpoint.created_at,
+        }
+
+        # Track by workflow
+        if checkpoint.workflow_id not in self._index["by_workflow"]:
+            self._index["by_workflow"][checkpoint.workflow_id] = []
+        self._index["by_workflow"][checkpoint.workflow_id].append(
+            checkpoint.checkpoint_id
+        )
+
+        # 4. Atomically update index
+        self._update_index_atomic()
+
+        # 5. Add to cache
+        self._cache_checkpoints[checkpoint.checkpoint_id] = checkpoint
+
+        return checkpoint.checkpoint_id
+
+    def get_checkpoint(self, checkpoint_id: str) -> WorkflowCheckpoint:
+        """Retrieve checkpoint by ID (cache-first)."""
+        # Check cache first
+        if checkpoint_id in self._cache_checkpoints:
+            return self._cache_checkpoints[checkpoint_id]
+
+        # Check index
+        if checkpoint_id not in self._index["checkpoints"]:
+            raise KeyError(f"Checkpoint not found: {checkpoint_id}")
+
+        # Load from filesystem
+        rel_path = self._index["checkpoints"][checkpoint_id]["path"]
+        object_path = self._base_dir / rel_path
+
+        with open(object_path) as f:
+            data = json.load(f)
+
+        checkpoint = self._dict_to_checkpoint(data)
+        self._cache_checkpoints[checkpoint_id] = checkpoint
+        return checkpoint
+
+    def store_amendment(self, amendment: HumanAmendment) -> str:
+        """
+        Store a human amendment and return its ID.
+
+        Args:
+            amendment: The amendment to store
+
+        Returns:
+            The amendment_id
+        """
+        # 1. Serialize to JSON
+        amendment_dict = self._amendment_to_dict(amendment)
+
+        # 2. Write to amendments/{prefix}/{amendment_id}.json
+        object_path = self._get_amendment_path(amendment.amendment_id)
+        object_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(object_path, "w") as f:
+            json.dump(amendment_dict, f, indent=2)
+
+        # 3. Update index
+        self._index["amendments"][amendment.amendment_id] = {
+            "path": str(object_path.relative_to(self._base_dir)),
+            "checkpoint_id": amendment.checkpoint_id,
+            "amendment_type": amendment.amendment_type.value,
+            "created_at": amendment.created_at,
+        }
+
+        # Track by checkpoint
+        if amendment.checkpoint_id not in self._index["by_checkpoint"]:
+            self._index["by_checkpoint"][amendment.checkpoint_id] = []
+        self._index["by_checkpoint"][amendment.checkpoint_id].append(
+            amendment.amendment_id
+        )
+
+        # 4. Atomically update index
+        self._update_index_atomic()
+
+        # 5. Add to cache
+        self._cache_amendments[amendment.amendment_id] = amendment
+
+        return amendment.amendment_id
+
+    def get_amendment(self, amendment_id: str) -> HumanAmendment:
+        """Retrieve amendment by ID (cache-first)."""
+        # Check cache first
+        if amendment_id in self._cache_amendments:
+            return self._cache_amendments[amendment_id]
+
+        # Check index
+        if amendment_id not in self._index["amendments"]:
+            raise KeyError(f"Amendment not found: {amendment_id}")
+
+        # Load from filesystem
+        rel_path = self._index["amendments"][amendment_id]["path"]
+        object_path = self._base_dir / rel_path
+
+        with open(object_path) as f:
+            data = json.load(f)
+
+        amendment = self._dict_to_amendment(data)
+        self._cache_amendments[amendment_id] = amendment
+        return amendment
+
+    def get_amendments_for_checkpoint(self, checkpoint_id: str) -> list[HumanAmendment]:
+        """Get all amendments for a checkpoint."""
+        if checkpoint_id not in self._index.get("by_checkpoint", {}):
+            return []
+
+        amendment_ids = self._index["by_checkpoint"][checkpoint_id]
+        return [self.get_amendment(aid) for aid in amendment_ids]
+
+    def list_checkpoints(
+        self, workflow_id: str | None = None
+    ) -> list[WorkflowCheckpoint]:
+        """
+        List checkpoints, optionally filtered by workflow_id.
+
+        Args:
+            workflow_id: Optional filter by workflow
+
+        Returns:
+            List of matching checkpoints, newest first
+        """
+        if workflow_id is not None:
+            # Filter by workflow
+            if workflow_id not in self._index.get("by_workflow", {}):
+                return []
+            checkpoint_ids = self._index["by_workflow"][workflow_id]
+        else:
+            # All checkpoints
+            checkpoint_ids = list(self._index.get("checkpoints", {}).keys())
+
+        # Load checkpoints and sort by created_at descending
+        checkpoints = [self.get_checkpoint(cid) for cid in checkpoint_ids]
+        checkpoints.sort(key=lambda c: c.created_at, reverse=True)
+        return checkpoints
+
+
+class InMemoryCheckpointDAG(CheckpointDAGInterface):
+    """
+    In-memory checkpoint storage for testing.
+    """
+
+    def __init__(self) -> None:
+        self._checkpoints: dict[str, WorkflowCheckpoint] = {}
+        self._amendments: dict[str, HumanAmendment] = {}
+        self._by_workflow: dict[str, list[str]] = {}
+        self._by_checkpoint: dict[str, list[str]] = {}
+
+    def store_checkpoint(self, checkpoint: WorkflowCheckpoint) -> str:
+        self._checkpoints[checkpoint.checkpoint_id] = checkpoint
+
+        if checkpoint.workflow_id not in self._by_workflow:
+            self._by_workflow[checkpoint.workflow_id] = []
+        self._by_workflow[checkpoint.workflow_id].append(checkpoint.checkpoint_id)
+
+        return checkpoint.checkpoint_id
+
+    def get_checkpoint(self, checkpoint_id: str) -> WorkflowCheckpoint:
+        if checkpoint_id not in self._checkpoints:
+            raise KeyError(f"Checkpoint not found: {checkpoint_id}")
+        return self._checkpoints[checkpoint_id]
+
+    def store_amendment(self, amendment: HumanAmendment) -> str:
+        self._amendments[amendment.amendment_id] = amendment
+
+        if amendment.checkpoint_id not in self._by_checkpoint:
+            self._by_checkpoint[amendment.checkpoint_id] = []
+        self._by_checkpoint[amendment.checkpoint_id].append(amendment.amendment_id)
+
+        return amendment.amendment_id
+
+    def get_amendment(self, amendment_id: str) -> HumanAmendment:
+        if amendment_id not in self._amendments:
+            raise KeyError(f"Amendment not found: {amendment_id}")
+        return self._amendments[amendment_id]
+
+    def get_amendments_for_checkpoint(self, checkpoint_id: str) -> list[HumanAmendment]:
+        if checkpoint_id not in self._by_checkpoint:
+            return []
+        return [self._amendments[aid] for aid in self._by_checkpoint[checkpoint_id]]
+
+    def list_checkpoints(
+        self, workflow_id: str | None = None
+    ) -> list[WorkflowCheckpoint]:
+        if workflow_id is not None:
+            if workflow_id not in self._by_workflow:
+                return []
+            checkpoint_ids = self._by_workflow[workflow_id]
+        else:
+            checkpoint_ids = list(self._checkpoints.keys())
+
+        checkpoints = [self._checkpoints[cid] for cid in checkpoint_ids]
+        checkpoints.sort(key=lambda c: c.created_at, reverse=True)
+        return checkpoints
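
The checkpoint DAG above is what makes 1.2.0 workflows resumable: each checkpoint records the completed steps, the failed step, the failure type, and the feedback that accompanied the failure, while amendments attach human guidance to a specific checkpoint. A minimal sketch of inspecting persisted state with the API shown in this hunk (the base directory and workflow id are illustrative, not part of the package):

    from atomicguard.infrastructure.persistence.checkpoint import FilesystemCheckpointDAG

    dag = FilesystemCheckpointDAG(base_dir="./checkpoints")      # illustrative path
    for cp in dag.list_checkpoints(workflow_id="wf-001"):        # newest first
        print(cp.failed_step, cp.failure_type, cp.failure_feedback)
        # Human guidance recorded against that checkpoint, if any
        for amendment in dag.get_amendments_for_checkpoint(cp.checkpoint_id):
            print("  amendment:", amendment.amendment_type, amendment.content)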
atomicguard/infrastructure/persistence/filesystem.py
@@ -43,7 +43,7 @@ class FilesystemArtifactDAG(ArtifactDAGInterface):
                 result: dict[str, Any] = json.load(f)
             return result
 
-        return {"version": "1.0", "artifacts": {}, "action_pairs": {}}
+        return {"version": "1.0", "artifacts": {}, "action_pairs": {}, "workflows": {}}
 
     def _update_index_atomic(self) -> None:
         """Atomically update index.json using write-to-temp + rename."""
@@ -56,8 +56,10 @@ class FilesystemArtifactDAG(ArtifactDAGInterface):
         """Serialize artifact to JSON-compatible dict."""
         return {
             "artifact_id": artifact.artifact_id,
+            "workflow_id": artifact.workflow_id,
             "content": artifact.content,
             "previous_attempt_id": artifact.previous_attempt_id,
+            "parent_action_pair_id": artifact.parent_action_pair_id,
             "action_pair_id": artifact.action_pair_id,
             "created_at": artifact.created_at,
             "attempt_number": artifact.attempt_number,
@@ -65,31 +67,43 @@ class FilesystemArtifactDAG(ArtifactDAGInterface):
             "guard_result": artifact.guard_result,
             "feedback": artifact.feedback,
             "context": {
+                "workflow_id": artifact.context.workflow_id,
                 "specification": artifact.context.specification,
                 "constraints": artifact.context.constraints,
                 "feedback_history": [
                     {"artifact_id": fe.artifact_id, "feedback": fe.feedback}
                     for fe in artifact.context.feedback_history
                 ],
-
+                # Serialize tuple → dict for JSON object format (matches schema)
+                "dependency_artifacts": dict(artifact.context.dependency_artifacts),
             },
         }
 
     def _dict_to_artifact(self, data: dict) -> Artifact:
         """Deserialize artifact from JSON dict."""
+        # Handle both old format (dependency_ids) and new format (dependency_artifacts)
+        dep_data = data["context"].get("dependency_artifacts", {})
+        if not dep_data:
+            # Backwards compatibility: convert old flat array to empty dict
+            dep_data = {}
+
         context = ContextSnapshot(
+            workflow_id=data["context"].get("workflow_id", "unknown"),
            specification=data["context"]["specification"],
            constraints=data["context"]["constraints"],
            feedback_history=tuple(
                FeedbackEntry(artifact_id=fe["artifact_id"], feedback=fe["feedback"])
                for fe in data["context"]["feedback_history"]
            ),
-
+            # Deserialize dict → tuple for immutability
+            dependency_artifacts=tuple(dep_data.items()),
        )
        return Artifact(
            artifact_id=data["artifact_id"],
+            workflow_id=data.get("workflow_id", "unknown"),
            content=data["content"],
            previous_attempt_id=data["previous_attempt_id"],
+            parent_action_pair_id=data.get("parent_action_pair_id"),
            action_pair_id=data["action_pair_id"],
            created_at=data["created_at"],
            attempt_number=data["attempt_number"],
@@ -104,13 +118,12 @@ class FilesystemArtifactDAG(ArtifactDAGInterface):
         prefix = artifact_id[:2]
         return self._objects_dir / prefix / f"{artifact_id}.json"
 
-    def store(self, artifact: Artifact
+    def store(self, artifact: Artifact) -> str:
         """
         Append artifact to DAG (immutable, append-only).
 
         Args:
             artifact: The artifact to store
-            metadata: Optional metadata string (for compatibility, stored in feedback)
 
         Returns:
             The artifact_id
@@ -127,7 +140,9 @@ class FilesystemArtifactDAG(ArtifactDAGInterface):
         # 3. Update index
         self._index["artifacts"][artifact.artifact_id] = {
             "path": str(object_path.relative_to(self._base_dir)),
+            "workflow_id": artifact.workflow_id,
             "action_pair_id": artifact.action_pair_id,
+            "parent_action_pair_id": artifact.parent_action_pair_id,
             "status": artifact.status.value,
             "created_at": artifact.created_at,
         }
@@ -139,6 +154,13 @@ class FilesystemArtifactDAG(ArtifactDAGInterface):
             artifact.artifact_id
         )
 
+        # Track by workflow
+        if "workflows" not in self._index:
+            self._index["workflows"] = {}
+        if artifact.workflow_id not in self._index["workflows"]:
+            self._index["workflows"][artifact.workflow_id] = []
+        self._index["workflows"][artifact.workflow_id].append(artifact.artifact_id)
+
         # 4. Atomically update index
         self._update_index_atomic()
 
@@ -207,8 +229,10 @@ class FilesystemArtifactDAG(ArtifactDAGInterface):
         # Create updated artifact (immutable, so we create new instance)
         updated = Artifact(
             artifact_id=artifact.artifact_id,
+            workflow_id=artifact.workflow_id,
             content=artifact.content,
             previous_attempt_id=artifact.previous_attempt_id,
+            parent_action_pair_id=artifact.parent_action_pair_id,
             action_pair_id=artifact.action_pair_id,
             created_at=artifact.created_at,
             attempt_number=artifact.attempt_number,
@@ -230,3 +254,43 @@ class FilesystemArtifactDAG(ArtifactDAGInterface):
 
         # Update cache
         self._cache[artifact_id] = updated
+
+    def get_by_workflow(self, workflow_id: str) -> list[Artifact]:
+        """Get all artifacts for a workflow execution."""
+        if "workflows" not in self._index:
+            return []
+        if workflow_id not in self._index["workflows"]:
+            return []
+
+        artifact_ids = self._index["workflows"][workflow_id]
+        return [self.get_artifact(aid) for aid in artifact_ids]
+
+    def get_latest_for_action_pair(
+        self, action_pair_id: str, workflow_id: str
+    ) -> Artifact | None:
+        """
+        Get the most recent artifact for an action pair in a workflow.
+
+        Args:
+            action_pair_id: The action pair identifier (e.g., 'g_test')
+            workflow_id: UUID of the workflow execution instance
+
+        Returns:
+            The most recent artifact, or None if not found
+        """
+        if action_pair_id not in self._index.get("action_pairs", {}):
+            return None
+
+        # Filter by workflow and find latest by created_at
+        candidates = []
+        for artifact_id in self._index["action_pairs"][action_pair_id]:
+            artifact_info = self._index["artifacts"].get(artifact_id, {})
+            if artifact_info.get("workflow_id") == workflow_id:
+                candidates.append((artifact_id, artifact_info.get("created_at", "")))
+
+        if not candidates:
+            return None
+
+        # Sort by created_at descending and return the latest
+        candidates.sort(key=lambda x: x[1], reverse=True)
+        return self.get_artifact(candidates[0][0])
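
The artifact DAG changes mirror the checkpoint work: artifacts now carry a workflow_id and parent_action_pair_id, the index tracks artifacts per workflow, and two query methods expose that view. A short sketch, assuming an already-constructed FilesystemArtifactDAG instance named artifact_dag (the ids are illustrative):

    # Every artifact produced by one workflow execution (via the "workflows" index)
    for artifact in artifact_dag.get_by_workflow("wf-001"):
        print(artifact.action_pair_id, artifact.attempt_number, artifact.status)

    # Latest attempt for a single action pair within that execution, or None
    latest = artifact_dag.get_latest_for_action_pair("g_test", "wf-001")
    if latest is not None:
        print(latest.artifact_id, latest.created_at)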
atomicguard/infrastructure/persistence/memory.py
@@ -13,11 +13,9 @@ class InMemoryArtifactDAG(ArtifactDAGInterface):
 
     def __init__(self) -> None:
         self._artifacts: dict[str, Artifact] = {}
-        self._metadata: dict[str, str] = {}
 
-    def store(self, artifact: Artifact
+    def store(self, artifact: Artifact) -> str:
         self._artifacts[artifact.artifact_id] = artifact
-        self._metadata[artifact.artifact_id] = metadata
         return artifact.artifact_id
 
     def get_artifact(self, artifact_id: str) -> Artifact:
@@ -37,3 +35,27 @@ class InMemoryArtifactDAG(ArtifactDAGInterface):
                 break
             current = self._artifacts.get(current.previous_attempt_id)
         return list(reversed(result))
+
+    def get_latest_for_action_pair(
+        self, action_pair_id: str, workflow_id: str
+    ) -> Artifact | None:
+        """
+        Get the most recent artifact for an action pair in a workflow.
+
+        Args:
+            action_pair_id: The action pair identifier (e.g., 'g_test')
+            workflow_id: UUID of the workflow execution instance
+
+        Returns:
+            The most recent artifact, or None if not found
+        """
+        candidates = [
+            a
+            for a in self._artifacts.values()
+            if a.action_pair_id == action_pair_id and a.workflow_id == workflow_id
+        ]
+        if not candidates:
+            return None
+        # Sort by created_at descending and return the latest
+        candidates.sort(key=lambda a: a.created_at, reverse=True)
+        return candidates[0]
atomicguard/infrastructure/registry.py (new file)
@@ -0,0 +1,126 @@
+"""
+Generator Registry with Entry Points Discovery.
+
+Provides dynamic generator loading via Python entry points (atomicguard.generators group).
+External packages can register generators in their pyproject.toml:
+
+    [project.entry-points."atomicguard.generators"]
+    MyGenerator = "mypackage.generators:MyGenerator"
+"""
+
+from importlib.metadata import entry_points
+from typing import Any
+
+from atomicguard.domain.interfaces import GeneratorInterface
+
+
+class GeneratorRegistry:
+    """
+    Registry for GeneratorInterface implementations.
+
+    Discovers generators via the 'atomicguard.generators' entry point group.
+    Uses lazy loading - entry points are only loaded on first access.
+
+    Example usage:
+        registry = GeneratorRegistry()
+        generator = registry.create("OllamaGenerator", model="qwen2.5-coder:14b")
+    """
+
+    _generators: dict[str, type[GeneratorInterface]] = {}
+    _loaded: bool = False
+
+    @classmethod
+    def _load_entry_points(cls) -> None:
+        """Load generators from entry points (lazy, called once)."""
+        if cls._loaded:
+            return
+
+        eps = entry_points(group="atomicguard.generators")
+        for ep in eps:
+            try:
+                generator_class = ep.load()
+                cls._generators[ep.name] = generator_class
+            except Exception as e:
+                import warnings
+
+                warnings.warn(
+                    f"Failed to load generator '{ep.name}' from entry point: {e}",
+                    stacklevel=2,
+                )
+
+        cls._loaded = True
+
+    @classmethod
+    def register(cls, name: str, generator_class: type[GeneratorInterface]) -> None:
+        """
+        Manually register a generator class.
+
+        Useful for testing or dynamically-created generators.
+
+        Args:
+            name: Generator identifier (e.g., "OllamaGenerator")
+            generator_class: Class implementing GeneratorInterface
+        """
+        cls._generators[name] = generator_class
+
+    @classmethod
+    def get(cls, name: str) -> type[GeneratorInterface]:
+        """
+        Get a generator class by name.
+
+        Args:
+            name: Generator identifier
+
+        Returns:
+            The generator class
+
+        Raises:
+            KeyError: If generator not found
+        """
+        cls._load_entry_points()
+        if name not in cls._generators:
+            available = ", ".join(cls._generators.keys()) or "(none)"
+            raise KeyError(
+                f"Generator '{name}' not found. Available generators: {available}"
+            )
+        return cls._generators[name]
+
+    @classmethod
+    def create(cls, name: str, **config: Any) -> GeneratorInterface:
+        """
+        Create a generator instance by name.
+
+        Args:
+            name: Generator identifier
+            **config: Configuration passed to generator constructor
+
+        Returns:
+            Instantiated generator
+
+        Raises:
+            KeyError: If generator not found
+            TypeError: If config doesn't match constructor signature
+        """
+        generator_class = cls.get(name)
+        return generator_class(**config)
+
+    @classmethod
+    def available(cls) -> list[str]:
+        """
+        List available generator names.
+
+        Returns:
+            List of registered generator names
+        """
+        cls._load_entry_points()
+        return list(cls._generators.keys())
+
+    @classmethod
+    def clear(cls) -> None:
+        """
+        Clear all registered generators (useful for testing).
+
+        Also resets the loaded flag so entry points can be reloaded.
+        """
+        cls._generators.clear()
+        cls._loaded = False
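
Beyond entry-point discovery, generators can be registered manually, which is the simplest way to exercise the registry in tests. A sketch using only the classmethods shown above (DummyGenerator is a hypothetical stand-in for any class implementing GeneratorInterface):

    from atomicguard.infrastructure.registry import GeneratorRegistry

    GeneratorRegistry.register("DummyGenerator", DummyGenerator)  # your own implementation
    print(GeneratorRegistry.available())                          # entry-point + manually registered names
    generator = GeneratorRegistry.create("DummyGenerator")        # kwargs are passed to the constructor
    GeneratorRegistry.clear()                                     # reset; entry points reload on next access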