atomicguard 0.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atomicguard/__init__.py +8 -3
- atomicguard/application/action_pair.py +7 -1
- atomicguard/application/agent.py +46 -6
- atomicguard/application/workflow.py +494 -11
- atomicguard/domain/__init__.py +4 -1
- atomicguard/domain/exceptions.py +19 -0
- atomicguard/domain/interfaces.py +137 -6
- atomicguard/domain/models.py +120 -6
- atomicguard/guards/__init__.py +16 -5
- atomicguard/guards/composite/__init__.py +11 -0
- atomicguard/guards/dynamic/__init__.py +13 -0
- atomicguard/guards/dynamic/test_runner.py +207 -0
- atomicguard/guards/interactive/__init__.py +11 -0
- atomicguard/guards/static/__init__.py +13 -0
- atomicguard/guards/static/imports.py +177 -0
- atomicguard/infrastructure/__init__.py +4 -1
- atomicguard/infrastructure/llm/__init__.py +3 -1
- atomicguard/infrastructure/llm/huggingface.py +180 -0
- atomicguard/infrastructure/llm/mock.py +32 -6
- atomicguard/infrastructure/llm/ollama.py +40 -17
- atomicguard/infrastructure/persistence/__init__.py +7 -1
- atomicguard/infrastructure/persistence/checkpoint.py +361 -0
- atomicguard/infrastructure/persistence/filesystem.py +69 -5
- atomicguard/infrastructure/persistence/memory.py +25 -3
- atomicguard/infrastructure/registry.py +126 -0
- atomicguard/schemas/__init__.py +142 -0
- {atomicguard-0.1.0.dist-info → atomicguard-1.2.0.dist-info}/METADATA +75 -13
- atomicguard-1.2.0.dist-info/RECORD +37 -0
- {atomicguard-0.1.0.dist-info → atomicguard-1.2.0.dist-info}/WHEEL +1 -1
- atomicguard-1.2.0.dist-info/entry_points.txt +4 -0
- atomicguard/guards/test_runner.py +0 -176
- atomicguard-0.1.0.dist-info/RECORD +0 -27
- /atomicguard/guards/{base.py → composite/base.py} +0 -0
- /atomicguard/guards/{human.py → interactive/human.py} +0 -0
- /atomicguard/guards/{syntax.py → static/syntax.py} +0 -0
- {atomicguard-0.1.0.dist-info → atomicguard-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {atomicguard-0.1.0.dist-info → atomicguard-1.2.0.dist-info}/top_level.txt +0 -0
atomicguard/domain/interfaces.py
CHANGED
|
@@ -13,10 +13,9 @@ if TYPE_CHECKING:
|
|
|
13
13
|
Artifact,
|
|
14
14
|
Context,
|
|
15
15
|
GuardResult,
|
|
16
|
+
HumanAmendment,
|
|
17
|
+
WorkflowCheckpoint,
|
|
16
18
|
)
|
|
17
|
-
|
|
18
|
-
if TYPE_CHECKING:
|
|
19
|
-
from atomicguard.domain.models import Artifact, Context, GuardResult
|
|
20
19
|
from atomicguard.domain.prompts import PromptTemplate
|
|
21
20
|
|
|
22
21
|
|
|
@@ -25,11 +24,29 @@ class GeneratorInterface(ABC):
|
|
|
25
24
|
Port for artifact generation.
|
|
26
25
|
|
|
27
26
|
Implementations connect to LLMs or other generation sources.
|
|
27
|
+
|
|
28
|
+
Note (Hierarchical Composition & Semantic Agency):
|
|
29
|
+
The generator is not constrained to a single inference step. It may be
|
|
30
|
+
instantiated as an autonomous Semantic Agent (ReAct loop, CoT reasoning,
|
|
31
|
+
multi-tool orchestration) operating within the stochastic environment.
|
|
32
|
+
From the workflow's perspective, this agentic process is atomic — the
|
|
33
|
+
Workflow State tracks only the final artifact's validity via the Guard.
|
|
34
|
+
|
|
35
|
+
Note (Side Effects & Idempotency):
|
|
36
|
+
While generate() formally produces an artifact, implementations
|
|
37
|
+
may induce side effects (filesystem I/O, API calls). In such cases:
|
|
38
|
+
1. The artifact serves as a receipt/manifest of the operation
|
|
39
|
+
2. Guards act as sensors verifying environmental state
|
|
40
|
+
3. Side-effecting generators MUST be idempotent for retry safety
|
|
28
41
|
"""
|
|
29
42
|
|
|
30
43
|
@abstractmethod
|
|
31
44
|
def generate(
|
|
32
|
-
self,
|
|
45
|
+
self,
|
|
46
|
+
context: "Context",
|
|
47
|
+
template: Optional["PromptTemplate"] = None,
|
|
48
|
+
action_pair_id: str = "unknown",
|
|
49
|
+
workflow_id: str = "unknown",
|
|
33
50
|
) -> "Artifact":
|
|
34
51
|
"""
|
|
35
52
|
Generate an artifact based on context.
|
|
@@ -37,6 +54,8 @@ class GeneratorInterface(ABC):
|
|
|
37
54
|
Args:
|
|
38
55
|
context: The generation context including specification and feedback
|
|
39
56
|
template: Optional prompt template for structured generation
|
|
57
|
+
action_pair_id: Identifier for the action pair requesting generation
|
|
58
|
+
workflow_id: UUID of the workflow execution instance
|
|
40
59
|
|
|
41
60
|
Returns:
|
|
42
61
|
A new Artifact containing the generated content
|
|
@@ -76,13 +95,12 @@ class ArtifactDAGInterface(ABC):
|
|
|
76
95
|
"""
|
|
77
96
|
|
|
78
97
|
@abstractmethod
|
|
79
|
-
def store(self, artifact: "Artifact"
|
|
98
|
+
def store(self, artifact: "Artifact") -> str:
|
|
80
99
|
"""
|
|
81
100
|
Store an artifact in the DAG.
|
|
82
101
|
|
|
83
102
|
Args:
|
|
84
103
|
artifact: The artifact to store
|
|
85
|
-
metadata: Optional metadata string
|
|
86
104
|
|
|
87
105
|
Returns:
|
|
88
106
|
The artifact_id
|
|
@@ -117,3 +135,116 @@ class ArtifactDAGInterface(ABC):
|
|
|
117
135
|
List of artifacts from oldest to newest in the chain
|
|
118
136
|
"""
|
|
119
137
|
pass
|
|
138
|
+
|
|
139
|
+
@abstractmethod
def get_latest_for_action_pair(
    self, action_pair_id: str, workflow_id: str
) -> Optional["Artifact"]:
    """
    Look up the newest artifact of one action pair within one workflow run.

    Args:
        action_pair_id: The action pair identifier (e.g., 'g_test')
        workflow_id: UUID of the workflow execution instance

    Returns:
        The most recent artifact, or None if not found
    """
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class CheckpointDAGInterface(ABC):
|
|
157
|
+
"""
|
|
158
|
+
Port for checkpoint persistence.
|
|
159
|
+
|
|
160
|
+
Provides storage for workflow checkpoints and human amendments,
|
|
161
|
+
enabling resumable workflows after failure/escalation.
|
|
162
|
+
"""
|
|
163
|
+
|
|
164
|
+
@abstractmethod
|
|
165
|
+
def store_checkpoint(self, checkpoint: "WorkflowCheckpoint") -> str:
|
|
166
|
+
"""
|
|
167
|
+
Store a checkpoint and return its ID.
|
|
168
|
+
|
|
169
|
+
Args:
|
|
170
|
+
checkpoint: The checkpoint to store
|
|
171
|
+
|
|
172
|
+
Returns:
|
|
173
|
+
The checkpoint_id
|
|
174
|
+
"""
|
|
175
|
+
pass
|
|
176
|
+
|
|
177
|
+
@abstractmethod
|
|
178
|
+
def get_checkpoint(self, checkpoint_id: str) -> "WorkflowCheckpoint":
|
|
179
|
+
"""
|
|
180
|
+
Retrieve checkpoint by ID.
|
|
181
|
+
|
|
182
|
+
Args:
|
|
183
|
+
checkpoint_id: The unique identifier
|
|
184
|
+
|
|
185
|
+
Returns:
|
|
186
|
+
The checkpoint
|
|
187
|
+
|
|
188
|
+
Raises:
|
|
189
|
+
KeyError: If checkpoint not found
|
|
190
|
+
"""
|
|
191
|
+
pass
|
|
192
|
+
|
|
193
|
+
@abstractmethod
|
|
194
|
+
def store_amendment(self, amendment: "HumanAmendment") -> str:
|
|
195
|
+
"""
|
|
196
|
+
Store a human amendment and return its ID.
|
|
197
|
+
|
|
198
|
+
Args:
|
|
199
|
+
amendment: The amendment to store
|
|
200
|
+
|
|
201
|
+
Returns:
|
|
202
|
+
The amendment_id
|
|
203
|
+
"""
|
|
204
|
+
pass
|
|
205
|
+
|
|
206
|
+
@abstractmethod
|
|
207
|
+
def get_amendment(self, amendment_id: str) -> "HumanAmendment":
|
|
208
|
+
"""
|
|
209
|
+
Retrieve amendment by ID.
|
|
210
|
+
|
|
211
|
+
Args:
|
|
212
|
+
amendment_id: The unique identifier
|
|
213
|
+
|
|
214
|
+
Returns:
|
|
215
|
+
The amendment
|
|
216
|
+
|
|
217
|
+
Raises:
|
|
218
|
+
KeyError: If amendment not found
|
|
219
|
+
"""
|
|
220
|
+
pass
|
|
221
|
+
|
|
222
|
+
@abstractmethod
|
|
223
|
+
def get_amendments_for_checkpoint(
|
|
224
|
+
self, checkpoint_id: str
|
|
225
|
+
) -> list["HumanAmendment"]:
|
|
226
|
+
"""
|
|
227
|
+
Get all amendments for a checkpoint.
|
|
228
|
+
|
|
229
|
+
Args:
|
|
230
|
+
checkpoint_id: The checkpoint identifier
|
|
231
|
+
|
|
232
|
+
Returns:
|
|
233
|
+
List of amendments linked to this checkpoint
|
|
234
|
+
"""
|
|
235
|
+
pass
|
|
236
|
+
|
|
237
|
+
@abstractmethod
|
|
238
|
+
def list_checkpoints(
|
|
239
|
+
self, workflow_id: str | None = None
|
|
240
|
+
) -> list["WorkflowCheckpoint"]:
|
|
241
|
+
"""
|
|
242
|
+
List checkpoints, optionally filtered by workflow_id.
|
|
243
|
+
|
|
244
|
+
Args:
|
|
245
|
+
workflow_id: Optional filter by workflow
|
|
246
|
+
|
|
247
|
+
Returns:
|
|
248
|
+
List of matching checkpoints, newest first
|
|
249
|
+
"""
|
|
250
|
+
pass
|
atomicguard/domain/models.py
CHANGED
|
@@ -27,6 +27,14 @@ class ArtifactStatus(Enum):
|
|
|
27
27
|
SUPERSEDED = "superseded" # Guard returned ⊤, but later attempt also passed
|
|
28
28
|
|
|
29
29
|
|
|
30
|
+
class ArtifactSource(Enum):
    """Where an artifact's content came from."""

    # Produced by an LLM generator
    GENERATED = "generated"
    # Supplied by a human during an amendment
    HUMAN = "human"
    # Brought in from an external source
    IMPORTED = "imported"
|
|
36
|
+
|
|
37
|
+
|
|
30
38
|
@dataclass(frozen=True)
|
|
31
39
|
class FeedbackEntry:
|
|
32
40
|
"""Single entry in feedback history H."""
|
|
@@ -39,10 +47,13 @@ class FeedbackEntry:
|
|
|
39
47
|
class ContextSnapshot:
|
|
40
48
|
"""Immutable context C that conditioned generation (Definition 5)."""
|
|
41
49
|
|
|
50
|
+
workflow_id: str # UUID of the workflow execution instance
|
|
42
51
|
specification: str # Ψ - static specification
|
|
43
52
|
constraints: str # Ω - global constraints
|
|
44
53
|
feedback_history: tuple[FeedbackEntry, ...] # H - accumulated rejections
|
|
45
|
-
|
|
54
|
+
dependency_artifacts: tuple[
|
|
55
|
+
tuple[str, str], ...
|
|
56
|
+
] = () # (action_pair_id, artifact_id) - matches schema
|
|
46
57
|
|
|
47
58
|
|
|
48
59
|
@dataclass(frozen=True)
|
|
@@ -55,11 +66,13 @@ class Artifact:
|
|
|
55
66
|
|
|
56
67
|
# Identity
|
|
57
68
|
artifact_id: str # Unique identifier (UUID)
|
|
69
|
+
workflow_id: str # UUID of the workflow execution instance
|
|
58
70
|
content: str # The generated code/text
|
|
59
71
|
|
|
60
72
|
# DAG Structure
|
|
61
73
|
previous_attempt_id: str | None # Retry chain within same action pair
|
|
62
|
-
#
|
|
74
|
+
parent_action_pair_id: str | None # Parent hierarchy for composite generators
|
|
75
|
+
# Cross-step deps are in context.dependency_artifacts
|
|
63
76
|
|
|
64
77
|
# Action Pair Coupling (Definition 6: A = ⟨ρ, a_gen, G⟩)
|
|
65
78
|
action_pair_id: str # Which action pair produced this
|
|
@@ -71,6 +84,7 @@ class Artifact:
|
|
|
71
84
|
guard_result: bool | None # ⊤ or ⊥ (None if pending)
|
|
72
85
|
feedback: str # φ - guard feedback (empty if passed)
|
|
73
86
|
context: ContextSnapshot # Full context snapshot at generation time
|
|
87
|
+
source: ArtifactSource = ArtifactSource.GENERATED # Origin of content
|
|
74
88
|
|
|
75
89
|
|
|
76
90
|
# =============================================================================
|
|
@@ -84,6 +98,7 @@ class GuardResult:
|
|
|
84
98
|
|
|
85
99
|
passed: bool
|
|
86
100
|
feedback: str = ""
|
|
101
|
+
fatal: bool = False # ⊥_fatal - skip retry, escalate to human
|
|
87
102
|
|
|
88
103
|
|
|
89
104
|
# =============================================================================
|
|
@@ -107,9 +122,16 @@ class Context:
|
|
|
107
122
|
specification: str
|
|
108
123
|
current_artifact: str | None = None
|
|
109
124
|
feedback_history: tuple[tuple[str, str], ...] = ()
|
|
110
|
-
|
|
111
|
-
tuple[str,
|
|
112
|
-
] = () # (
|
|
125
|
+
dependency_artifacts: tuple[
|
|
126
|
+
tuple[str, str], ...
|
|
127
|
+
] = () # (action_pair_id, artifact_id) - matches schema
|
|
128
|
+
|
|
129
|
+
def get_dependency(self, action_pair_id: str) -> str | None:
|
|
130
|
+
"""Look up artifact_id by action_pair_id."""
|
|
131
|
+
for key, artifact_id in self.dependency_artifacts:
|
|
132
|
+
if key == action_pair_id:
|
|
133
|
+
return artifact_id
|
|
134
|
+
return None
|
|
113
135
|
|
|
114
136
|
|
|
115
137
|
# =============================================================================
|
|
@@ -117,6 +139,15 @@ class Context:
|
|
|
117
139
|
# =============================================================================
|
|
118
140
|
|
|
119
141
|
|
|
142
|
+
class WorkflowStatus(Enum):
    """Possible outcomes of a workflow execution."""

    # Every step completed
    SUCCESS = "success"
    # Retry budget (Rmax) ran out on a step
    FAILED = "failed"
    # A fatal guard result was triggered
    ESCALATION = "escalation"
    # Workflow paused; a checkpoint was created so it can be resumed
    CHECKPOINT = "checkpoint"
|
|
149
|
+
|
|
150
|
+
|
|
120
151
|
@dataclass
|
|
121
152
|
class WorkflowState:
|
|
122
153
|
"""Mutable workflow state tracking guard satisfaction."""
|
|
@@ -139,7 +170,90 @@ class WorkflowState:
|
|
|
139
170
|
class WorkflowResult:
|
|
140
171
|
"""Result of workflow execution."""
|
|
141
172
|
|
|
142
|
-
|
|
173
|
+
status: WorkflowStatus
|
|
143
174
|
artifacts: dict[str, Artifact]
|
|
144
175
|
failed_step: str | None = None
|
|
145
176
|
provenance: tuple[tuple[Artifact, str], ...] = ()
|
|
177
|
+
escalation_artifact: Artifact | None = None # Artifact that triggered escalation
|
|
178
|
+
escalation_feedback: str = "" # Fatal feedback message
|
|
179
|
+
checkpoint: "WorkflowCheckpoint | None" = None # For CHECKPOINT status
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
# =============================================================================
|
|
183
|
+
# CHECKPOINT AND HUMAN AMENDMENT (Resumable Workflow Support)
|
|
184
|
+
# =============================================================================
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
class FailureType(Enum):
    """Kind of workflow failure that caused a checkpoint to be taken."""

    # Guard returned ⊥_fatal
    ESCALATION = "escalation"
    # Retry budget was used up
    RMAX_EXHAUSTED = "rmax_exhausted"
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
@dataclass(frozen=True)
class WorkflowCheckpoint:
    """
    Frozen snapshot of a workflow taken at the point of failure.

    It keeps what is needed to resume after a human amendment:
    - the original workflow context and configuration
    - the steps that completed, with their artifacts
    - the failure details a human needs for review
    """

    # --- Identity ---
    checkpoint_id: str  # UUID
    workflow_id: str  # ID of the original workflow execution
    created_at: str  # ISO timestamp

    # --- Workflow context ---
    specification: str  # Original Ψ
    constraints: str  # Original Ω
    rmax: int  # Original retry budget

    # --- Completed state ---
    completed_steps: tuple[str, ...]  # guard_ids that passed
    artifact_ids: tuple[tuple[str, str], ...]  # (guard_id, artifact_id) pairs

    # --- Failure details ---
    failure_type: FailureType
    failed_step: str  # guard_id where the failure occurred
    failed_artifact_id: str | None  # Last artifact before the failure
    failure_feedback: str  # Error/feedback message
    provenance_ids: tuple[str, ...]  # Artifact IDs of all failed attempts
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
class AmendmentType(Enum):
    """Kinds of human amendment."""

    # Human supplies new artifact content
    ARTIFACT = "artifact"
    # Human supplies extra guidance for an LLM retry
    FEEDBACK = "feedback"
    # Human approves skipping the step (for optional steps)
    SKIP = "skip"
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
@dataclass(frozen=True)
class HumanAmendment:
    """
    Frozen record of one human intervention in a workflow.

    It links the failed artifact to the human-provided amendment in the
    DAG provenance chain.
    """

    # --- Identity ---
    amendment_id: str  # UUID
    checkpoint_id: str  # Links to WorkflowCheckpoint
    amendment_type: AmendmentType
    created_at: str  # ISO timestamp
    created_by: str  # Human identifier (e.g., username, "cli")

    # --- Content ---
    content: str  # Human-provided artifact or feedback
    context: str = ""  # Additional context/clarification

    # --- Provenance ---
    parent_artifact_id: str | None = None  # Links to failed artifact in DAG

    # --- Resume options ---
    additional_rmax: int = 0  # Extra retries beyond the original budget
|
atomicguard/guards/__init__.py
CHANGED
|
@@ -3,17 +3,28 @@ Guards for the Dual-State Framework.
|
|
|
3
3
|
|
|
4
4
|
Guards are deterministic validators that return ⊤ (pass) or ⊥ (fail with feedback).
|
|
5
5
|
They can be composed using CompositeGuard for layered validation.
|
|
6
|
+
|
|
7
|
+
Organization by validation profile:
|
|
8
|
+
- static/: Pure AST-based validation (no execution)
|
|
9
|
+
- dynamic/: Subprocess-based validation (test execution)
|
|
10
|
+
- interactive/: Human-in-loop validation
|
|
11
|
+
- composite/: Guard composition patterns
|
|
6
12
|
"""
|
|
7
13
|
|
|
8
|
-
from atomicguard.guards.
|
|
9
|
-
from atomicguard.guards.
|
|
10
|
-
from atomicguard.guards.
|
|
11
|
-
from atomicguard.guards.
|
|
14
|
+
from atomicguard.guards.composite import CompositeGuard
|
|
15
|
+
from atomicguard.guards.dynamic import DynamicTestGuard, TestGuard
|
|
16
|
+
from atomicguard.guards.interactive import HumanReviewGuard
|
|
17
|
+
from atomicguard.guards.static import ImportGuard, SyntaxGuard
|
|
12
18
|
|
|
13
19
|
__all__ = [
|
|
14
|
-
|
|
20
|
+
# Static guards (pure, fast)
|
|
15
21
|
"SyntaxGuard",
|
|
22
|
+
"ImportGuard",
|
|
23
|
+
# Dynamic guards (subprocess-based)
|
|
16
24
|
"TestGuard",
|
|
17
25
|
"DynamicTestGuard",
|
|
26
|
+
# Interactive guards (human-in-loop)
|
|
18
27
|
"HumanReviewGuard",
|
|
28
|
+
# Composition patterns
|
|
29
|
+
"CompositeGuard",
|
|
19
30
|
]
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Dynamic guards - Subprocess-based validation with code execution.
|
|
3
|
+
|
|
4
|
+
These guards run code in isolated subprocesses for safety.
|
|
5
|
+
They are slower but can validate runtime behavior.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from atomicguard.guards.dynamic.test_runner import DynamicTestGuard, TestGuard
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"DynamicTestGuard",
|
|
12
|
+
"TestGuard",
|
|
13
|
+
]
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Test execution guards.
|
|
3
|
+
|
|
4
|
+
Guards that validate artifacts by running tests against them.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import multiprocessing
|
|
8
|
+
import sys
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from atomicguard.domain.interfaces import GuardInterface
|
|
12
|
+
from atomicguard.domain.models import Artifact, GuardResult
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TestGuard(GuardInterface):
    """
    In-process test guard.

    Executes the artifact's code and then the test code with exec() in one
    shared namespace. Nothing is isolated from the calling process; use
    DynamicTestGuard when isolation is required.
    """

    def __init__(self, test_code: str | None = None):
        """
        Args:
            test_code: Static test code to run (if not using dependencies)
        """
        self._static_test_code = test_code

    def validate(self, artifact: Artifact, **deps: Any) -> GuardResult:
        """
        Run the test code against the artifact's content.

        Args:
            artifact: The implementation artifact to test
            **deps: Dependency artifacts; the first one passed (whatever its
                keyword name) supplies the test code

        Returns:
            GuardResult with passed=True when both exec() calls finish without
            raising, otherwise passed=False with the error as feedback
        """
        # Test guards typically have exactly one dependency, so the first is used.
        dependency = next(iter(deps.values()), None)
        if dependency:
            test_code = dependency.content
        else:
            test_code = self._static_test_code

        if not test_code:
            return GuardResult(passed=False, feedback="No test code provided")

        # NOTE(review): exec() runs generated code inside this process.
        scope: dict[str, Any] = {}
        try:
            exec(artifact.content, scope)
            exec(test_code, scope)
        except AssertionError as failure:
            return GuardResult(passed=False, feedback=f"Test failed: {failure}")
        except Exception as error:
            return GuardResult(
                passed=False, feedback=f"{type(error).__name__}: {error}"
            )
        else:
            return GuardResult(passed=True)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class DynamicTestGuard(GuardInterface):
    """
    Runs test code against an implementation in a separate child process.

    Test code can come from:
    1. Constructor parameter (test_code) - for config-driven workflows
    2. A dependency artifact passed to validate() - for multi-step TDD workflows

    The child writes both sources to a temporary directory, runs pytest on
    them and reports (passed, message) back through a multiprocessing.Queue.
    Running in a child process keeps test code from affecting the parent.
    """

    # How often the parent re-checks the result queue / child liveness (seconds).
    _POLL_INTERVAL = 0.1
    # How long a child that already reported may take to clean up and exit.
    _EXIT_GRACE = 5.0

    def __init__(self, timeout: float = 60.0, test_code: str | None = None):
        """
        Args:
            timeout: Maximum time in seconds to wait for test execution
            test_code: Static test code to run (if not using dependencies)
        """
        self.timeout = timeout
        self._static_test_code = test_code

    def validate(self, artifact: Artifact, **deps: Any) -> GuardResult:
        """
        Run tests in a child process and translate the outcome.

        Args:
            artifact: The implementation artifact to test
            **deps: Dependency artifacts; the first one passed (whatever its
                keyword name) supplies the test code

        Returns:
            GuardResult with the test outcome. passed=False is also returned
            when no test code is available, when the child exceeds the
            timeout, or when the child exits without reporting a result.
        """
        # Local imports: only this method needs them.
        import time
        from queue import Empty

        # Auto-detect first dependency (test guards typically have exactly one)
        test_artifact = next(iter(deps.values()), None) if deps else None
        test_code = test_artifact.content if test_artifact else self._static_test_code

        if not test_code:
            return GuardResult(
                passed=False,
                feedback="No test code provided (via dependency or config)",
            )

        q: multiprocessing.Queue = multiprocessing.Queue()
        p = multiprocessing.Process(
            target=self._run_tests, args=(artifact.content, test_code, q)
        )
        p.start()

        # Read the result BEFORE joining. A child that has queued data cannot
        # exit until that data is consumed, so join-then-read turns a large
        # failure report into a bogus timeout. Queue.empty() is also not a
        # reliable way to tell whether a result has arrived.
        deadline = time.monotonic() + self.timeout
        outcome = None
        while outcome is None:
            remaining = deadline - time.monotonic()
            try:
                outcome = q.get(
                    timeout=max(0.0, min(self._POLL_INTERVAL, remaining))
                )
            except Empty:
                if not p.is_alive():
                    # Child is gone; allow one last read for a result that
                    # was flushed just before it exited.
                    try:
                        outcome = q.get(timeout=self._POLL_INTERVAL)
                    except Empty:
                        pass
                    break
                if remaining <= 0:
                    break

        if outcome is None and p.is_alive():
            p.terminate()
            p.join()
            return GuardResult(
                passed=False,
                feedback=f"Timeout: Test execution exceeded {self.timeout}s",
            )

        # Reap the child; it may still be removing its temp directory.
        p.join(self._EXIT_GRACE)
        if p.is_alive():
            p.terminate()
            p.join()

        if outcome is not None:
            passed, msg = outcome
            return GuardResult(passed=passed, feedback=msg)
        return GuardResult(passed=False, feedback="Test execution crashed")

    def _run_tests(self, impl_code: str, test_code: str, q: Any) -> None:
        """
        Execute tests using pytest in a temporary directory.

        This method is the target of the child process started by validate().
        The implementation is written to ``implementation.py`` and the tests
        to ``test_generated.py``; the directory is put on sys.path so the
        tests can import the implementation.

        Args:
            impl_code: The implementation code to test
            test_code: The test code to run against the implementation
            q: Queue to send the (passed, message) result back to the parent
        """
        import os
        import tempfile
        from io import StringIO

        if not impl_code:
            q.put((False, "No implementation code"))
            return

        with tempfile.TemporaryDirectory() as tmpdir:
            # Write implementation as importable module. UTF-8 is Python's
            # default source encoding, so do not rely on the platform default.
            impl_path = os.path.join(tmpdir, "implementation.py")
            with open(impl_path, "w", encoding="utf-8") as f:
                f.write(impl_code)

            # Write test file
            test_path = os.path.join(tmpdir, "test_generated.py")
            with open(test_path, "w", encoding="utf-8") as f:
                f.write(test_code)

            # Add tmpdir to sys.path for imports
            sys.path.insert(0, tmpdir)

            try:
                import pytest

                # Collects the text of every failing report
                captured_output = StringIO()

                class OutputCapture:
                    """Pytest plugin to capture failure output."""

                    @pytest.hookimpl(hookwrapper=True)
                    def pytest_runtest_logreport(self, report: Any) -> Any:
                        yield
                        if report.failed:
                            captured_output.write(
                                f"{report.nodeid}: {report.longreprtext}\n"
                            )

                # Run pytest
                exit_code = pytest.main(
                    [
                        test_path,
                        "-v",
                        "--tb=short",
                        "-q",
                        "--no-header",
                    ],
                    plugins=[OutputCapture()],
                )

                if exit_code == pytest.ExitCode.OK:
                    q.put((True, "All tests passed"))
                elif exit_code == pytest.ExitCode.NO_TESTS_COLLECTED:
                    q.put((False, "No tests collected by pytest"))
                else:
                    output = captured_output.getvalue()
                    if output:
                        q.put((False, f"Test failures:\n{output}"))
                    else:
                        q.put((False, f"pytest exited with code {exit_code}"))

            except SyntaxError as e:
                q.put((False, f"Syntax error: {e}"))
            except Exception as e:
                q.put((False, f"pytest execution error: {type(e).__name__}: {e}"))
            finally:
                # Clean up sys.path
                if tmpdir in sys.path:
                    sys.path.remove(tmpdir)
                # Clean up implementation module if loaded
                if "implementation" in sys.modules:
                    del sys.modules["implementation"]
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Static guards - Pure AST-based validation with no side effects.
|
|
3
|
+
|
|
4
|
+
These guards are fast, deterministic, and do not execute code.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from atomicguard.guards.static.imports import ImportGuard
|
|
8
|
+
from atomicguard.guards.static.syntax import SyntaxGuard
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"ImportGuard",
|
|
12
|
+
"SyntaxGuard",
|
|
13
|
+
]
|