atomicguard 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atomicguard/__init__.py +116 -0
- atomicguard/application/__init__.py +16 -0
- atomicguard/application/action_pair.py +65 -0
- atomicguard/application/agent.py +129 -0
- atomicguard/application/workflow.py +149 -0
- atomicguard/domain/__init__.py +51 -0
- atomicguard/domain/exceptions.py +28 -0
- atomicguard/domain/interfaces.py +119 -0
- atomicguard/domain/models.py +145 -0
- atomicguard/domain/prompts.py +85 -0
- atomicguard/guards/__init__.py +19 -0
- atomicguard/guards/base.py +41 -0
- atomicguard/guards/human.py +85 -0
- atomicguard/guards/syntax.py +33 -0
- atomicguard/guards/test_runner.py +176 -0
- atomicguard/infrastructure/__init__.py +23 -0
- atomicguard/infrastructure/llm/__init__.py +11 -0
- atomicguard/infrastructure/llm/mock.py +61 -0
- atomicguard/infrastructure/llm/ollama.py +132 -0
- atomicguard/infrastructure/persistence/__init__.py +11 -0
- atomicguard/infrastructure/persistence/filesystem.py +232 -0
- atomicguard/infrastructure/persistence/memory.py +39 -0
- atomicguard-0.1.0.dist-info/METADATA +137 -0
- atomicguard-0.1.0.dist-info/RECORD +27 -0
- atomicguard-0.1.0.dist-info/WHEEL +5 -0
- atomicguard-0.1.0.dist-info/licenses/LICENSE +21 -0
- atomicguard-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Domain models for the Dual-State Framework.
|
|
3
|
+
|
|
4
|
+
These are pure data structures aligned with paper Definitions 4-6.
|
|
5
|
+
All models are immutable (frozen dataclasses) to ensure referential transparency.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from enum import Enum
|
|
10
|
+
from typing import TYPE_CHECKING
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from atomicguard.domain.interfaces import ArtifactDAGInterface
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# =============================================================================
|
|
17
|
+
# ARTIFACT MODEL (Definition 4-6)
|
|
18
|
+
# =============================================================================
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ArtifactStatus(Enum):
    """Lifecycle state of an artifact node in the DAG.

    Members:
        PENDING: The artifact was generated but has not been validated yet.
        REJECTED: The guard returned ⊥ for this artifact.
        ACCEPTED: The guard returned ⊤; this artifact is final for its step.
        SUPERSEDED: The guard returned ⊤, but a later attempt also passed.
    """

    PENDING = "pending"
    REJECTED = "rejected"
    ACCEPTED = "accepted"
    SUPERSEDED = "superseded"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass(frozen=True)
class FeedbackEntry:
    """One rejection record in the feedback history H.

    Attributes:
        artifact_id: Identifier of the artifact that was rejected.
        feedback: The guard's rejection message (φ) for that artifact.
    """

    artifact_id: str
    feedback: str
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass(frozen=True)
class ContextSnapshot:
    """Frozen record of the context C that conditioned a generation (Definition 5).

    Attributes:
        specification: Ψ, the static specification.
        constraints: Ω, the global constraints.
        feedback_history: H, the rejections accumulated so far.
        dependency_ids: IDs of artifacts produced by prior workflow steps.
    """

    specification: str
    constraints: str
    feedback_history: tuple[FeedbackEntry, ...]
    dependency_ids: tuple[str, ...]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass(frozen=True)
class Artifact:
    """Immutable node of the Versioned Repository DAG (Definition 4).

    Each instance records one generation attempt together with the
    provenance needed to trace how it was produced.

    Attributes:
        artifact_id: Unique identifier (UUID).
        content: The generated code/text.
        previous_attempt_id: Previous attempt in the retry chain of the same
            action pair, or None. Cross-step dependencies are not stored
            here; they live in ``context.dependency_ids``.
        action_pair_id: The action pair (Definition 6: A = ⟨ρ, a_gen, G⟩)
            that produced this artifact.
        created_at: Creation time as an ISO timestamp string.
        attempt_number: Attempt index within this action pair context.
        status: pending / rejected / accepted / superseded.
        guard_result: ⊤ or ⊥ as a bool; None while still pending.
        feedback: φ, the guard feedback (empty if the guard passed).
        context: Full context snapshot taken at generation time.
    """

    # --- Identity ---
    artifact_id: str
    content: str

    # --- DAG structure ---
    previous_attempt_id: str | None

    # --- Action pair coupling ---
    action_pair_id: str

    # --- Metadata ---
    created_at: str
    attempt_number: int
    status: ArtifactStatus
    guard_result: bool | None
    feedback: str
    context: ContextSnapshot
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# =============================================================================
|
|
77
|
+
# GUARD RESULT
|
|
78
|
+
# =============================================================================
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@dataclass(frozen=True)
class GuardResult:
    """Frozen outcome of a single guard validation.

    Attributes:
        passed: Whether the guard accepted the artifact.
        feedback: Message accompanying the verdict; defaults to an empty string.
    """

    passed: bool
    feedback: str = ""
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# =============================================================================
|
|
90
|
+
# CONTEXT AND ENVIRONMENT
|
|
91
|
+
# =============================================================================
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@dataclass(frozen=True)
class AmbientEnvironment:
    """Ambient Environment E = ⟨R, Ω⟩.

    Attributes:
        repository: R, the artifact DAG backing this environment.
        constraints: Ω, the global constraints; defaults to an empty string.
    """

    repository: "ArtifactDAGInterface"
    constraints: str = ""
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
@dataclass(frozen=True)
class Context:
    """Frozen, hierarchically composed generation context (Definition 5).

    Attributes:
        ambient: The ambient environment this context is built on.
        specification: The specification text for the current generation.
        current_artifact: Optional string for the artifact currently under
            consideration; None by default.
        feedback_history: Pairs of strings describing earlier rejections;
            empty by default.
        dependencies: ``(key, artifact)`` pairs from prior steps; empty by
            default.
    """

    ambient: AmbientEnvironment
    specification: str
    current_artifact: str | None = None
    feedback_history: tuple[tuple[str, str], ...] = ()
    dependencies: tuple[tuple[str, "Artifact"], ...] = ()
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
# =============================================================================
|
|
116
|
+
# WORKFLOW STATE
|
|
117
|
+
# =============================================================================
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@dataclass
|
|
121
|
+
class WorkflowState:
|
|
122
|
+
"""Mutable workflow state tracking guard satisfaction."""
|
|
123
|
+
|
|
124
|
+
guards: dict[str, bool] = field(default_factory=dict)
|
|
125
|
+
artifact_ids: dict[str, str] = field(default_factory=dict)
|
|
126
|
+
|
|
127
|
+
def is_satisfied(self, guard_id: str) -> bool:
|
|
128
|
+
return self.guards.get(guard_id, False)
|
|
129
|
+
|
|
130
|
+
def satisfy(self, guard_id: str, artifact_id: str) -> None:
|
|
131
|
+
self.guards[guard_id] = True
|
|
132
|
+
self.artifact_ids[guard_id] = artifact_id
|
|
133
|
+
|
|
134
|
+
def get_artifact_id(self, guard_id: str) -> str | None:
|
|
135
|
+
return self.artifact_ids.get(guard_id)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
@dataclass(frozen=True)
class WorkflowResult:
    """Outcome of executing a workflow.

    Attributes:
        success: Whether the workflow succeeded.
        artifacts: Artifacts produced by the workflow, keyed by string.
        failed_step: Identifier of the step that failed, or None (default).
        provenance: ``(artifact, str)`` pairs recorded for the run; empty by
            default.
    """

    success: bool
    artifacts: dict[str, Artifact]
    failed_step: str | None = None
    provenance: tuple[tuple[Artifact, str], ...] = ()
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Prompt and task definitions for the Dual-State Framework.
|
|
3
|
+
|
|
4
|
+
This module provides:
|
|
5
|
+
- PromptTemplate: Structured prompt rendering
|
|
6
|
+
- StepDefinition: Single workflow step specification
|
|
7
|
+
- TaskDefinition: Complete task with multiple steps
|
|
8
|
+
|
|
9
|
+
These are domain structures (schemas) only. Actual task content should be
|
|
10
|
+
defined by the calling application (e.g., benchmarks), not hardcoded here.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from dataclasses import dataclass
|
|
14
|
+
from typing import TYPE_CHECKING
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from atomicguard.domain.models import Context
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# =============================================================================
|
|
21
|
+
# PROMPT TEMPLATE (moved from models.py)
|
|
22
|
+
# =============================================================================
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass(frozen=True)
class PromptTemplate:
    """Structured prompt template consumed by a generator.

    Attributes:
        role: Text rendered under the ``# ROLE`` heading.
        constraints: Text rendered under the ``# CONSTRAINTS`` heading.
        task: Text rendered under the ``# TASK`` heading.
        feedback_wrapper: ``str.format`` template with a ``{feedback}``
            placeholder, applied to every entry of the feedback history.
    """

    role: str
    constraints: str
    task: str
    feedback_wrapper: str = (
        "GUARD REJECTION:\n{feedback}\nInstruction: Address the rejection above."
    )

    def render(self, context: "Context") -> str:
        """Build the full prompt text for ``context``.

        Sections are emitted in this order and joined by blank lines: role,
        constraints, ambient constraints (only if non-empty), feedback
        history (only if non-empty, one numbered entry per rejection), task.

        Args:
            context: Supplies ``ambient.constraints`` and ``feedback_history``.

        Returns:
            The rendered prompt string.
        """
        sections = [f"# ROLE\n{self.role}", f"# CONSTRAINTS\n{self.constraints}"]

        ambient_constraints = context.ambient.constraints
        if ambient_constraints:
            sections.append(f"# CONTEXT\n{ambient_constraints}")

        history = context.feedback_history
        if history:
            sections.append("# HISTORY (Context Refinement)")
            # Only the feedback half of each pair is rendered; attempts are
            # numbered from 1.
            sections.extend(
                "--- Attempt {} ---\n{}".format(
                    number, self.feedback_wrapper.format(feedback=rejection)
                )
                for number, (_unused, rejection) in enumerate(history, start=1)
            )

        sections.append(f"# TASK\n{self.task}")
        return "\n\n".join(sections)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# =============================================================================
|
|
57
|
+
# TASK DEFINITIONS (DS-PDDL semantic layer)
|
|
58
|
+
# =============================================================================
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@dataclass(frozen=True)
class StepDefinition:
    """Specification of one workflow step.

    Attributes:
        step_id: Step identifier, e.g. "g_test" or "g_impl".
        prompt: Prompt template text containing ``{placeholders}``.
        guard: Guard type name, e.g. "syntax", "dynamic_test", "human".
        requires: IDs of the steps this step depends on; empty by default.
    """

    step_id: str
    prompt: str
    guard: str
    requires: tuple[str, ...] = ()
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass(frozen=True)
class TaskDefinition:
    """A complete task made of several workflow steps.

    Attributes:
        task_id: Task identifier, e.g. "tdd_stack".
        name: Human-readable name.
        specification: High-level task description (Ψ).
        steps: The workflow steps, in order.
    """

    task_id: str
    name: str
    specification: str
    steps: tuple[StepDefinition, ...]

    def get_step(self, step_id: str) -> StepDefinition | None:
        """Return the first step whose ``step_id`` matches, or None if absent."""
        matching = (candidate for candidate in self.steps if candidate.step_id == step_id)
        return next(matching, None)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Guards for the Dual-State Framework.
|
|
3
|
+
|
|
4
|
+
Guards are deterministic validators that return ⊤ (pass) or ⊥ (fail with feedback).
|
|
5
|
+
They can be composed using CompositeGuard for layered validation.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from atomicguard.guards.base import CompositeGuard
|
|
9
|
+
from atomicguard.guards.human import HumanReviewGuard
|
|
10
|
+
from atomicguard.guards.syntax import SyntaxGuard
|
|
11
|
+
from atomicguard.guards.test_runner import DynamicTestGuard, TestGuard
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"CompositeGuard",
|
|
15
|
+
"SyntaxGuard",
|
|
16
|
+
"TestGuard",
|
|
17
|
+
"DynamicTestGuard",
|
|
18
|
+
"HumanReviewGuard",
|
|
19
|
+
]
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Base guard implementations and composition patterns.
|
|
3
|
+
|
|
4
|
+
CompositeGuard implements the Decorator pattern for guard composition.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from atomicguard.domain.interfaces import GuardInterface
|
|
10
|
+
from atomicguard.domain.models import Artifact, GuardResult
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class CompositeGuard(GuardInterface):
    """
    Conjunction of several guards: the artifact passes only if every guard passes.

    Guards run in the order they were supplied and evaluation stops at the
    first failure, so automated checks placed first run before human review.

    Per paper section on Composite Guards:
    G_composite = G_automated ∧ G_human
    """

    def __init__(self, *guards: GuardInterface):
        """
        Args:
            *guards: Guards to compose, in evaluation order
        """
        self.guards = guards

    def validate(self, artifact: Artifact, **deps: Any) -> GuardResult:
        """
        Run the composed guards against ``artifact``.

        Args:
            artifact: The artifact to validate
            **deps: Forwarded unchanged to every composed guard

        Returns:
            The first failing guard's result, or a passing result with
            feedback "All guards passed" when none fail
        """
        for member in self.guards:
            verdict = member.validate(artifact, **deps)
            if verdict.passed:
                continue
            # Stop here: later guards are not consulted after a failure.
            return verdict
        return GuardResult(passed=True, feedback="All guards passed")
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Human-in-the-loop review guard.
|
|
3
|
+
|
|
4
|
+
Blocks workflow until human approval via CLI prompts.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
from rich.prompt import Prompt
|
|
11
|
+
from rich.syntax import Syntax
|
|
12
|
+
|
|
13
|
+
from atomicguard.domain.interfaces import GuardInterface
|
|
14
|
+
from atomicguard.domain.models import Artifact, GuardResult
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class HumanReviewGuard(GuardInterface):
    """
    Guard that holds the workflow until a human approves or rejects.

    Per paper Phase 8 (Human Oversight):
    - Pauses workflow to poll external oracle (human)
    - Returns approval or rejection with feedback
    - Feedback flows back to generator for retry

    This implementation asks through synchronous CLI prompts.
    For async/distributed use, extend with file-based or webhook polling.
    """

    def __init__(self, prompt_title: str = "HUMAN REVIEW REQUIRED"):
        """
        Args:
            prompt_title: Title shown at the top of the review prompt
        """
        self.prompt_title = prompt_title
        self.console = Console()

    def validate(self, artifact: Artifact, **deps: Any) -> GuardResult:
        """
        Show the artifact on the console and ask the reviewer for a verdict.

        The reviewer answers "y" (approve), "n" (reject) or "v" (view extra
        context, then answer "y"/"n"). A rejection asks for a reason, which
        is returned as feedback.

        Args:
            artifact: The artifact to review
            **deps: Dependency artifacts; their IDs are listed for context

        Returns:
            GuardResult reflecting the human decision
        """
        out = self.console

        # Header identifying what is being reviewed.
        out.print(f"\n[bold yellow]═══ {self.prompt_title} ═══[/bold yellow]")
        out.print(f"[dim]Artifact ID: {artifact.artifact_id}[/dim]")
        out.print(f"[dim]Action Pair: {artifact.action_pair_id}[/dim]\n")

        # Artifact body, highlighted as Python source.
        highlighted = Syntax(
            artifact.content, "python", theme="monokai", line_numbers=True
        )
        out.print(highlighted)

        if deps:
            out.print("\n[dim]Dependencies:[/dim]")
            for key, dep_artifact in deps.items():
                out.print(f" [dim]{key}: {dep_artifact.artifact_id}[/dim]")

        verdict = Prompt.ask(
            "\n[bold]Approve this artifact?[/bold]", choices=["y", "n", "v"]
        )

        if verdict == "v":
            # Reviewer asked for more context before deciding.
            snapshot = artifact.context
            out.print("\n[dim]Context:[/dim]")
            out.print(f" Specification: {snapshot.specification[:200]}...")
            if snapshot.feedback_history:
                out.print(f" Previous failures: {len(snapshot.feedback_history)}")
            verdict = Prompt.ask("\n[bold]Approve?[/bold]", choices=["y", "n"])

        if verdict != "y":
            reason = Prompt.ask("[bold]Rejection reason[/bold]")
            return GuardResult(passed=False, feedback=f"Human rejected: {reason}")
        return GuardResult(passed=True, feedback="Human approved")
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Syntax validation guard.
|
|
3
|
+
|
|
4
|
+
Pure guard with no I/O dependencies - validates Python AST.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import ast
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from atomicguard.domain.interfaces import GuardInterface
|
|
11
|
+
from atomicguard.domain.models import Artifact, GuardResult
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class SyntaxGuard(GuardInterface):
    """
    Validates Python syntax using AST parsing.

    This is a pure guard with no I/O - it only checks that the artifact
    content can be parsed as Python source. The code is never executed.
    """

    def validate(self, artifact: Artifact, **_deps: Any) -> GuardResult:
        """
        Parse artifact content as Python AST.

        Args:
            artifact: The artifact whose ``content`` is parsed
            **_deps: Accepted for interface compatibility; ignored

        Returns:
            GuardResult with passed=True if the content parses, otherwise
            passed=False with the parser's error in the feedback
        """
        try:
            ast.parse(artifact.content)
        except SyntaxError as e:
            return GuardResult(passed=False, feedback=f"Syntax error: {e}")
        except (ValueError, RecursionError) as e:
            # ast.parse does not only raise SyntaxError: source containing
            # NUL bytes raises ValueError on some Python versions, and very
            # deeply nested source can exhaust the parser's recursion limit.
            # Both mean "not acceptable source", so reject instead of
            # letting the exception escape the guard.
            return GuardResult(
                passed=False,
                feedback=f"Unparseable source ({type(e).__name__}): {e}",
            )
        return GuardResult(passed=True, feedback="Syntax valid")
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Test execution guards.
|
|
3
|
+
|
|
4
|
+
Guards that validate artifacts by running tests against them.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import multiprocessing
|
|
8
|
+
import sys
|
|
9
|
+
import types
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from atomicguard.domain.interfaces import GuardInterface
|
|
13
|
+
from atomicguard.domain.models import Artifact, GuardResult
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class TestGuard(GuardInterface):
    """
    Guard that runs test code against the artifact inside the current process.

    The artifact and the tests are executed with ``exec`` in one shared
    namespace. Nothing isolates them from the caller's process; use
    DynamicTestGuard when isolation is required.
    """

    def __init__(self, test_code: str | None = None):
        """
        Args:
            test_code: Static test code to run when no 'test' dependency is given
        """
        self._static_test_code = test_code

    def validate(self, artifact: Artifact, **deps: Any) -> GuardResult:
        """
        Execute the artifact, then the test code, in a shared namespace.

        Args:
            artifact: The implementation artifact to test
            **deps: May include a 'test' artifact whose content is the test
                code; it takes precedence over the static test code

        Returns:
            A passing GuardResult if both executions finish without raising;
            otherwise a failing one describing the exception
        """
        test_artifact = deps.get("test")
        if test_artifact:
            test_code = test_artifact.content
        else:
            test_code = self._static_test_code

        if not test_code:
            return GuardResult(passed=False, feedback="No test code provided")

        # NOTE(security): exec runs generated code in this very process.
        shared_globals: dict[str, Any] = {}
        try:
            exec(artifact.content, shared_globals)
            exec(test_code, shared_globals)
        except AssertionError as failure:
            return GuardResult(passed=False, feedback=f"Test failed: {failure}")
        except Exception as error:
            return GuardResult(
                passed=False, feedback=f"{type(error).__name__}: {error}"
            )
        return GuardResult(passed=True)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class DynamicTestGuard(GuardInterface):
    """
    Runs test code against implementation in a separate process.

    Expects 'test' dependency containing the test artifact.
    Executes tests and returns pass/fail with detailed feedback.

    Uses multiprocessing for isolation to prevent test code from
    affecting the parent process.
    """

    # Seconds between liveness checks of the child while waiting for a result.
    _POLL_INTERVAL = 0.05
    # Seconds the child gets to exit on its own after it has reported.
    _EXIT_GRACE = 1.0

    def __init__(self, timeout: float = 60.0):
        """
        Args:
            timeout: Maximum time in seconds to wait for test execution
        """
        self.timeout = timeout

    def validate(self, artifact: Artifact, **deps: Any) -> GuardResult:
        """
        Run tests in a child process and translate its report.

        Args:
            artifact: The implementation artifact to test
            **deps: Must include 'test' artifact with test code

        Returns:
            GuardResult with the child's verdict; a "Timeout" failure if no
            verdict arrived in time while the child was still running; or a
            "Test execution crashed" failure if the child ended without
            reporting
        """
        test_artifact = deps.get("test")
        if not test_artifact:
            return GuardResult(
                passed=False, feedback="No test artifact in dependencies"
            )

        q: multiprocessing.Queue = multiprocessing.Queue()
        p = multiprocessing.Process(
            target=self._run_tests, args=(artifact, test_artifact, q)
        )
        p.start()
        try:
            # Drain the queue BEFORE joining. A child that has put a large
            # report cannot exit until the parent reads it, so joining first
            # would turn big failure messages into false timeouts.
            outcome = self._await_outcome(p, q)
            if outcome is not None:
                p.join(self._EXIT_GRACE)
            timed_out = outcome is None and p.is_alive()
        finally:
            if p.is_alive():
                p.terminate()
            p.join()
            q.close()

        if outcome is not None:
            passed, msg = outcome
            return GuardResult(passed=passed, feedback=msg)
        if timed_out:
            return GuardResult(
                passed=False,
                feedback=f"Timeout: Test execution exceeded {self.timeout}s",
            )
        return GuardResult(passed=False, feedback="Test execution crashed")

    def _await_outcome(self, p: Any, q: Any) -> Any:
        """Wait for the child's ``(passed, message)`` report.

        Polls the queue in short slices until a report arrives, the child
        exits, or ``self.timeout`` elapses (a timeout of None waits
        indefinitely). Returns the report, or None if there is none.
        """
        import queue
        import time

        deadline = None if self.timeout is None else time.monotonic() + self.timeout
        while True:
            if deadline is None:
                wait = self._POLL_INTERVAL
            else:
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    break
                wait = min(remaining, self._POLL_INTERVAL)
            try:
                return q.get(timeout=wait)
            except queue.Empty:
                if not p.is_alive():
                    break

        # Last look: a report may have landed between the final poll and the
        # child's exit (or exactly at the deadline).
        try:
            return q.get_nowait()
        except queue.Empty:
            return None

    def _run_tests(
        self, impl_artifact: Artifact, test_artifact: Artifact, q: Any
    ) -> None:
        """
        Execute tests; this is the target run inside the child process.

        Exposes the implementation as an importable module named
        'implementation', executes the test code, calls every callable whose
        name starts with 'test_' with no arguments, and puts exactly one
        ``(passed, message)`` tuple on ``q``.
        """
        try:
            impl_code = impl_artifact.content
            test_code = test_artifact.content

            if not impl_code:
                q.put((False, "No implementation code"))
                return

            if not test_code:
                q.put((False, "No test code"))
                return

            # Create mock 'implementation' module so tests can import it
            impl_module = types.ModuleType("implementation")
            exec(impl_code, impl_module.__dict__)
            sys.modules["implementation"] = impl_module

            # Make pytest available to the test code without an explicit import
            import pytest

            test_scope = {"__builtins__": __builtins__, "pytest": pytest}
            exec(test_code, test_scope)

            # Find and run test functions
            test_funcs = [
                v
                for k, v in test_scope.items()
                if k.startswith("test_") and callable(v)
            ]

            if not test_funcs:
                q.put((False, "No test functions found"))
                return

            failures = []
            for func in test_funcs:
                try:
                    func()
                except AssertionError as e:
                    failures.append(f"{func.__name__}: AssertionError - {e}")
                except Exception as e:
                    failures.append(f"{func.__name__}: {type(e).__name__} - {e}")

            if failures:
                q.put((False, "Test failures:\n" + "\n".join(failures)))
            else:
                q.put((True, f"All {len(test_funcs)} tests passed"))

        except SyntaxError as e:
            q.put((False, f"Syntax error: {e}"))
        except Exception as e:
            q.put((False, f"Execution error: {e}"))
        finally:
            # Remove the temporary module registration again.
            if "implementation" in sys.modules:
                del sys.modules["implementation"]
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Infrastructure layer for the Dual-State Framework.
|
|
3
|
+
|
|
4
|
+
Contains adapters for external concerns (persistence, LLMs, etc.).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from atomicguard.infrastructure.llm import (
|
|
8
|
+
MockGenerator,
|
|
9
|
+
OllamaGenerator,
|
|
10
|
+
)
|
|
11
|
+
from atomicguard.infrastructure.persistence import (
|
|
12
|
+
FilesystemArtifactDAG,
|
|
13
|
+
InMemoryArtifactDAG,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
# Persistence
|
|
18
|
+
"InMemoryArtifactDAG",
|
|
19
|
+
"FilesystemArtifactDAG",
|
|
20
|
+
# LLM
|
|
21
|
+
"OllamaGenerator",
|
|
22
|
+
"MockGenerator",
|
|
23
|
+
]
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Mock generator for testing without LLM.
|
|
3
|
+
|
|
4
|
+
Returns predefined responses in sequence.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import uuid
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
|
|
10
|
+
from atomicguard.domain.interfaces import GeneratorInterface
|
|
11
|
+
from atomicguard.domain.models import Artifact, ArtifactStatus, Context, ContextSnapshot
|
|
12
|
+
from atomicguard.domain.prompts import PromptTemplate
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class MockGenerator(GeneratorInterface):
    """Generator stand-in that replays a fixed sequence of responses."""

    def __init__(self, responses: list[str]):
        """
        Args:
            responses: Response strings handed out one per generate() call, in order
        """
        self._responses = responses
        self._call_count = 0

    def generate(
        self, _context: Context, _template: PromptTemplate | None = None
    ) -> Artifact:
        """Wrap the next canned response in a pending Artifact.

        Both arguments are ignored. The artifact gets a fresh UUID, the
        current local time as its ISO timestamp, action pair "mock", an
        empty context snapshot, and an attempt number equal to the number of
        calls made so far (1 for the first call).

        Raises:
            RuntimeError: If every response has already been returned.
        """
        position = self._call_count
        if position >= len(self._responses):
            raise RuntimeError("MockGenerator exhausted responses")

        canned = self._responses[position]
        self._call_count = position + 1

        blank_snapshot = ContextSnapshot(
            specification="",
            constraints="",
            feedback_history=(),
            dependency_ids=(),
        )
        return Artifact(
            artifact_id=str(uuid.uuid4()),
            content=canned,
            previous_attempt_id=None,
            action_pair_id="mock",
            created_at=datetime.now().isoformat(),
            attempt_number=self._call_count,
            status=ArtifactStatus.PENDING,
            guard_result=None,
            feedback="",
            context=blank_snapshot,
        )

    @property
    def call_count(self) -> int:
        """How many responses have been handed out since creation or the last reset()."""
        return self._call_count

    def reset(self) -> None:
        """Rewind to the first response so the sequence can be replayed."""
        self._call_count = 0
|