atomicguard-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

--- /dev/null
+++ b/atomicguard/domain/models.py
@@ -0,0 +1,145 @@
+ """
+ Domain models for the Dual-State Framework.
+
+ These are pure data structures aligned with paper Definitions 4-6.
+ All models are immutable (frozen dataclasses) to ensure referential transparency.
+ """
+
+ from dataclasses import dataclass, field
+ from enum import Enum
+ from typing import TYPE_CHECKING
+
+ if TYPE_CHECKING:
+     from atomicguard.domain.interfaces import ArtifactDAGInterface
+
+
+ # =============================================================================
+ # ARTIFACT MODEL (Definitions 4-6)
+ # =============================================================================
+
+
+ class ArtifactStatus(Enum):
+     """Status of an artifact in the DAG."""
+
+     PENDING = "pending"  # Generated, not yet validated
+     REJECTED = "rejected"  # Guard returned ⊥
+     ACCEPTED = "accepted"  # Guard returned ⊤, final for this step
+     SUPERSEDED = "superseded"  # Guard returned ⊤, but a later passing attempt replaced it
+
+
+ @dataclass(frozen=True)
+ class FeedbackEntry:
+     """Single entry in feedback history H."""
+
+     artifact_id: str  # Reference to the rejected artifact
+     feedback: str  # Guard's rejection message φ
+
+
+ @dataclass(frozen=True)
+ class ContextSnapshot:
+     """Immutable context C that conditioned generation (Definition 5)."""
+
+     specification: str  # Ψ - static specification
+     constraints: str  # Ω - global constraints
+     feedback_history: tuple[FeedbackEntry, ...]  # H - accumulated rejections
+     dependency_ids: tuple[str, ...]  # Artifact IDs from prior workflow steps
+
+
+ @dataclass(frozen=True)
+ class Artifact:
+     """
+     Immutable node in the Versioned Repository DAG (Definition 4).
+
+     Represents a single generation attempt with full provenance tracking.
+     """
+
+     # Identity
+     artifact_id: str  # Unique identifier (UUID)
+     content: str  # The generated code/text
+
+     # DAG Structure
+     previous_attempt_id: str | None  # Retry chain within same action pair
+     # Cross-step deps are in context.dependency_ids
+
+     # Action Pair Coupling (Definition 6: A = ⟨ρ, a_gen, G⟩)
+     action_pair_id: str  # Which action pair produced this
+
+     # Metadata
+     created_at: str  # ISO timestamp
+     attempt_number: int  # Attempt within this action pair context
+     status: ArtifactStatus  # pending/rejected/accepted/superseded
+     guard_result: bool | None  # ⊤ or ⊥ (None if pending)
+     feedback: str  # φ - guard feedback (empty if passed)
+     context: ContextSnapshot  # Full context snapshot at generation time
+
+
+ # =============================================================================
+ # GUARD RESULT
+ # =============================================================================
+
+
+ @dataclass(frozen=True)
+ class GuardResult:
+     """Immutable guard validation outcome."""
+
+     passed: bool
+     feedback: str = ""
+
+
+ # =============================================================================
+ # CONTEXT AND ENVIRONMENT
+ # =============================================================================
+
+
+ @dataclass(frozen=True)
+ class AmbientEnvironment:
+     """Ambient Environment E = ⟨R, Ω⟩"""
+
+     repository: "ArtifactDAGInterface"
+     constraints: str = ""
+
+
+ @dataclass(frozen=True)
+ class Context:
+     """Immutable hierarchical context composition (Definition 5)."""
+
+     ambient: AmbientEnvironment
+     specification: str
+     current_artifact: str | None = None
+     feedback_history: tuple[tuple[str, str], ...] = ()
+     dependencies: tuple[
+         tuple[str, "Artifact"], ...
+     ] = ()  # (key, artifact) pairs from prior steps
+
+
+ # =============================================================================
+ # WORKFLOW STATE
+ # =============================================================================
+
+
+ @dataclass
+ class WorkflowState:
+     """Mutable workflow state tracking guard satisfaction."""
+
+     guards: dict[str, bool] = field(default_factory=dict)
+     artifact_ids: dict[str, str] = field(default_factory=dict)
+
+     def is_satisfied(self, guard_id: str) -> bool:
+         return self.guards.get(guard_id, False)
+
+     def satisfy(self, guard_id: str, artifact_id: str) -> None:
+         self.guards[guard_id] = True
+         self.artifact_ids[guard_id] = artifact_id
+
+     def get_artifact_id(self, guard_id: str) -> str | None:
+         return self.artifact_ids.get(guard_id)
+
+
+ @dataclass(frozen=True)
+ class WorkflowResult:
+     """Result of workflow execution."""
+
+     success: bool
+     artifacts: dict[str, Artifact]
+     failed_step: str | None = None
+     provenance: tuple[tuple[Artifact, str], ...] = ()
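
To see how these models compose, here is a minimal sketch of building one artifact with its context snapshot; the literal values and the "g_impl" id are illustrative, not taken from the package:

    import uuid
    from datetime import datetime

    from atomicguard.domain.models import (
        Artifact,
        ArtifactStatus,
        ContextSnapshot,
        FeedbackEntry,
    )

    # Snapshot of the context that conditioned generation (Definition 5)
    snapshot = ContextSnapshot(
        specification="Implement a stack with push/pop",
        constraints="Python 3.11, stdlib only",
        feedback_history=(FeedbackEntry(artifact_id="a1", feedback="pop() missing"),),
        dependency_ids=(),
    )

    artifact = Artifact(
        artifact_id=str(uuid.uuid4()),
        content="class Stack: ...",
        previous_attempt_id="a1",  # retry chain within the same action pair
        action_pair_id="g_impl",
        created_at=datetime.now().isoformat(),
        attempt_number=2,
        status=ArtifactStatus.PENDING,
        guard_result=None,
        feedback="",
        context=snapshot,
    )

    # Frozen dataclasses reject mutation, preserving provenance:
    # artifact.status = ArtifactStatus.ACCEPTED  # raises dataclasses.FrozenInstanceError
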

--- /dev/null
+++ b/atomicguard/domain/prompts.py
@@ -0,0 +1,85 @@
+ """
+ Prompt and task definitions for the Dual-State Framework.
+
+ This module provides:
+ - PromptTemplate: Structured prompt rendering
+ - StepDefinition: Single workflow step specification
+ - TaskDefinition: Complete task with multiple steps
+
+ These are domain structures (schemas) only. Actual task content should be
+ defined by the calling application (e.g., benchmarks), not hardcoded here.
+ """
+
+ from dataclasses import dataclass
+ from typing import TYPE_CHECKING
+
+ if TYPE_CHECKING:
+     from atomicguard.domain.models import Context
+
+
+ # =============================================================================
+ # PROMPT TEMPLATE (moved from models.py)
+ # =============================================================================
+
+
+ @dataclass(frozen=True)
+ class PromptTemplate:
+     """Structured prompt template for generator."""
+
+     role: str
+     constraints: str
+     task: str
+     feedback_wrapper: str = (
+         "GUARD REJECTION:\n{feedback}\nInstruction: Address the rejection above."
+     )
+
+     def render(self, context: "Context") -> str:
+         """Render prompt with context."""
+         parts = [
+             f"# ROLE\n{self.role}",
+             f"# CONSTRAINTS\n{self.constraints}",
+         ]
+
+         if context.ambient.constraints:
+             parts.append(f"# CONTEXT\n{context.ambient.constraints}")
+
+         if context.feedback_history:
+             parts.append("# HISTORY (Context Refinement)")
+             for i, (_artifact_content, feedback) in enumerate(context.feedback_history):
+                 wrapped = self.feedback_wrapper.format(feedback=feedback)
+                 parts.append(f"--- Attempt {i + 1} ---\n{wrapped}")
+
+         parts.append(f"# TASK\n{self.task}")
+         return "\n\n".join(parts)
+
+
+ # =============================================================================
+ # TASK DEFINITIONS (DS-PDDL semantic layer)
+ # =============================================================================
+
+
+ @dataclass(frozen=True)
+ class StepDefinition:
+     """Single workflow step specification."""
+
+     step_id: str  # e.g., "g_test", "g_impl"
+     prompt: str  # Prompt template with {placeholders}
+     guard: str  # Guard type: "syntax", "dynamic_test", "human", etc.
+     requires: tuple[str, ...] = ()  # Step IDs this depends on
+
+
+ @dataclass(frozen=True)
+ class TaskDefinition:
+     """Complete task definition with multiple workflow steps."""
+
+     task_id: str  # e.g., "tdd_stack"
+     name: str  # Human-readable name
+     specification: str  # High-level task description (Ψ)
+     steps: tuple[StepDefinition, ...]  # Ordered workflow steps
+
+     def get_step(self, step_id: str) -> StepDefinition | None:
+         """Get a step by ID."""
+         for step in self.steps:
+             if step.step_id == step_id:
+                 return step
+         return None
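
A sketch of how PromptTemplate.render assembles its sections. The no-argument InMemoryArtifactDAG() constructor is an assumption, since that class's definition is not shown in this diff; any ArtifactDAGInterface implementation would do:

    from atomicguard.domain.models import AmbientEnvironment, Context
    from atomicguard.domain.prompts import PromptTemplate
    from atomicguard.infrastructure import InMemoryArtifactDAG

    template = PromptTemplate(
        role="You are a careful Python engineer.",
        constraints="Standard library only.",
        task="Implement the specification as a single module.",
    )

    context = Context(
        ambient=AmbientEnvironment(
            repository=InMemoryArtifactDAG(),  # assumed no-arg constructor
            constraints="Target Python 3.11.",
        ),
        specification="Implement a stack",
        feedback_history=(("<attempt 1 content>", "pop() not implemented"),),
    )

    print(template.render(context))

The rendered prompt places ROLE and CONSTRAINTS first, then the ambient CONTEXT, the wrapped rejection HISTORY, and finally the TASK.
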

--- /dev/null
+++ b/atomicguard/guards/__init__.py
@@ -0,0 +1,19 @@
+ """
+ Guards for the Dual-State Framework.
+
+ Guards are deterministic validators that return ⊤ (pass) or ⊥ (fail with feedback).
+ They can be composed using CompositeGuard for layered validation.
+ """
+
+ from atomicguard.guards.base import CompositeGuard
+ from atomicguard.guards.human import HumanReviewGuard
+ from atomicguard.guards.syntax import SyntaxGuard
+ from atomicguard.guards.test_runner import DynamicTestGuard, TestGuard
+
+ __all__ = [
+     "CompositeGuard",
+     "SyntaxGuard",
+     "TestGuard",
+     "DynamicTestGuard",
+     "HumanReviewGuard",
+ ]

--- /dev/null
+++ b/atomicguard/guards/base.py
@@ -0,0 +1,41 @@
+ """
+ Base guard implementations and composition patterns.
+
+ CompositeGuard implements the Composite pattern for guard composition.
+ """
+
+ from typing import Any
+
+ from atomicguard.domain.interfaces import GuardInterface
+ from atomicguard.domain.models import Artifact, GuardResult
+
+
+ class CompositeGuard(GuardInterface):
+     """
+     Logical AND of multiple guards. All must pass.
+
+     Evaluates guards in order, short-circuiting on the first failure,
+     so cheap automated checks can run before human review.
+
+     Per paper section on Composite Guards:
+         G_composite = G_automated ∧ G_human
+     """
+
+     def __init__(self, *guards: GuardInterface):
+         """
+         Args:
+             *guards: Guards to compose (evaluated in order)
+         """
+         self.guards = guards
+
+     def validate(self, artifact: Artifact, **deps: Any) -> GuardResult:
+         """
+         Validate artifact against all composed guards.
+
+         Short-circuits on first failure.
+         """
+         for guard in self.guards:
+             result = guard.validate(artifact, **deps)
+             if not result.passed:
+                 return result  # Short-circuit on failure
+         return GuardResult(passed=True, feedback="All guards passed")
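
A usage sketch composing the guards defined in this package; the artifact fields are filled with illustrative placeholder values, as in the models.py example above:

    import uuid
    from datetime import datetime

    from atomicguard.domain.models import Artifact, ArtifactStatus, ContextSnapshot
    from atomicguard.guards import CompositeGuard, SyntaxGuard, TestGuard

    artifact = Artifact(
        artifact_id=str(uuid.uuid4()),
        content="def add(a, b):\n    return a + b\n",
        previous_attempt_id=None,
        action_pair_id="g_impl",
        created_at=datetime.now().isoformat(),
        attempt_number=1,
        status=ArtifactStatus.PENDING,
        guard_result=None,
        feedback="",
        context=ContextSnapshot("", "", (), ()),
    )

    guard = CompositeGuard(
        SyntaxGuard(),  # cheap, pure AST check runs first
        TestGuard(test_code="assert add(2, 3) == 5"),  # behavioral check second
    )

    result = guard.validate(artifact)
    print(result.passed, result.feedback)  # True "All guards passed"

Because evaluation short-circuits, a syntactically invalid artifact would return the SyntaxGuard feedback without the test code ever executing.
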

--- /dev/null
+++ b/atomicguard/guards/human.py
@@ -0,0 +1,85 @@
+ """
+ Human-in-the-loop review guard.
+
+ Blocks workflow until human approval via CLI prompts.
+ """
+
+ from typing import Any
+
+ from rich.console import Console
+ from rich.prompt import Prompt
+ from rich.syntax import Syntax
+
+ from atomicguard.domain.interfaces import GuardInterface
+ from atomicguard.domain.models import Artifact, GuardResult
+
+
+ class HumanReviewGuard(GuardInterface):
+     """
+     Blocks workflow until human approval.
+
+     Per paper Phase 8 (Human Oversight):
+     - Pauses workflow to poll external oracle (human)
+     - Returns approval or rejection with feedback
+     - Feedback flows back to generator for retry
+
+     This implementation uses synchronous CLI prompts.
+     For async/distributed use, extend with file-based or webhook polling.
+     """
+
+     def __init__(self, prompt_title: str = "HUMAN REVIEW REQUIRED"):
+         """
+         Args:
+             prompt_title: Title displayed in the review prompt
+         """
+         self.prompt_title = prompt_title
+         self.console = Console()
+
+     def validate(self, artifact: Artifact, **deps: Any) -> GuardResult:
+         """
+         Display artifact and prompt for human approval.
+
+         Args:
+             artifact: The artifact to review
+             **deps: Dependencies shown for context
+
+         Returns:
+             GuardResult based on human decision
+         """
+         self.console.print(f"\n[bold yellow]═══ {self.prompt_title} ═══[/bold yellow]")
+         self.console.print(f"[dim]Artifact ID: {artifact.artifact_id}[/dim]")
+         self.console.print(f"[dim]Action Pair: {artifact.action_pair_id}[/dim]\n")
+
+         # Display the artifact content with syntax highlighting
+         self.console.print(
+             Syntax(artifact.content, "python", theme="monokai", line_numbers=True)
+         )
+
+         # Show dependencies if present
+         if deps:
+             self.console.print("\n[dim]Dependencies:[/dim]")
+             for key, dep_artifact in deps.items():
+                 self.console.print(f"  [dim]{key}: {dep_artifact.artifact_id}[/dim]")
+
+         # Prompt for decision: y = approve, n = reject, v = view more context
+         decision = Prompt.ask(
+             "\n[bold]Approve this artifact?[/bold]", choices=["y", "n", "v"]
+         )
+
+         if decision == "v":
+             # View more context before deciding
+             self.console.print("\n[dim]Context:[/dim]")
+             self.console.print(
+                 f"  Specification: {artifact.context.specification[:200]}..."
+             )
+             if artifact.context.feedback_history:
+                 self.console.print(
+                     f"  Previous failures: {len(artifact.context.feedback_history)}"
+                 )
+             decision = Prompt.ask("\n[bold]Approve?[/bold]", choices=["y", "n"])
+
+         if decision == "y":
+             return GuardResult(passed=True, feedback="Human approved")
+         else:
+             feedback = Prompt.ask("[bold]Rejection reason[/bold]")
+             return GuardResult(passed=False, feedback=f"Human rejected: {feedback}")
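
The docstring suggests file-based polling for async or distributed use. One possible shape for that extension is sketched below; FileApprovalGuard and its JSON decision format are hypothetical, not part of the package, and the sketch assumes GuardInterface only requires a validate() method:

    import json
    import time
    from pathlib import Path
    from typing import Any

    from atomicguard.domain.interfaces import GuardInterface
    from atomicguard.domain.models import Artifact, GuardResult

    class FileApprovalGuard(GuardInterface):
        """Polls a JSON decision file instead of blocking on a CLI prompt."""

        def __init__(self, decision_dir: str, poll_interval: float = 5.0):
            self.decision_dir = Path(decision_dir)
            self.poll_interval = poll_interval

        def validate(self, artifact: Artifact, **deps: Any) -> GuardResult:
            # A reviewer writes {"approved": bool, "feedback": str} to this path.
            path = self.decision_dir / f"{artifact.artifact_id}.json"
            while not path.exists():
                time.sleep(self.poll_interval)
            decision = json.loads(path.read_text())
            return GuardResult(
                passed=decision["approved"],
                feedback=decision.get("feedback", ""),
            )
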

--- /dev/null
+++ b/atomicguard/guards/syntax.py
@@ -0,0 +1,33 @@
+ """
+ Syntax validation guard.
+
+ Pure guard with no I/O dependencies - validates Python AST.
+ """
+
+ import ast
+ from typing import Any
+
+ from atomicguard.domain.interfaces import GuardInterface
+ from atomicguard.domain.models import Artifact, GuardResult
+
+
+ class SyntaxGuard(GuardInterface):
+     """
+     Validates Python syntax using AST parsing.
+
+     This is a pure guard with no I/O - it only validates
+     that the artifact content is syntactically valid Python.
+     """
+
+     def validate(self, artifact: Artifact, **_deps: Any) -> GuardResult:
+         """
+         Parse artifact content as Python AST.
+
+         Returns:
+             GuardResult with passed=True if syntax is valid
+         """
+         try:
+             ast.parse(artifact.content)
+             return GuardResult(passed=True, feedback="Syntax valid")
+         except SyntaxError as e:
+             return GuardResult(passed=False, feedback=f"Syntax error: {e}")

--- /dev/null
+++ b/atomicguard/guards/test_runner.py
@@ -0,0 +1,176 @@
+ """
+ Test execution guards.
+
+ Guards that validate artifacts by running tests against them.
+ """
+
+ import multiprocessing
+ import sys
+ import types
+ from typing import Any
+
+ from atomicguard.domain.interfaces import GuardInterface
+ from atomicguard.domain.models import Artifact, GuardResult
+
+
+ class TestGuard(GuardInterface):
+     """
+     Validates artifact via test execution in the same process.
+
+     Simple guard that executes test code against artifact content.
+     For isolation, use DynamicTestGuard instead.
+     """
+
+     def __init__(self, test_code: str | None = None):
+         """
+         Args:
+             test_code: Static test code to run (if not using dependencies)
+         """
+         self._static_test_code = test_code
+
+     def validate(self, artifact: Artifact, **deps: Any) -> GuardResult:
+         """
+         Execute test code against artifact.
+
+         Args:
+             artifact: The implementation artifact to test
+             **deps: May include 'test' artifact with test code
+
+         Returns:
+             GuardResult with test outcome
+         """
+         test_artifact = deps.get("test")
+         test_code = test_artifact.content if test_artifact else self._static_test_code
+
+         if not test_code:
+             return GuardResult(passed=False, feedback="No test code provided")
+
+         namespace: dict[str, Any] = {}
+         try:
+             exec(artifact.content, namespace)
+             exec(test_code, namespace)
+             return GuardResult(passed=True)
+         except AssertionError as e:
+             return GuardResult(passed=False, feedback=f"Test failed: {e}")
+         except Exception as e:
+             return GuardResult(passed=False, feedback=f"{type(e).__name__}: {e}")
+
+
+ class DynamicTestGuard(GuardInterface):
+     """
+     Runs test code against implementation in isolated subprocess.
+
+     Expects 'test' dependency containing the test artifact.
+     Executes tests and returns pass/fail with detailed feedback.
+
+     Uses multiprocessing for isolation to prevent test code from
+     affecting the parent process.
+     """
+
+     def __init__(self, timeout: float = 60.0):
+         """
+         Args:
+             timeout: Maximum time in seconds to wait for test execution
+         """
+         self.timeout = timeout
+
+     def validate(self, artifact: Artifact, **deps: Any) -> GuardResult:
+         """
+         Run tests in isolated subprocess.
+
+         Args:
+             artifact: The implementation artifact to test
+             **deps: Must include 'test' artifact with test code
+
+         Returns:
+             GuardResult with test outcome
+         """
+         test_artifact = deps.get("test")
+         if not test_artifact:
+             return GuardResult(
+                 passed=False, feedback="No test artifact in dependencies"
+             )
+
+         q: multiprocessing.Queue = multiprocessing.Queue()
+         p = multiprocessing.Process(
+             target=self._run_tests, args=(artifact, test_artifact, q)
+         )
+         p.start()
+         p.join(self.timeout)
+
+         if p.is_alive():
+             p.terminate()
+             p.join()
+             return GuardResult(
+                 passed=False,
+                 feedback=f"Timeout: Test execution exceeded {self.timeout}s",
+             )
+
+         if not q.empty():
+             passed, msg = q.get()
+             return GuardResult(passed=passed, feedback=msg)
+         return GuardResult(passed=False, feedback="Test execution crashed")
+
+     def _run_tests(
+         self, impl_artifact: Artifact, test_artifact: Artifact, q: Any
+     ) -> None:
+         """
+         Execute tests in subprocess.
+
+         This method runs in a separate process for isolation.
+         """
+         try:
+             impl_code = impl_artifact.content
+             test_code = test_artifact.content
+
+             if not impl_code:
+                 q.put((False, "No implementation code"))
+                 return
+
+             if not test_code:
+                 q.put((False, "No test code"))
+                 return
+
+             # Create a synthetic 'implementation' module so test code can import it
+             impl_module = types.ModuleType("implementation")
+             exec(impl_code, impl_module.__dict__)
+             sys.modules["implementation"] = impl_module
+
+             # Execute test code with pytest available in its scope
+             import pytest
+
+             test_scope = {"__builtins__": __builtins__, "pytest": pytest}
+             exec(test_code, test_scope)
+
+             # Find and run test functions
+             test_funcs = [
+                 v
+                 for k, v in test_scope.items()
+                 if k.startswith("test_") and callable(v)
+             ]
+
+             if not test_funcs:
+                 q.put((False, "No test functions found"))
+                 return
+
+             failures = []
+             for func in test_funcs:
+                 try:
+                     func()
+                 except AssertionError as e:
+                     failures.append(f"{func.__name__}: AssertionError - {e}")
+                 except Exception as e:
+                     failures.append(f"{func.__name__}: {type(e).__name__} - {e}")
+
+             if failures:
+                 q.put((False, "Test failures:\n" + "\n".join(failures)))
+             else:
+                 q.put((True, f"All {len(test_funcs)} tests passed"))
+
+         except SyntaxError as e:
+             q.put((False, f"Syntax error: {e}"))
+         except Exception as e:
+             q.put((False, f"Execution error: {e}"))
+         finally:
+             if "implementation" in sys.modules:
+                 del sys.modules["implementation"]
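
A usage sketch for DynamicTestGuard showing the 'implementation' module contract that the subprocess sets up; make_artifact is a hypothetical helper that fills the Artifact fields with placeholders, and the __main__ guard matters because multiprocessing may use the spawn start method on macOS/Windows:

    import uuid
    from datetime import datetime

    from atomicguard.domain.models import Artifact, ArtifactStatus, ContextSnapshot
    from atomicguard.guards import DynamicTestGuard

    def make_artifact(content: str) -> Artifact:
        """Hypothetical helper: fills required Artifact fields with placeholders."""
        return Artifact(
            artifact_id=str(uuid.uuid4()),
            content=content,
            previous_attempt_id=None,
            action_pair_id="g_impl",
            created_at=datetime.now().isoformat(),
            attempt_number=1,
            status=ArtifactStatus.PENDING,
            guard_result=None,
            feedback="",
            context=ContextSnapshot("", "", (), ()),
        )

    if __name__ == "__main__":
        impl = make_artifact("def add(a, b):\n    return a + b\n")
        tests = make_artifact(
            "from implementation import add\n\n"
            "def test_add():\n"
            "    assert add(2, 3) == 5\n"
        )
        guard = DynamicTestGuard(timeout=10.0)
        result = guard.validate(impl, test=tests)
        print(result.passed, result.feedback)  # True "All 1 tests passed"
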

--- /dev/null
+++ b/atomicguard/infrastructure/__init__.py
@@ -0,0 +1,23 @@
+ """
+ Infrastructure layer for the Dual-State Framework.
+
+ Contains adapters for external concerns (persistence, LLMs, etc.).
+ """
+
+ from atomicguard.infrastructure.llm import (
+     MockGenerator,
+     OllamaGenerator,
+ )
+ from atomicguard.infrastructure.persistence import (
+     FilesystemArtifactDAG,
+     InMemoryArtifactDAG,
+ )
+
+ __all__ = [
+     # Persistence
+     "InMemoryArtifactDAG",
+     "FilesystemArtifactDAG",
+     # LLM
+     "OllamaGenerator",
+     "MockGenerator",
+ ]

--- /dev/null
+++ b/atomicguard/infrastructure/llm/__init__.py
@@ -0,0 +1,11 @@
+ """
+ LLM adapters for artifact generation.
+ """
+
+ from atomicguard.infrastructure.llm.mock import MockGenerator
+ from atomicguard.infrastructure.llm.ollama import OllamaGenerator
+
+ __all__ = [
+     "OllamaGenerator",
+     "MockGenerator",
+ ]

--- /dev/null
+++ b/atomicguard/infrastructure/llm/mock.py
@@ -0,0 +1,61 @@
+ """
+ Mock generator for testing without LLM.
+
+ Returns predefined responses in sequence.
+ """
+
+ import uuid
+ from datetime import datetime
+
+ from atomicguard.domain.interfaces import GeneratorInterface
+ from atomicguard.domain.models import Artifact, ArtifactStatus, Context, ContextSnapshot
+ from atomicguard.domain.prompts import PromptTemplate
+
+
+ class MockGenerator(GeneratorInterface):
+     """Returns predefined responses for testing."""
+
+     def __init__(self, responses: list[str]):
+         """
+         Args:
+             responses: List of response strings to return in sequence
+         """
+         self._responses = responses
+         self._call_count = 0
+
+     def generate(
+         self, _context: Context, _template: PromptTemplate | None = None
+     ) -> Artifact:
+         """Return the next predefined response."""
+         if self._call_count >= len(self._responses):
+             raise RuntimeError("MockGenerator exhausted responses")
+
+         content = self._responses[self._call_count]
+         self._call_count += 1
+
+         return Artifact(
+             artifact_id=str(uuid.uuid4()),
+             content=content,
+             previous_attempt_id=None,
+             action_pair_id="mock",
+             created_at=datetime.now().isoformat(),
+             attempt_number=self._call_count,
+             status=ArtifactStatus.PENDING,
+             guard_result=None,
+             feedback="",
+             context=ContextSnapshot(
+                 specification="",
+                 constraints="",
+                 feedback_history=(),
+                 dependency_ids=(),
+             ),
+         )
+
+     @property
+     def call_count(self) -> int:
+         """Number of times generate() has been called."""
+         return self._call_count
+
+     def reset(self) -> None:
+         """Reset the call counter to reuse responses."""
+         self._call_count = 0
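
Finally, a sketch of driving MockGenerator in a unit test; as above, the no-argument InMemoryArtifactDAG() constructor is an assumption, and the scripted responses are illustrative:

    from atomicguard.domain.models import AmbientEnvironment, Context
    from atomicguard.infrastructure import InMemoryArtifactDAG, MockGenerator

    generator = MockGenerator(
        responses=[
            "def broken(:",                    # attempt 1: deliberately invalid syntax
            "def answer():\n    return 42\n",  # attempt 2: what a retry would produce
        ]
    )
    context = Context(
        ambient=AmbientEnvironment(repository=InMemoryArtifactDAG()),  # assumed no-arg
        specification="Return 42.",
    )

    first = generator.generate(context)
    second = generator.generate(context)
    assert first.attempt_number == 1 and second.attempt_number == 2
    assert generator.call_count == 2

    generator.reset()  # replay the same scripted responses in another test
    assert generator.call_count == 0

Pairing a scripted generator like this with a real guard exercises the retry loop deterministically, without any LLM in the loop.
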