PyPI - aitdd - Versions diffs - 0.1.0__py3-none-any.whl - Mend

aitdd 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

aitdd/__init__.py +5 -0
aitdd/agents.py +298 -0
aitdd/cli.py +73 -0
aitdd/hook_policy.py +85 -0
aitdd/progress.py +165 -0
aitdd/review.py +102 -0
aitdd/runner.py +341 -0
aitdd/spec.py +116 -0
aitdd-0.1.0.dist-info/METADATA +143 -0
aitdd-0.1.0.dist-info/RECORD +12 -0
aitdd-0.1.0.dist-info/WHEEL +4 -0
aitdd-0.1.0.dist-info/entry_points.txt +2 -0

aitdd/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""AiTdd: Codex-planned, Cursor-implemented TDD loops."""
+from .runner import TddLoop, TddLoopConfig
+__all__ = ["TddLoop", "TddLoopConfig"]

aitdd/agents.py ADDED Viewed

@@ -0,0 +1,298 @@
+"""Agent adapters used by the TDD loop."""
+from __future__ import annotations
+import json
+import os
+import subprocess
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Protocol
+# ES-module source that CodexSdkAgent passes to `node --input-type=module -e`.
+# It reads one JSON request from stdin (cwd, model, prompt, outputSchema),
+# runs a single turn on a new Codex thread started with sandboxMode
+# "read-only", and prints one JSON line: status, result, usage, threadId.
+CODEX_SDK_BRIDGE = r"""
+import { Codex } from "@openai/codex-sdk";
+async function readStdin() {
+  const chunks = [];
+  for await (const chunk of process.stdin) chunks.push(chunk);
+  return Buffer.concat(chunks).toString("utf8");
+}
+const input = JSON.parse(await readStdin());
+const codex = new Codex();
+const thread = codex.startThread({
+  workingDirectory: input.cwd,
+  skipGitRepoCheck: true,
+  sandboxMode: "read-only",
+  approvalPolicy: "never",
+  model: input.model || undefined,
+});
+const turnOptions = {};
+if (input.outputSchema) {
+  turnOptions.outputSchema = input.outputSchema;
+}
+const result = await thread.run(input.prompt, turnOptions);
+process.stdout.write(JSON.stringify({
+  status: "finished",
+  result: result.finalResponse,
+  usage: result.usage,
+  threadId: thread.id,
+}) + "\n");
+"""
+# ES-module source that CursorSdkAgent passes to `node --input-type=module -e`.
+# It reads one JSON request from stdin (cwd, model, prompt), falls back to the
+# "composer-latest" model, runs with sandboxOptions.enabled = false, forwards
+# CURSOR_API_KEY when that variable is set, and prints one JSON line:
+# status, result, durationMs.
+CURSOR_SDK_BRIDGE = r"""
+import { Agent } from "@cursor/sdk";
+async function readStdin() {
+  const chunks = [];
+  for await (const chunk of process.stdin) chunks.push(chunk);
+  return Buffer.concat(chunks).toString("utf8");
+}
+const input = JSON.parse(await readStdin());
+const options = {
+  model: { id: input.model || "composer-latest" },
+  local: {
+    cwd: input.cwd,
+    sandboxOptions: { enabled: false },
+  },
+};
+if (process.env.CURSOR_API_KEY) {
+  options.apiKey = process.env.CURSOR_API_KEY;
+}
+const result = await Agent.prompt(input.prompt, options);
+process.stdout.write(JSON.stringify({
+  status: result.status,
+  result: result.result ?? "",
+  durationMs: result.durationMs,
+}) + "\n");
+"""
+@dataclass(frozen=True)
+class AgentResult:
+    """Immutable record of one agent invocation."""
+    role: str  # label of the agent that produced this result
+    prompt: str  # prompt text that was sent to the agent
+    stdout: str  # text output (adapters may replace it with the parsed "result" field)
+    stderr: str
+    returncode: int  # process exit status; 0 means success
+    @property
+    def ok(self) -> bool:
+        """Return True when the agent exited with return code 0."""
+        return self.returncode == 0
+class Agent(Protocol):
+    """Structural interface for agent adapters: a ``role`` label plus ``run``."""
+    role: str
+    def run(
+        self,
+        prompt: str,
+        cwd: Path,
+        output_schema: dict[str, object] | None = None,
+    ) -> AgentResult:
+        """Run the agent for one prompt in ``cwd`` and return its AgentResult."""
+@dataclass(frozen=True)
+class DryRunAgent:
+    role: str
+    def run(
+        self,
+        prompt: str,
+        cwd: Path,
+        output_schema: dict[str, object] | None = None,
+    ) -> AgentResult:
+        return AgentResult(
+            role=self.role,
+            prompt=prompt,
+            stdout=f"[dry-run:{self.role}] cwd={cwd}\n{prompt}\n",
+            stderr="",
+            returncode=0,
+        )
+@dataclass(frozen=True)
+class CodexSdkAgent:
+    """Codex planning/review adapter using the official @openai/codex-sdk."""
+    role: str = "codex"
+    model: str | None = None
+    timeout: int = 900
+    node_bin: str = "node"
+    def run(
+        self,
+        prompt: str,
+        cwd: Path,
+        output_schema: dict[str, object] | None = None,
+    ) -> AgentResult:
+        command = [
+            self.node_bin,
+            "--input-type=module",
+            "-e",
+            CODEX_SDK_BRIDGE,
+        ]
+        completed = subprocess.run(
+            command,
+            cwd=_node_package_root(cwd),
+            input=json.dumps(
+                {
+                    "cwd": str(cwd),
+                    "model": self.model,
+                    "prompt": prompt,
+                    "outputSchema": output_schema,
+                }
+            ),
+            text=True,
+            capture_output=True,
+            timeout=self.timeout,
+            check=False,
+        )
+        return AgentResult(
+            self.role,
+            prompt,
+            _result_text(completed.stdout),
+            completed.stderr,
+            completed.returncode,
+        )
+@dataclass(frozen=True)
+class CursorCliAgent:
+    """Cursor implementation adapter using Cursor Agent CLI with Composer."""
+    role: str = "cursor"
+    cursor_bin: str = "cursor-agent"
+    model: str | None = "composer-latest"
+    timeout: int = 1800
+    force: bool = True
+    def run(self, prompt: str, cwd: Path) -> AgentResult:
+        command = [
+            self.cursor_bin,
+            "--print",
+            "--output-format",
+            "text",
+            "--trust",
+            "--workspace",
+            str(cwd),
+        ]
+        if self.force:
+            command.append("--force")
+        if self.model:
+            command.extend(["--model", self.model])
+        command.append(prompt)
+        completed = subprocess.run(
+            command,
+            cwd=cwd,
+            text=True,
+            capture_output=True,
+            timeout=self.timeout,
+            check=False,
+            env=_cursor_sdk_env(),
+        )
+        return AgentResult(
+            self.role,
+            prompt,
+            completed.stdout,
+            completed.stderr,
+            completed.returncode,
+        )
+@dataclass(frozen=True)
+class CursorSdkAgent:
+    """Cursor implementation adapter using the official @cursor/sdk."""
+    role: str = "cursor"
+    model: str = "composer-latest"
+    timeout: int = 1800
+    node_bin: str = "node"
+    def run(self, prompt: str, cwd: Path) -> AgentResult:
+        command = [
+            self.node_bin,
+            "--input-type=module",
+            "-e",
+            CURSOR_SDK_BRIDGE,
+        ]
+        completed = subprocess.run(
+            command,
+            cwd=_node_package_root(cwd),
+            input=json.dumps({"cwd": str(cwd), "model": self.model, "prompt": prompt}),
+            text=True,
+            capture_output=True,
+            timeout=self.timeout,
+            check=False,
+            env=_cursor_sdk_env(),
+        )
+        return AgentResult(
+            self.role,
+            prompt,
+            _result_text(completed.stdout),
+            _friendly_cursor_sdk_stderr(completed.stderr),
+            completed.returncode,
+        )
+def _friendly_cursor_sdk_stderr(stderr: str) -> str:
+    if "AuthenticationError" not in stderr:
+        return stderr
+    return (
+        "Cursor SDK authentication failed. "
+        "Set CURSOR_API_KEY or make sure the official @cursor/sdk can resolve Cursor auth. "
+        "Original stderr follows:\n"
+        f"{stderr}"
+    )
+def _node_package_root(workdir: Path) -> Path:
+    if (workdir / "node_modules" / "@cursor" / "sdk").exists():
+        return workdir
+    return Path(__file__).resolve().parents[2]
+def _cursor_sdk_env() -> dict[str, str]:
+    env = os.environ.copy()
+    if env.get("CURSOR_API_KEY"):
+        return env
+    token = _read_macos_keychain_secret("aitdd.cursor_api_key")
+    if token:
+        env["CURSOR_API_KEY"] = token
+    return env
+def _read_macos_keychain_secret(service: str) -> str | None:
+    completed = subprocess.run(
+        ["security", "find-generic-password", "-w", "-s", service],
+        text=True,
+        capture_output=True,
+        check=False,
+    )
+    if completed.returncode != 0:
+        return None
+    token = completed.stdout.strip()
+    return token or None
+def _result_text(stdout: str) -> str:
+    value = parse_json_object(stdout)
+    result = value.get("result")
+    return result if isinstance(result, str) else stdout
+def parse_json_object(text: str) -> dict[str, object]:
+    """Parse the first JSON object from an agent response."""
+    start = text.find("{")
+    end = text.rfind("}")
+    if start == -1 or end == -1 or end < start:
+        return {}
+    try:
+        value = json.loads(text[start : end + 1])
+    except json.JSONDecodeError:
+        return {}
+    return value if isinstance(value, dict) else {}

aitdd/cli.py ADDED Viewed

@@ -0,0 +1,73 @@
+"""Command line entrypoint."""
+from __future__ import annotations
+import argparse
+from pathlib import Path
+from .runner import TddLoop, TddLoopConfig
+def build_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(prog="aitdd")
+    subparsers = parser.add_subparsers(dest="command", required=True)
+    run = subparsers.add_parser("run", help="run the Codex/Cursor TDD loop")
+    _add_run_arguments(run)
+    resume = subparsers.add_parser("resume", help="resume from .aitdd/progress.json")
+    _add_run_arguments(resume)
+    resume.set_defaults(resume=True)
+    return parser
+def _add_run_arguments(run: argparse.ArgumentParser) -> None:
+    run.add_argument("goal", nargs="?", default="")
+    run.add_argument("--workdir", default=".")
+    run.add_argument("--test-command", default="pytest")
+    run.add_argument("--max-cycles", type=int, default=5)
+    run.add_argument("--spec", type=Path, help="path to aitdd.yaml")
+    run.add_argument("--codex-model")
+    run.add_argument("--cursor-model", default="composer-latest")
+    run.add_argument(
+        "--cursor-backend",
+        choices=["cli", "sdk"],
+        default="sdk",
+        help="use cursor-agent CLI or the official @cursor/sdk bridge",
+    )
+    run.add_argument("--dry-run", action="store_true")
+def main(argv: list[str] | None = None) -> int:
+    args = build_parser().parse_args(argv)
+    if args.command in {"run", "resume"}:
+        if not args.goal and not args.spec:
+            raise SystemExit("goal is required unless --spec is provided")
+        config = TddLoopConfig(
+            goal=args.goal,
+            workdir=Path(args.workdir).resolve(),
+            test_command=args.test_command,
+            max_cycles=args.max_cycles,
+            spec_path=args.spec.resolve() if args.spec else None,
+            codex_model=args.codex_model,
+            cursor_model=args.cursor_model,
+            cursor_backend=args.cursor_backend,
+            resume=getattr(args, "resume", False),
+            dry_run=args.dry_run,
+        )
+        results = TddLoop(config).run()
+        for result in results:
+            print(
+                f"cycle={result.index} red={result.red.returncode} "
+                f"green={result.green.returncode} refactor={result.refactor.returncode} "
+                f"complete={result.complete} "
+                f"one_behavior_only={result.review_gate.one_behavior_only} "
+                f"minimal_green={result.review_gate.minimal_green} "
+                f"boundary_ok={result.review_gate.acceptance_unit_boundary_ok}"
+            )
+        return 0
+    return 2
+if __name__ == "__main__":
+    raise SystemExit(main())

aitdd/hook_policy.py ADDED Viewed

@@ -0,0 +1,85 @@
+"""TDD phase policy shared by the loop and Codex hooks."""
+from __future__ import annotations
+from dataclasses import dataclass
+from enum import Enum
+from codex_hookkit import Decision, allow, deny
+class TddPhase(str, Enum):
+    """The three phases of a TDD cycle; each value is the lowercase phase name."""
+    RED = "red"
+    GREEN = "green"
+    REFACTOR = "refactor"
+@dataclass(frozen=True)
+class PhaseTestResult:
+    """Immutable outcome of running a test command during one phase."""
+    command: str  # the command line that was executed
+    returncode: int  # exit status; 0 counts as a pass
+    stdout: str = ""
+    stderr: str = ""
+    @property
+    def passed(self) -> bool:
+        """Return True when the command exited with status 0."""
+        return self.returncode == 0
+    @property
+    def failed(self) -> bool:
+        """Return True when the command exited with a non-zero status."""
+        return not self.passed
+@dataclass(frozen=True)
+class ExpectedRed:
+    """Expectation about how the RED test run should fail."""
+    # NOTE(review): evaluate_phase in this module never reads exit_code —
+    # confirm whether it is meant to be enforced.
+    exit_code: str = "nonzero"
+    # RED is accepted when at least one of these fragments appears in the output.
+    must_include: list[str] | None = None
+    # Any of these fragments appearing in the output rejects RED.
+    must_not_include: list[str] | None = None
+def evaluate_phase(
+    phase: TddPhase,
+    test_run: PhaseTestResult,
+    expected_red_failure: list[str] | ExpectedRed | None = None,
+) -> Decision:
+    if phase is TddPhase.RED:
+        if test_run.passed:
+            return deny.decision(
+                "RED rejected: tests passed. Add the smallest meaningful failing test first."
+            )
+        expected = _normalize_expected_red(expected_red_failure)
+        if not expected:
+            return allow.decision("RED accepted: tests fail as expected.")
+        combined_output = f"{test_run.stdout}\n{test_run.stderr}"
+        forbidden = expected.must_not_include or []
+        if any(fragment in combined_output for fragment in forbidden):
+            return deny.decision(
+                "RED rejected: tests failed with a forbidden reason. "
+                f"Forbidden: {', '.join(forbidden)}"
+            )
+        required = expected.must_include or []
+        if not required or any(fragment in combined_output for fragment in required):
+            return allow.decision("RED accepted: tests fail for the expected reason.")
+        return deny.decision(
+            "RED rejected: tests failed, but not for the expected reason. "
+            f"Expected one of: {', '.join(required)}"
+        )
+    if test_run.passed:
+        return allow.decision(f"{phase.value.upper()} accepted: tests pass.")
+    return deny.decision(
+        f"{phase.value.upper()} rejected: tests are failing. "
+        "Restore a passing suite before moving on."
+    )
+def _normalize_expected_red(value: list[str] | ExpectedRed | None) -> ExpectedRed | None:
+    if value is None:
+        return None
+    if isinstance(value, ExpectedRed):
+        return value
+    return ExpectedRed(must_include=value)

aitdd/progress.py ADDED Viewed

@@ -0,0 +1,165 @@
+"""Minimal persistent progress and report files."""
+from __future__ import annotations
+import json
+import subprocess
+from dataclasses import asdict, dataclass, field
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any
+from .hook_policy import PhaseTestResult
+from .review import ReviewGate
+@dataclass
+class CycleProgress:
+    """Mutable record of one cycle as it is stored in progress.json."""
+    index: int
+    behavior: str
+    # ProgressStore.next_cycle_index treats any status other than "completed" as unfinished.
+    status: str = "started"
+    # UTC timestamp in ISO 8601 form, taken when the record is created.
+    started_at: str = field(default_factory=lambda: datetime.now(UTC).isoformat())
+    finished_at: str | None = None
+    # Per-phase summaries, filled in by ProgressStore.record_phase.
+    red: dict[str, Any] | None = None
+    green: dict[str, Any] | None = None
+    refactor: dict[str, Any] | None = None
+    review_gate: dict[str, Any] | None = None
+    issues: list[str] = field(default_factory=list)
+class ProgressStore:
+    def __init__(self, workdir: Path) -> None:
+        self.workdir = workdir
+        self.root = workdir / ".aitdd"
+        self.cycles_dir = self.root / "cycles"
+        self.progress_path = self.root / "progress.json"
+        self.report_path = self.root / "report.md"
+        self.root.mkdir(exist_ok=True)
+        self.cycles_dir.mkdir(exist_ok=True)
+        self.data = self._load()
+    def start_cycle(self, index: int, behavior: str, plan: str) -> CycleProgress:
+        cycle = CycleProgress(index=index, behavior=behavior)
+        self._upsert(cycle)
+        self.append_report(f"## Cycle {index}: {behavior}\n\n### Plan\n\n{plan}\n")
+        return cycle
+    def record_phase(self, cycle: CycleProgress, phase: str, result: PhaseTestResult) -> None:
+        setattr(cycle, phase, _phase_dict(result))
+        self.snapshot_diff(cycle.index, phase)
+        self._upsert(cycle)
+        self.append_report(
+            f"\n### {phase.upper()}\n\n"
+            f"- command: `{result.command}`\n"
+            f"- returncode: `{result.returncode}`\n"
+        )
+    def record_review(self, cycle: CycleProgress, review_gate: ReviewGate) -> None:
+        cycle.review_gate = asdict(review_gate)
+        cycle.issues = review_gate.issues
+        self._upsert(cycle)
+        self.append_report(
+            "\n### Review Gate\n\n"
+            f"- one_behavior_only: `{review_gate.one_behavior_only}`\n"
+            f"- minimal_green: `{review_gate.minimal_green}`\n"
+            f"- tests_unchanged_in_refactor: `{review_gate.tests_unchanged_in_refactor}`\n"
+            f"- acceptance_unit_boundary_ok: `{review_gate.acceptance_unit_boundary_ok}`\n"
+            f"- forbidden_respected: `{review_gate.forbidden_respected}`\n"
+            f"- issues: `{', '.join(review_gate.issues) if review_gate.issues else 'none'}`\n"
+        )
+    def finish_cycle(self, cycle: CycleProgress, status: str) -> None:
+        cycle.status = status
+        cycle.finished_at = datetime.now(UTC).isoformat()
+        self._upsert(cycle)
+        self.append_report(f"\n### Result\n\n`{status}`\n")
+    def fail_cycle(self, cycle: CycleProgress | None, error: BaseException) -> None:
+        if cycle is None:
+            self.data["last_error"] = str(error)
+            self._write()
+            self.append_report(f"\n## Failure\n\n{error}\n")
+            return
+        cycle.status = "failed"
+        cycle.finished_at = datetime.now(UTC).isoformat()
+        cycle.issues.append(str(error))
+        self._upsert(cycle)
+        self.append_report(f"\n### Failure\n\n{error}\n")
+    def next_cycle_index(self) -> int:
+        cycles = self.data.get("cycles", [])
+        for item in cycles:
+            if item.get("status") != "completed":
+                return int(item.get("index", 1))
+        return len(cycles) + 1
+    def snapshot_diff(self, index: int, phase: str) -> None:
+        path = self.cycles_dir / f"{index:03d}-{phase}.diff"
+        diff = _git_diff(self.workdir)
+        path.write_text(diff or "# No git diff available.\n")
+    def append_report(self, text: str) -> None:
+        if not self.report_path.exists():
+            self.report_path.write_text("# AiTdd Report\n")
+        with self.report_path.open("a") as stream:
+            stream.write(text)
+            if not text.endswith("\n"):
+                stream.write("\n")
+    def _load(self) -> dict[str, Any]:
+        if not self.progress_path.exists():
+            return {"cycles": []}
+        return json.loads(self.progress_path.read_text())
+    def _upsert(self, cycle: CycleProgress) -> None:
+        cycles = [item for item in self.data.get("cycles", []) if item.get("index") != cycle.index]
+        cycles.append(asdict(cycle))
+        cycles.sort(key=lambda item: item["index"])
+        self.data["cycles"] = cycles
+        self._write()
+    def _write(self) -> None:
+        self.progress_path.write_text(json.dumps(self.data, ensure_ascii=False, indent=2) + "\n")
+def _phase_dict(result: PhaseTestResult) -> dict[str, Any]:
+    return {
+        "command": result.command,
+        "returncode": result.returncode,
+        "stdout_tail": result.stdout[-2000:],
+        "stderr_tail": result.stderr[-2000:],
+    }
+def _git_diff(workdir: Path) -> str:
+    in_git = subprocess.run(
+        ["git", "-C", str(workdir), "rev-parse", "--is-inside-work-tree"],
+        text=True,
+        capture_output=True,
+        check=False,
+    )
+    if in_git.returncode != 0:
+        return ""
+    tracked = subprocess.run(
+        ["git", "-C", str(workdir), "diff", "--binary", "--no-ext-diff"],
+        text=True,
+        capture_output=True,
+        check=False,
+    )
+    diff = tracked.stdout
+    untracked = subprocess.run(
+        ["git", "-C", str(workdir), "ls-files", "--others", "--exclude-standard"],
+        text=True,
+        capture_output=True,
+        check=False,
+    )
+    for relative in untracked.stdout.splitlines():
+        file_diff = subprocess.run(
+            ["git", "-C", str(workdir), "diff", "--no-index", "--", "/dev/null", relative],
+            text=True,
+            capture_output=True,
+            check=False,
+        )
+        diff += file_diff.stdout
+    return diff

aitdd/review.py ADDED Viewed

@@ -0,0 +1,102 @@
+"""Structured Codex review gates."""
+from __future__ import annotations
+import json
+from dataclasses import dataclass, field
+from typing import Any
+# JSON Schema for the review verdict: every property is required and
+# additional properties are rejected.
+REVIEW_SCHEMA: dict[str, object] = {
+    "type": "object",
+    "properties": {
+        "complete": {"type": "boolean"},
+        "reason": {"type": "string"},
+        "one_behavior_only": {"type": "boolean"},
+        "minimal_green": {"type": "boolean"},
+        "tests_unchanged_in_refactor": {"type": "boolean"},
+        "acceptance_unit_boundary_ok": {"type": "boolean"},
+        "forbidden_respected": {"type": "boolean"},
+        "issues": {"type": "array", "items": {"type": "string"}},
+    },
+    "required": [
+        "complete",
+        "reason",
+        "one_behavior_only",
+        "minimal_green",
+        "tests_unchanged_in_refactor",
+        "acceptance_unit_boundary_ok",
+        "forbidden_respected",
+        "issues",
+    ],
+    "additionalProperties": False,
+}
+# Canned verdict with every gate true and no issues; its reason is "dry run".
+PASSING_REVIEW_JSON = (
+    '{"complete": true, "reason": "dry run", "one_behavior_only": true, '
+    '"minimal_green": true, "tests_unchanged_in_refactor": true, '
+    '"acceptance_unit_boundary_ok": true, "forbidden_respected": true, "issues": []}'
+)
+@dataclass(frozen=True)
+class ReviewGate:
+    complete: bool
+    reason: str
+    one_behavior_only: bool
+    minimal_green: bool
+    tests_unchanged_in_refactor: bool
+    acceptance_unit_boundary_ok: bool
+    forbidden_respected: bool
+    issues: list[str] = field(default_factory=list)
+    @classmethod
+    def from_text(cls, text: str) -> ReviewGate:
+        value = _parse_json_object(text)
+        return cls(
+            complete=bool(value.get("complete")),
+            reason=str(value.get("reason") or ""),
+            one_behavior_only=bool(value.get("one_behavior_only")),
+            minimal_green=bool(value.get("minimal_green")),
+            tests_unchanged_in_refactor=bool(value.get("tests_unchanged_in_refactor")),
+            acceptance_unit_boundary_ok=bool(value.get("acceptance_unit_boundary_ok")),
+            forbidden_respected=bool(value.get("forbidden_respected")),
+            issues=[str(item) for item in value.get("issues", [])],
+        )
+    @property
+    def passed(self) -> bool:
+        return all(
+            [
+                self.one_behavior_only,
+                self.minimal_green,
+                self.tests_unchanged_in_refactor,
+                self.acceptance_unit_boundary_ok,
+                self.forbidden_respected,
+            ]
+        )
+    def failure_message(self) -> str:
+        failed = []
+        if not self.one_behavior_only:
+            failed.append("one_behavior_only=false")
+        if not self.minimal_green:
+            failed.append("minimal_green=false")
+        if not self.tests_unchanged_in_refactor:
+            failed.append("tests_unchanged_in_refactor=false")
+        if not self.acceptance_unit_boundary_ok:
+            failed.append("acceptance_unit_boundary_ok=false")
+        if not self.forbidden_respected:
+            failed.append("forbidden_respected=false")
+        details = "; ".join(self.issues) if self.issues else self.reason
+        return f"Codex review gate failed: {', '.join(failed)}. {details}"
+def _parse_json_object(text: str) -> dict[str, Any]:
+    start = text.find("{")
+    end = text.rfind("}")
+    if start == -1 or end == -1 or end < start:
+        raise ValueError("Codex review did not return a JSON object")
+    value = json.loads(text[start : end + 1])
+    if not isinstance(value, dict):
+        raise ValueError("Codex review JSON must be an object")
+    return value

aitdd/runner.py ADDED Viewed

@@ -0,0 +1,341 @@
+"""RED-GREEN-REFACTOR orchestration."""
+from __future__ import annotations
+import subprocess
+from dataclasses import dataclass
+from pathlib import Path
+from .agents import (
+    Agent,
+    AgentResult,
+    CodexSdkAgent,
+    CursorCliAgent,
+    CursorSdkAgent,
+    DryRunAgent,
+)
+from .hook_policy import PhaseTestResult, TddPhase, evaluate_phase
+from .progress import CycleProgress, ProgressStore
+from .review import PASSING_REVIEW_JSON, REVIEW_SCHEMA, ReviewGate
+from .spec import AitddSpec, CycleSpec
+@dataclass(frozen=True)
+class TddLoopConfig:
+    """Immutable settings for one TddLoop run."""
+    goal: str
+    workdir: Path
+    test_command: str = "pytest"  # executed through the shell in workdir
+    max_cycles: int = 5  # highest cycle index the loop will attempt
+    codex_model: str | None = None
+    cursor_model: str | None = None  # TddLoop falls back to "composer-latest" when unset
+    cursor_backend: str = "sdk"  # "cli" or "sdk"; anything else raises ValueError
+    spec_path: Path | None = None
+    resume: bool = False  # start from ProgressStore.next_cycle_index() instead of 1
+    dry_run: bool = False  # use DryRunAgent and simulated test results
+@dataclass(frozen=True)
+class CycleResult:
+    """Immutable outcome of one finished cycle."""
+    index: int
+    red: PhaseTestResult
+    green: PhaseTestResult
+    refactor: PhaseTestResult
+    complete: bool  # True when this cycle ended the loop
+    review: str  # raw review text returned by the planner
+    review_gate: ReviewGate  # the same review, parsed
+class TddLoop:
+    def __init__(
+        self,
+        config: TddLoopConfig,
+        planner: Agent | None = None,
+        implementer: Agent | None = None,
+    ) -> None:
+        self.config = config
+        self.spec = AitddSpec.from_file(config.spec_path) if config.spec_path else None
+        self.progress = ProgressStore(config.workdir)
+        self.planner = planner or (
+            DryRunAgent("codex-planner")
+            if config.dry_run
+            else CodexSdkAgent(model=config.codex_model)
+        )
+        self.implementer = implementer or (
+            DryRunAgent("cursor-implementer")
+            if config.dry_run
+            else self._create_cursor_agent()
+        )
+    def _create_cursor_agent(self) -> Agent:
+        model = self.config.cursor_model or "composer-latest"
+        if self.config.cursor_backend == "cli":
+            return CursorCliAgent(model=model)
+        if self.config.cursor_backend == "sdk":
+            return CursorSdkAgent(model=model)
+        raise ValueError(f"Unsupported cursor backend: {self.config.cursor_backend}")
+    def run(self) -> list[CycleResult]:
+        results: list[CycleResult] = []
+        start_index = self.progress.next_cycle_index() if self.config.resume else 1
+        current_progress: CycleProgress | None = None
+        try:
+            for index in range(start_index, self.config.max_cycles + 1):
+                cycle = self._cycle_for(index)
+                plan = self._plan(index, cycle)
+                current_progress = self.progress.start_cycle(
+                    index,
+                    self._behavior(index, cycle),
+                    plan,
+                )
+                self._implement(TddPhase.RED, plan)
+                red = self._run_tests(TddPhase.RED, cycle)
+                self.progress.record_phase(current_progress, "red", red)
+                self._require(TddPhase.RED, red, cycle)
+                self._implement(TddPhase.GREEN, plan)
+                green = self._run_tests(TddPhase.GREEN)
+                self.progress.record_phase(current_progress, "green", green)
+                self._require(TddPhase.GREEN, green)
+                review = self._review(index, plan, green)
+                review_gate = ReviewGate.from_text(review)
+                self.progress.record_review(current_progress, review_gate)
+                self._require_review_gate(review_gate)
+                complete = self._is_complete(review_gate, index)
+                if not complete:
+                    tests_before = self._snapshot_test_files()
+                    self._implement(TddPhase.REFACTOR, review)
+                    self._require_refactor_kept_tests(tests_before)
+                refactor = self._run_tests(TddPhase.REFACTOR)
+                self.progress.record_phase(current_progress, "refactor", refactor)
+                self._require(TddPhase.REFACTOR, refactor)
+                if complete:
+                    self._require_done_when()
+                status = "completed" if complete else "completed"
+                self.progress.finish_cycle(current_progress, status)
+                results.append(
+                    CycleResult(index, red, green, refactor, complete, review, review_gate)
+                )
+                if complete:
+                    break
+        except Exception as exc:
+            self.progress.fail_cycle(current_progress, exc)
+            raise
+        return results
+    def _cycle_for(self, index: int) -> CycleSpec | None:
+        if not self.spec or index > len(self.spec.cycles):
+            return None
+        return self.spec.cycles[index - 1]
+    def _behavior(self, index: int, cycle: CycleSpec | None) -> str:
+        return cycle.behavior if cycle else f"cycle {index}"
+    def _plan(self, index: int, cycle: CycleSpec | None) -> str:
+        spec_text = self.spec.describe() if self.spec else f"Goal:\n{self.config.goal}"
+        cycle_text = (
+            self._cycle_text(cycle)
+            if cycle
+            else "Codex が次の最小 public behavior を 1 つだけ選んでください。"
+        )
+        prompt = f"""
+あなたは t-wada さんの TDD の進め方を尊重する計画担当です。
+作業ディレクトリを読み、次の最小の RED を 1 つだけ計画してください。
+実装やファイル編集は絶対にしないでください。
+1 サイクルで追加してよい public behavior は 1 つだけです。
+acceptance test と unit test の境界を守ってください。
+仕様:
+{spec_text}
+サイクル: {index}
+今回の対象:
+{cycle_text}
+出力は次を含めてください:
+- 次に追加する最小テスト
+- 期待する失敗理由
+- GREEN で許される最小実装
+- リファクタリング観点
+""".strip()
+        return self._run_agent(self.planner, prompt).stdout
+    def _cycle_text(self, cycle: CycleSpec) -> str:
+        lines = [f"Behavior: {cycle.behavior}"]
+        if cycle.expected_red_failure:
+            lines.append("Expected RED failure:")
+            lines.append(f"- exit_code: {cycle.expected_red_failure.exit_code}")
+            for item in cycle.expected_red_failure.must_include or []:
+                lines.append(f"- must_include: {item}")
+            for item in cycle.expected_red_failure.must_not_include or []:
+                lines.append(f"- must_not_include: {item}")
+        if cycle.notes:
+            lines.append("Notes:")
+            lines.extend(f"- {item}" for item in cycle.notes)
+        return "\n".join(lines)
+    def _implement(self, phase: TddPhase, context: str) -> None:
+        prompts = {
+            TddPhase.RED: (
+                "失敗する最小テストだけを書いてください。"
+                "プロダクトコードは原則変更しないでください。"
+            ),
+            TddPhase.GREEN: "今ある失敗を通すための最小実装だけを書いてください。",
+            TddPhase.REFACTOR: (
+                "テストを通したまま設計を少しだけ良くしてください。"
+                "振る舞いは変えないでください。テストファイルは変更しないでください。"
+            ),
+        }
+        prompt = f"""
+あなたは Cursor 実装担当です。t-wada 流の RED-GREEN-REFACTOR を厳守します。
+現在フェーズ: {phase.value.upper()}
+指示:
+{prompts[phase]}
+- 1 サイクルで追加してよい public behavior は 1 つだけです。
+- まだテストされていない先回り実装は禁止です。
+- 受け入れテストとユニットテストの責務を混ぜないでください。
+計画またはレビュー:
+{context}
+""".strip()
+        self._run_agent(self.implementer, prompt)
+    def _review(self, index: int, plan: str, test_run: PhaseTestResult) -> str:
+        if self.config.dry_run and isinstance(self.planner, DryRunAgent):
+            return PASSING_REVIEW_JSON
+        goal = self.spec.describe() if self.spec else self.config.goal
+        prompt = f"""
+あなたは Codex レビュー担当です。作業ディレクトリを読み、TDD サイクルの品質をレビューしてください。
+実装やファイル編集は絶対にしないでください。
+次を厳しく確認してください:
+- 1 サイクルで public behavior が 1 つだけ増えているか
+- GREEN は RED を通す最小差分か
+- REFACTOR でテストが変更されていないか
+- 受け入れテストとユニットテストの境界が守られているか
+- forbidden に触れていないか
+ゴール:
+{goal}
+サイクル: {index}
+計画:
+{plan}
+テスト結果:
+command: {test_run.command}
+returncode: {test_run.returncode}
+最後は必ず JSON schema に従った JSON だけを返してください。
+""".strip()
+        if isinstance(self.planner, CodexSdkAgent):
+            return self.planner.run(prompt, self.config.workdir, REVIEW_SCHEMA).stdout
+        return self._run_agent(self.planner, prompt).stdout
+    def _is_complete(self, review_gate: ReviewGate, index: int) -> bool:
+        if self.spec and self.spec.cycles:
+            return index >= len(self.spec.cycles)
+        return review_gate.complete
+    def _require_done_when(self) -> None:
+        if not self.spec or "acceptance_tests_pass" not in self.spec.done_when:
+            return
+        command = self.spec.acceptance_test_command
+        if not command and self.spec.acceptance_tests:
+            command = "pytest " + " ".join(self.spec.acceptance_tests)
+        command = command or self.config.test_command
+        result = self._run_command(command)
+        if result.failed:
+            raise RuntimeError("done_when rejected: acceptance_tests_pass failed.")
+    def _require_review_gate(self, review_gate: ReviewGate) -> None:
+        if not review_gate.passed:
+            raise RuntimeError(review_gate.failure_message())
+    def _run_tests(
+        self,
+        phase: TddPhase,
+        cycle: CycleSpec | None = None,
+    ) -> PhaseTestResult:
+        if self.config.dry_run:
+            code = 1 if phase is TddPhase.RED else 0
+            stderr = ""
+            if phase is TddPhase.RED and cycle and cycle.expected_red_failure:
+                includes = cycle.expected_red_failure.must_include or []
+                stderr = f"{includes[0]} [dry-run]\n" if includes else "expected RED [dry-run]\n"
+            return PhaseTestResult(
+                self.config.test_command,
+                code,
+                stdout=f"[dry-run:{phase.value}]\n",
+                stderr=stderr,
+            )
+        completed = subprocess.run(
+            self.config.test_command,
+            cwd=self.config.workdir,
+            shell=True,
+            text=True,
+            capture_output=True,
+            check=False,
+        )
+        return PhaseTestResult(
+            command=self.config.test_command,
+            returncode=completed.returncode,
+            stdout=completed.stdout,
+            stderr=completed.stderr,
+        )
+    def _run_command(self, command: str) -> PhaseTestResult:
+        completed = subprocess.run(
+            command,
+            cwd=self.config.workdir,
+            shell=True,
+            text=True,
+            capture_output=True,
+            check=False,
+        )
+        return PhaseTestResult(
+            command=command,
+            returncode=completed.returncode,
+            stdout=completed.stdout,
+            stderr=completed.stderr,
+        )
+    def _require(
+        self,
+        phase: TddPhase,
+        test_run: PhaseTestResult,
+        cycle: CycleSpec | None = None,
+    ) -> None:
+        expected = cycle.expected_red_failure if cycle else None
+        decision = evaluate_phase(phase, test_run, expected)
+        if decision.denied:
+            raise RuntimeError(decision.reason)
+    def _snapshot_test_files(self) -> dict[Path, str]:
+        snapshots: dict[Path, str] = {}
+        for path in self.config.workdir.rglob("*"):
+            if not path.is_file() or "__pycache__" in path.parts:
+                continue
+            if path.name.startswith("test_") or path.parent.name in {"tests", "test"}:
+                snapshots[path.relative_to(self.config.workdir)] = path.read_text(errors="ignore")
+        return snapshots
+    def _require_refactor_kept_tests(self, before: dict[Path, str]) -> None:
+        after = self._snapshot_test_files()
+        if before != after:
+            raise RuntimeError("REFACTOR rejected: test files changed during refactor phase.")
+    def _run_agent(self, agent: Agent, prompt: str) -> AgentResult:
+        result = agent.run(prompt, self.config.workdir)
+        if not result.ok:
+            raise RuntimeError(
+                f"{result.role} failed with exit code {result.returncode}\n{result.stderr}"
+            )
+        return result

aitdd/spec.py ADDED Viewed

@@ -0,0 +1,116 @@
+"""Specification support for complex TDD loops."""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+import yaml
+from .hook_policy import ExpectedRed
+@dataclass(frozen=True)
+class CycleSpec:
+    """One entry of the spec's ``cycles`` list: a single behavior for one TDD cycle."""
+    # Description of the behavior; the only required field.
+    behavior: str
+    # Expected RED failure criteria, or None when the spec declares none.
+    expected_red_failure: ExpectedRed | None = None
+    # Notes attached to the cycle, as strings.
+    notes: list[str] = field(default_factory=list)
+@dataclass(frozen=True)
+class AitddSpec:
+    goal: str
+    constraints: list[str] = field(default_factory=list)
+    public_api: list[str] = field(default_factory=list)
+    forbidden: list[str] = field(default_factory=list)
+    acceptance_tests: list[str] = field(default_factory=list)
+    unit_tests: list[str] = field(default_factory=list)
+    done_when: list[str] = field(default_factory=lambda: ["all_cycles_complete"])
+    acceptance_test_command: str | None = None
+    cycles: list[CycleSpec] = field(default_factory=list)
+    @classmethod
+    def from_file(cls, path: Path) -> AitddSpec:
+        raw = yaml.safe_load(path.read_text()) or {}
+        if not isinstance(raw, dict):
+            raise ValueError("aitdd spec must be a YAML mapping")
+        goal = str(raw.get("goal") or "").strip()
+        if not goal:
+            raise ValueError("aitdd spec requires a non-empty 'goal'")
+        return cls(
+            goal=goal,
+            constraints=[str(item) for item in _list(raw.get("constraints"))],
+            public_api=[str(item) for item in _list(raw.get("public_api"))],
+            forbidden=[str(item) for item in _list(raw.get("forbidden"))],
+            acceptance_tests=[str(item) for item in _list(raw.get("acceptance_tests"))],
+            unit_tests=[str(item) for item in _list(raw.get("unit_tests"))],
+            done_when=[str(item) for item in _list(raw.get("done_when"))]
+            or ["all_cycles_complete"],
+            acceptance_test_command=(
+                str(raw["acceptance_test_command"])
+                if raw.get("acceptance_test_command") is not None
+                else None
+            ),
+            cycles=[_cycle_from_raw(item) for item in _list(raw.get("cycles"))],
+        )
+    def describe(self) -> str:
+        sections = [f"Goal:\n{self.goal}"]
+        sections.append(_format_list("Constraints", self.constraints))
+        sections.append(_format_list("Public API", self.public_api))
+        sections.append(_format_list("Forbidden", self.forbidden))
+        sections.append(_format_list("Acceptance tests", self.acceptance_tests))
+        sections.append(_format_list("Unit tests", self.unit_tests))
+        return "\n\n".join(section for section in sections if section)
+def _cycle_from_raw(raw: Any) -> CycleSpec:
+    if isinstance(raw, str):
+        return CycleSpec(behavior=raw)
+    if not isinstance(raw, dict):
+        raise ValueError("each cycle must be a string or mapping")
+    behavior = str(raw.get("behavior") or "").strip()
+    if not behavior:
+        raise ValueError("cycle mapping requires 'behavior'")
+    return CycleSpec(
+        behavior=behavior,
+        expected_red_failure=_expected_red_from_raw(raw),
+        notes=[str(item) for item in _list(raw.get("notes"))],
+    )
+def _expected_red_from_raw(raw: dict[str, Any]) -> ExpectedRed | None:
+    if "expected_red" in raw:
+        value = raw["expected_red"]
+        if not isinstance(value, dict):
+            raise ValueError("expected_red must be a mapping")
+        return ExpectedRed(
+            exit_code=str(value.get("exit_code") or "nonzero"),
+            must_include=[str(item) for item in _list(value.get("must_include"))],
+            must_not_include=[str(item) for item in _list(value.get("must_not_include"))],
+        )
+    legacy = [str(item) for item in _list(raw.get("expected_red_failure"))]
+    if not legacy:
+        return None
+    return ExpectedRed(must_include=legacy)
+def _list(value: Any) -> list[Any]:
+    if value is None:
+        return []
+    if isinstance(value, list):
+        return value
+    return [value]
+def _format_list(title: str, items: list[str]) -> str:
+    if not items:
+        return ""
+    body = "\n".join(f"- {item}" for item in items)
+    return f"{title}:\n{body}"

aitdd-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,143 @@
+Metadata-Version: 2.4
+Name: aitdd
+Version: 0.1.0
+Summary: A small Codex-planned, Cursor-implemented TDD loop with Codex hook guards.
+Requires-Python: >=3.10
+Requires-Dist: codex-hookkit>=0.0.2
+Requires-Dist: pyyaml>=6.0
+Description-Content-Type: text/markdown
+# AiTdd
+Codex が計画とレビューを担当し、Cursor が RED / GREEN / REFACTOR の実装を担当する、
+小さな TDD オーケストレータです。Hook の入出力と policy は
+`codex-hookkit` を使います。
+Codex は Python から公式 `@openai/codex-sdk` を呼びます。
+Cursor は既定で Python から公式 `@cursor/sdk` の Composer を使い、
+必要なら `cursor-agent` CLI に切り替えられます。
+## 役割
+- Codex: 次に書くべき最小テストの計画、GREEN 後と REFACTOR 後のレビュー、完了判定
+- Cursor: テスト追加、最小実装、リファクタリング
+- Hook: RED ではテスト失敗を要求し、GREEN / REFACTOR ではテスト成功を要求する
+## 使い方
+```sh
+uv sync --dev
+npm install
+uv run aitdd run "FizzBuzz を t-wada 流 TDD で作る" --test-command "pytest" --max-cycles 5
+```
+PyPI から使う場合も、公式 Node SDK は作業ディレクトリに入れてください。
+```sh
+pip install aitdd
+npm install @openai/codex-sdk @cursor/sdk
+aitdd run "FizzBuzz を t-wada 流 TDD で作る" --test-command "pytest"
+```
+Cursor 実装担当は既定で SDK + Composer alias を使います。
+```sh
+uv run aitdd run "..." --cursor-backend sdk --cursor-model composer-latest
+```
+CLI fallback を使いたい場合:
+```sh
+uv run aitdd run "..." --cursor-backend cli --cursor-model composer-latest
+```
+SDK は `CURSOR_API_KEY` があればそれを使い、無い場合は SDK 側の認証解決に任せます。
+実行内容だけ確認する場合:
+```sh
+uv run aitdd run "TODO アプリの最小モデルを作る" --dry-run
+```
+複雑なクラスや業務要件では `aitdd.yaml` を使って、反復する TDD サイクルを固定できます。
+```sh
+uv run aitdd run "ignored when spec exists" \
+  --spec examples/aitdd.yaml \
+  --test-command "pytest" \
+  --max-cycles 5
+```
+`aitdd.yaml` では次を指定できます。
+- `goal`: 全体ゴール
+- `public_api`: 育てる public API
+- `constraints`: 設計・進め方の制約
+- `forbidden`: 先回り実装や禁止事項
+- `acceptance_tests`: 外側の受け入れテスト
+- `unit_tests`: 内側のユニットテスト
+- `cycles`: 1 サイクル 1 public behavior の反復リスト
+各 cycle には `expected_red` を置けます。RED では「テストが失敗したか」だけでなく、
+期待した理由で失敗したか、禁止した壊れ方をしていないかも検証します。
+REFACTOR フェーズではテストファイル変更を拒否します。
+Codex レビューは JSON schema で機械判定します。各サイクルで次の gate がすべて `true` の場合だけ
+次に進みます。
+- `one_behavior_only`
+- `minimal_green`
+- `tests_unchanged_in_refactor`
+- `acceptance_unit_boundary_ok`
+- `forbidden_respected`
+CLI には cycle ごとの進捗として `red / green / refactor / complete / one_behavior_only /
+minimal_green / boundary_ok` が表示されます。
+実行中の状態は最小構成で次に保存されます。
+- `.aitdd/progress.json`: cycle ごとの `behavior`, `red`, `green`, `refactor`, `review_gate`,
+  `issues`, `started_at`, `finished_at`
+- `.aitdd/cycles/001-red.diff`: phase ごとの git diff snapshot
+- `.aitdd/report.md`: TDD の進行ログ
+途中から再開する場合:
+```sh
+aitdd resume --spec aitdd.yaml --max-cycles 5
+```
+Codex hook として使う場合は `.codex/hooks.json` などに次を登録します。
+```json
+{
+  "hooks": {
+    "PostToolUse": [
+      {
+        "matcher": "*",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "uv run python hooks/aitdd_guard.py",
+            "timeout": 30
+          }
+        ]
+      }
+    ]
+  }
+}
+```
+`AITDD_PHASE=red|green|refactor` と `AITDD_TEST_COMMAND` を渡すと、そのフェーズの
+期待に合わない状態を block します。
+## ループ
+1. Codex が次の最小ステップを計画する
+2. Cursor が RED として、失敗するテストだけを書く
+3. テストが失敗しなければ RED をやり直す
+4. Cursor が GREEN として、通すための最小実装を書く
+5. テストが通らなければ GREEN をやり直す
+6. Codex がレビューし、Cursor が必要なら REFACTOR する
+7. テストが通ることを確認し、Codex が完了判定する
+`--max-cycles` は安全弁です。完璧を目指しつつ、暴走しないように上限を持たせています。

aitdd-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,12 @@
+aitdd/__init__.py,sha256=wIuXmq_znuM9FNYift0egMBgax0G72mP5Rup9fqcvDA,142
+aitdd/agents.py,sha256=UUAe2sBzNOr4dKw9Riigld1oCYvjj0vDNl2LH38sIG8,7557
+aitdd/cli.py,sha256=UEawVNzOuEepfEzc_L9HhaEP_21fVhJya1B_5V8Vlas,2658
+aitdd/hook_policy.py,sha256=ruOAiC93Q3A4gdSIpew4c2bhCDkthUTi47mSLpR0zsQ,2502
+aitdd/progress.py,sha256=OeIBdCTvUU6lJrfP8wPsGM6JsijiyahLDmhqoQoBlac,5959
+aitdd/review.py,sha256=PyLUIdsficMSoJy3TqorcIfRll_Nvhxx9_ueBSKiQ8w,3570
+aitdd/runner.py,sha256=GhsejKBBD-w6QLpjAy4vmzg-96xrBIQg2lE0rTswK8E,12830
+aitdd/spec.py,sha256=3R-LokleUJUUC6Ti99PHb5v8t1PLrHq_ea62MZVLNek,4169
+aitdd-0.1.0.dist-info/METADATA,sha256=3_IOOO-S0AtC_yF4Xa2fE_o1OpxJh-BOksxOP1cWM0g,4739
+aitdd-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
+aitdd-0.1.0.dist-info/entry_points.txt,sha256=aBpJeHRgJJYU0h0ngUWrfj-9RQXwgbWcUsNNlSdPDPQ,41
+aitdd-0.1.0.dist-info/RECORD,,

aitdd-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.29.0
+Root-Is-Purelib: true
+Tag: py3-none-any

aitdd-0.1.0.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,2 @@
+[console_scripts]
+aitdd = aitdd.cli:main