aitdd 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aitdd/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """AiTdd: Codex-planned, Cursor-implemented TDD loops."""
2
+
3
+ from .runner import TddLoop, TddLoopConfig
4
+
5
+ __all__ = ["TddLoop", "TddLoopConfig"]
aitdd/agents.py ADDED
@@ -0,0 +1,298 @@
1
+ """Agent adapters used by the TDD loop."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ import subprocess
8
+ from dataclasses import dataclass
9
+ from pathlib import Path
10
+ from typing import Protocol
11
+
12
+ CODEX_SDK_BRIDGE = r"""
13
+ import { Codex } from "@openai/codex-sdk";
14
+
15
+ async function readStdin() {
16
+ const chunks = [];
17
+ for await (const chunk of process.stdin) chunks.push(chunk);
18
+ return Buffer.concat(chunks).toString("utf8");
19
+ }
20
+
21
+ const input = JSON.parse(await readStdin());
22
+ const codex = new Codex();
23
+ const thread = codex.startThread({
24
+ workingDirectory: input.cwd,
25
+ skipGitRepoCheck: true,
26
+ sandboxMode: "read-only",
27
+ approvalPolicy: "never",
28
+ model: input.model || undefined,
29
+ });
30
+ const turnOptions = {};
31
+ if (input.outputSchema) {
32
+ turnOptions.outputSchema = input.outputSchema;
33
+ }
34
+ const result = await thread.run(input.prompt, turnOptions);
35
+ process.stdout.write(JSON.stringify({
36
+ status: "finished",
37
+ result: result.finalResponse,
38
+ usage: result.usage,
39
+ threadId: thread.id,
40
+ }) + "\n");
41
+ """
42
+
43
+ CURSOR_SDK_BRIDGE = r"""
44
+ import { Agent } from "@cursor/sdk";
45
+
46
+ async function readStdin() {
47
+ const chunks = [];
48
+ for await (const chunk of process.stdin) chunks.push(chunk);
49
+ return Buffer.concat(chunks).toString("utf8");
50
+ }
51
+
52
+ const input = JSON.parse(await readStdin());
53
+ const options = {
54
+ model: { id: input.model || "composer-latest" },
55
+ local: {
56
+ cwd: input.cwd,
57
+ sandboxOptions: { enabled: false },
58
+ },
59
+ };
60
+ if (process.env.CURSOR_API_KEY) {
61
+ options.apiKey = process.env.CURSOR_API_KEY;
62
+ }
63
+ const result = await Agent.prompt(input.prompt, options);
64
+ process.stdout.write(JSON.stringify({
65
+ status: result.status,
66
+ result: result.result ?? "",
67
+ durationMs: result.durationMs,
68
+ }) + "\n");
69
+ """
70
+
71
+
72
@dataclass(frozen=True)
class AgentResult:
    """Immutable record of one agent invocation.

    Holds the role label of the agent, the prompt it was given, and the
    captured stdout, stderr and exit status of the run.
    """

    role: str
    prompt: str
    stdout: str
    stderr: str
    returncode: int

    @property
    def ok(self) -> bool:
        """Return True when the agent exited with status zero."""
        exited_cleanly = self.returncode == 0
        return exited_cleanly
83
+
84
+
85
class Agent(Protocol):
    """Structural interface shared by every planner/implementer adapter."""

    # Label reported back in the AgentResult of each run.
    role: str

    def run(
        self,
        prompt: str,
        cwd: Path,
        output_schema: dict[str, object] | None = None,
    ) -> AgentResult:
        """Run the agent for one prompt."""
        ...
95
+
96
+
97
@dataclass(frozen=True)
class DryRunAgent:
    """Agent stand-in that echoes its prompt instead of calling a backend."""

    role: str

    def run(
        self,
        prompt: str,
        cwd: Path,
        output_schema: dict[str, object] | None = None,
    ) -> AgentResult:
        """Return a successful result whose stdout echoes the role, cwd and prompt.

        ``output_schema`` is accepted for interface compatibility and ignored.
        """
        echoed = f"[dry-run:{self.role}] cwd={cwd}\n{prompt}\n"
        return AgentResult(
            role=self.role,
            prompt=prompt,
            stdout=echoed,
            stderr="",
            returncode=0,
        )
114
+
115
+
116
@dataclass(frozen=True)
class CodexSdkAgent:
    """Codex planning/review adapter using the official @openai/codex-sdk.

    Each run spawns ``node`` with the ``CODEX_SDK_BRIDGE`` script, passes the
    request as a JSON document on stdin and reads the bridge's JSON reply from
    stdout.
    """

    role: str = "codex"
    model: str | None = None
    timeout: int = 900
    node_bin: str = "node"

    def run(
        self,
        prompt: str,
        cwd: Path,
        output_schema: dict[str, object] | None = None,
    ) -> AgentResult:
        """Run one Codex turn in ``cwd`` and return its captured result.

        Args:
            prompt: Text sent to the Codex thread.
            cwd: Working directory handed to the SDK.
            output_schema: Optional JSON schema forwarded to the bridge.

        Returns:
            An ``AgentResult`` whose stdout is the bridge's ``result`` string
            when the reply parses, otherwise the raw stdout.

        Raises:
            subprocess.TimeoutExpired: If the bridge exceeds ``timeout`` seconds.
        """
        # The shared lookup only tests for @cursor/sdk, so check for the Codex
        # SDK here: a workdir that vendors it must be the import root.
        if (cwd / "node_modules" / "@openai" / "codex-sdk").exists():
            package_root = cwd
        else:
            package_root = _node_package_root(cwd)

        request = json.dumps(
            {
                "cwd": str(cwd),
                "model": self.model,
                "prompt": prompt,
                "outputSchema": output_schema,
            }
        )
        completed = subprocess.run(
            [self.node_bin, "--input-type=module", "-e", CODEX_SDK_BRIDGE],
            cwd=package_root,
            input=request,
            text=True,
            # The bridge writes JSON.stringify output, which leaves non-ASCII
            # text unescaped; decode it as UTF-8 rather than the locale codec.
            encoding="utf-8",
            capture_output=True,
            timeout=self.timeout,
            check=False,
        )
        return AgentResult(
            self.role,
            prompt,
            _result_text(completed.stdout),
            completed.stderr,
            completed.returncode,
        )
160
+
161
+
162
@dataclass(frozen=True)
class CursorCliAgent:
    """Cursor implementation adapter using Cursor Agent CLI with Composer."""

    role: str = "cursor"
    cursor_bin: str = "cursor-agent"
    model: str | None = "composer-latest"
    timeout: int = 1800
    force: bool = True

    def run(
        self,
        prompt: str,
        cwd: Path,
        output_schema: dict[str, object] | None = None,
    ) -> AgentResult:
        """Run ``cursor-agent`` once in ``cwd`` with ``prompt`` as its final argument.

        Args:
            prompt: Instruction text passed to the CLI.
            cwd: Workspace directory; also used as the process cwd.
            output_schema: Accepted so the signature matches the ``Agent``
                protocol; the CLI has no structured-output option here, so
                the value is ignored.

        Returns:
            An ``AgentResult`` with the CLI's raw stdout, stderr and exit code.

        Raises:
            subprocess.TimeoutExpired: If the CLI exceeds ``timeout`` seconds.
        """
        command = [
            self.cursor_bin,
            "--print",
            "--output-format",
            "text",
            "--trust",
            "--workspace",
            str(cwd),
        ]
        if self.force:
            command.append("--force")
        if self.model:
            command.extend(["--model", self.model])
        command.append(prompt)
        completed = subprocess.run(
            command,
            cwd=cwd,
            text=True,
            capture_output=True,
            timeout=self.timeout,
            check=False,
            env=_cursor_sdk_env(),
        )
        return AgentResult(
            self.role,
            prompt,
            completed.stdout,
            completed.stderr,
            completed.returncode,
        )
203
+
204
+
205
@dataclass(frozen=True)
class CursorSdkAgent:
    """Cursor implementation adapter using the official @cursor/sdk.

    Each run spawns ``node`` with the ``CURSOR_SDK_BRIDGE`` script, passes the
    request as JSON on stdin and reads the bridge's JSON reply from stdout.
    """

    role: str = "cursor"
    model: str = "composer-latest"
    timeout: int = 1800
    node_bin: str = "node"

    def run(
        self,
        prompt: str,
        cwd: Path,
        output_schema: dict[str, object] | None = None,
    ) -> AgentResult:
        """Run one Cursor SDK prompt in ``cwd`` and return its captured result.

        Args:
            prompt: Instruction text sent to the agent.
            cwd: Working directory handed to the SDK.
            output_schema: Accepted so the signature matches the ``Agent``
                protocol; the bridge script does not read it, so it is ignored.

        Returns:
            An ``AgentResult`` whose stdout is the bridge's ``result`` string
            when the reply parses, and whose stderr carries an added hint when
            it mentions ``AuthenticationError``.

        Raises:
            subprocess.TimeoutExpired: If the bridge exceeds ``timeout`` seconds.
        """
        request = json.dumps({"cwd": str(cwd), "model": self.model, "prompt": prompt})
        completed = subprocess.run(
            [self.node_bin, "--input-type=module", "-e", CURSOR_SDK_BRIDGE],
            cwd=_node_package_root(cwd),
            input=request,
            text=True,
            # The bridge writes JSON.stringify output, which leaves non-ASCII
            # text unescaped; decode it as UTF-8 rather than the locale codec.
            encoding="utf-8",
            capture_output=True,
            timeout=self.timeout,
            check=False,
            env=_cursor_sdk_env(),
        )
        return AgentResult(
            self.role,
            prompt,
            _result_text(completed.stdout),
            _friendly_cursor_sdk_stderr(completed.stderr),
            completed.returncode,
        )
238
+
239
+
240
+ def _friendly_cursor_sdk_stderr(stderr: str) -> str:
241
+ if "AuthenticationError" not in stderr:
242
+ return stderr
243
+ return (
244
+ "Cursor SDK authentication failed. "
245
+ "Set CURSOR_API_KEY or make sure the official @cursor/sdk can resolve Cursor auth. "
246
+ "Original stderr follows:\n"
247
+ f"{stderr}"
248
+ )
249
+
250
+
251
+ def _node_package_root(workdir: Path) -> Path:
252
+ if (workdir / "node_modules" / "@cursor" / "sdk").exists():
253
+ return workdir
254
+ return Path(__file__).resolve().parents[2]
255
+
256
+
257
+ def _cursor_sdk_env() -> dict[str, str]:
258
+ env = os.environ.copy()
259
+ if env.get("CURSOR_API_KEY"):
260
+ return env
261
+
262
+ token = _read_macos_keychain_secret("aitdd.cursor_api_key")
263
+ if token:
264
+ env["CURSOR_API_KEY"] = token
265
+ return env
266
+
267
+
268
+ def _read_macos_keychain_secret(service: str) -> str | None:
269
+ completed = subprocess.run(
270
+ ["security", "find-generic-password", "-w", "-s", service],
271
+ text=True,
272
+ capture_output=True,
273
+ check=False,
274
+ )
275
+ if completed.returncode != 0:
276
+ return None
277
+ token = completed.stdout.strip()
278
+ return token or None
279
+
280
+
281
def _result_text(stdout: str) -> str:
    """Extract the ``result`` string from a bridge reply.

    Falls back to the raw ``stdout`` when no JSON object is found or its
    ``result`` entry is not a string.
    """
    payload = parse_json_object(stdout)
    text = payload.get("result")
    if isinstance(text, str):
        return text
    return stdout
285
+
286
+
287
def parse_json_object(text: str) -> dict[str, object]:
    """Parse the first JSON object from an agent response.

    The span from the first ``{`` to the last ``}`` is tried first. If that
    span is not valid JSON (stray braces before the payload, or several
    objects in one response), each ``{`` is tried in order and the first
    object that decodes is returned.

    Args:
        text: Raw agent output, possibly with text around the JSON.

    Returns:
        The decoded object, or an empty dict when no JSON object is found.
    """
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end < start:
        return {}

    try:
        value = json.loads(text[start : end + 1])
    except json.JSONDecodeError:
        pass
    else:
        return value if isinstance(value, dict) else {}

    # Fallback: scan forward for the first "{" that starts a decodable object.
    decoder = json.JSONDecoder()
    position = start
    while position != -1:
        try:
            value, _ = decoder.raw_decode(text, position)
        except json.JSONDecodeError:
            position = text.find("{", position + 1)
            continue
        return value if isinstance(value, dict) else {}
    return {}
aitdd/cli.py ADDED
@@ -0,0 +1,73 @@
1
+ """Command line entrypoint."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ from pathlib import Path
7
+
8
+ from .runner import TddLoop, TddLoopConfig
9
+
10
+
11
def build_parser() -> argparse.ArgumentParser:
    """Build the ``aitdd`` parser with its ``run`` and ``resume`` subcommands.

    Both subcommands take the same arguments; ``resume`` additionally sets
    ``resume=True`` on the parsed namespace.
    """
    root = argparse.ArgumentParser(prog="aitdd")
    commands = root.add_subparsers(dest="command", required=True)

    run_cmd = commands.add_parser("run", help="run the Codex/Cursor TDD loop")
    resume_cmd = commands.add_parser("resume", help="resume from .aitdd/progress.json")
    for subcommand in (run_cmd, resume_cmd):
        _add_run_arguments(subcommand)

    resume_cmd.set_defaults(resume=True)
    return root
22
+
23
+
24
+ def _add_run_arguments(run: argparse.ArgumentParser) -> None:
25
+ run.add_argument("goal", nargs="?", default="")
26
+ run.add_argument("--workdir", default=".")
27
+ run.add_argument("--test-command", default="pytest")
28
+ run.add_argument("--max-cycles", type=int, default=5)
29
+ run.add_argument("--spec", type=Path, help="path to aitdd.yaml")
30
+ run.add_argument("--codex-model")
31
+ run.add_argument("--cursor-model", default="composer-latest")
32
+ run.add_argument(
33
+ "--cursor-backend",
34
+ choices=["cli", "sdk"],
35
+ default="sdk",
36
+ help="use cursor-agent CLI or the official @cursor/sdk bridge",
37
+ )
38
+ run.add_argument("--dry-run", action="store_true")
39
+
40
+
41
def main(argv: list[str] | None = None) -> int:
    """Parse the command line, run the TDD loop and print one line per cycle.

    Args:
        argv: Argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        0 after the loop finishes, 2 for a command other than run/resume.

    Raises:
        SystemExit: If neither a goal nor ``--spec`` was supplied.
    """
    args = build_parser().parse_args(argv)
    if args.command not in {"run", "resume"}:
        return 2

    if not (args.goal or args.spec):
        raise SystemExit("goal is required unless --spec is provided")

    spec_path = args.spec.resolve() if args.spec else None
    config = TddLoopConfig(
        goal=args.goal,
        workdir=Path(args.workdir).resolve(),
        test_command=args.test_command,
        max_cycles=args.max_cycles,
        spec_path=spec_path,
        codex_model=args.codex_model,
        cursor_model=args.cursor_model,
        cursor_backend=args.cursor_backend,
        # Only the "resume" subparser sets this attribute.
        resume=getattr(args, "resume", False),
        dry_run=args.dry_run,
    )
    for result in TddLoop(config).run():
        print(
            f"cycle={result.index} red={result.red.returncode} "
            f"green={result.green.returncode} refactor={result.refactor.returncode} "
            f"complete={result.complete} "
            f"one_behavior_only={result.review_gate.one_behavior_only} "
            f"minimal_green={result.review_gate.minimal_green} "
            f"boundary_ok={result.review_gate.acceptance_unit_boundary_ok}"
        )
    return 0
70
+
71
+
72
+ if __name__ == "__main__":
73
+ raise SystemExit(main())
aitdd/hook_policy.py ADDED
@@ -0,0 +1,85 @@
1
+ """TDD phase policy shared by the loop and Codex hooks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from enum import Enum
7
+
8
+ from codex_hookkit import Decision, allow, deny
9
+
10
+
11
class TddPhase(str, Enum):
    """The three phases of one TDD cycle, with lowercase string values."""

    RED = "red"
    GREEN = "green"
    REFACTOR = "refactor"
15
+
16
+
17
@dataclass(frozen=True)
class PhaseTestResult:
    """Outcome of one test command: the command, its exit code and its output."""

    command: str
    returncode: int
    stdout: str = ""
    stderr: str = ""

    @property
    def passed(self) -> bool:
        """Return True when the command exited with status zero."""
        return not self.failed

    @property
    def failed(self) -> bool:
        """Return True when the command exited with a non-zero status."""
        return self.returncode != 0
31
+
32
+
33
@dataclass(frozen=True)
class ExpectedRed:
    """Description of the failure a RED phase is expected to produce."""

    # Expected exit status, kept as a string label.
    exit_code: str = "nonzero"
    # Output fragments of which at least one must appear (None: no requirement).
    must_include: list[str] | None = None
    # Output fragments none of which may appear (None: nothing forbidden).
    must_not_include: list[str] | None = None
38
+
39
+
40
def evaluate_phase(
    phase: TddPhase,
    test_run: PhaseTestResult,
    expected_red_failure: list[str] | ExpectedRed | None = None,
) -> Decision:
    """Decide whether a test run is acceptable for the given TDD phase.

    GREEN and REFACTOR are accepted only when the tests pass. RED is accepted
    only when the tests fail and, if an expectation is given, the combined
    output contains none of the forbidden fragments and at least one of the
    required fragments (when any are required).

    Args:
        phase: Phase being evaluated.
        test_run: Result of the test command for that phase.
        expected_red_failure: Expectation for RED, either as a list of
            required fragments or as an ``ExpectedRed``; ignored otherwise.

    Returns:
        An allow or deny ``Decision`` carrying an explanatory message.
    """
    if phase is not TddPhase.RED:
        label = phase.value.upper()
        if test_run.passed:
            return allow.decision(f"{label} accepted: tests pass.")
        return deny.decision(
            f"{label} rejected: tests are failing. "
            "Restore a passing suite before moving on."
        )

    if test_run.passed:
        return deny.decision(
            "RED rejected: tests passed. Add the smallest meaningful failing test first."
        )

    expected = _normalize_expected_red(expected_red_failure)
    if not expected:
        return allow.decision("RED accepted: tests fail as expected.")

    combined_output = f"{test_run.stdout}\n{test_run.stderr}"

    forbidden = expected.must_not_include or []
    for fragment in forbidden:
        if fragment in combined_output:
            return deny.decision(
                "RED rejected: tests failed with a forbidden reason. "
                f"Forbidden: {', '.join(forbidden)}"
            )

    required = expected.must_include or []
    matched = any(fragment in combined_output for fragment in required)
    if required and not matched:
        return deny.decision(
            "RED rejected: tests failed, but not for the expected reason. "
            f"Expected one of: {', '.join(required)}"
        )
    return allow.decision("RED accepted: tests fail for the expected reason.")
78
+
79
+
80
+ def _normalize_expected_red(value: list[str] | ExpectedRed | None) -> ExpectedRed | None:
81
+ if value is None:
82
+ return None
83
+ if isinstance(value, ExpectedRed):
84
+ return value
85
+ return ExpectedRed(must_include=value)
aitdd/progress.py ADDED
@@ -0,0 +1,165 @@
1
+ """Minimal persistent progress and report files."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import subprocess
7
+ from dataclasses import asdict, dataclass, field
8
+ from datetime import UTC, datetime
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ from .hook_policy import PhaseTestResult
13
+ from .review import ReviewGate
14
+
15
+
16
+ @dataclass
17
+ class CycleProgress:
18
+ index: int
19
+ behavior: str
20
+ status: str = "started"
21
+ started_at: str = field(default_factory=lambda: datetime.now(UTC).isoformat())
22
+ finished_at: str | None = None
23
+ red: dict[str, Any] | None = None
24
+ green: dict[str, Any] | None = None
25
+ refactor: dict[str, Any] | None = None
26
+ review_gate: dict[str, Any] | None = None
27
+ issues: list[str] = field(default_factory=list)
28
+
29
+
30
+ class ProgressStore:
31
+ def __init__(self, workdir: Path) -> None:
32
+ self.workdir = workdir
33
+ self.root = workdir / ".aitdd"
34
+ self.cycles_dir = self.root / "cycles"
35
+ self.progress_path = self.root / "progress.json"
36
+ self.report_path = self.root / "report.md"
37
+ self.root.mkdir(exist_ok=True)
38
+ self.cycles_dir.mkdir(exist_ok=True)
39
+ self.data = self._load()
40
+
41
+ def start_cycle(self, index: int, behavior: str, plan: str) -> CycleProgress:
42
+ cycle = CycleProgress(index=index, behavior=behavior)
43
+ self._upsert(cycle)
44
+ self.append_report(f"## Cycle {index}: {behavior}\n\n### Plan\n\n{plan}\n")
45
+ return cycle
46
+
47
+ def record_phase(self, cycle: CycleProgress, phase: str, result: PhaseTestResult) -> None:
48
+ setattr(cycle, phase, _phase_dict(result))
49
+ self.snapshot_diff(cycle.index, phase)
50
+ self._upsert(cycle)
51
+ self.append_report(
52
+ f"\n### {phase.upper()}\n\n"
53
+ f"- command: `{result.command}`\n"
54
+ f"- returncode: `{result.returncode}`\n"
55
+ )
56
+
57
+ def record_review(self, cycle: CycleProgress, review_gate: ReviewGate) -> None:
58
+ cycle.review_gate = asdict(review_gate)
59
+ cycle.issues = review_gate.issues
60
+ self._upsert(cycle)
61
+ self.append_report(
62
+ "\n### Review Gate\n\n"
63
+ f"- one_behavior_only: `{review_gate.one_behavior_only}`\n"
64
+ f"- minimal_green: `{review_gate.minimal_green}`\n"
65
+ f"- tests_unchanged_in_refactor: `{review_gate.tests_unchanged_in_refactor}`\n"
66
+ f"- acceptance_unit_boundary_ok: `{review_gate.acceptance_unit_boundary_ok}`\n"
67
+ f"- forbidden_respected: `{review_gate.forbidden_respected}`\n"
68
+ f"- issues: `{', '.join(review_gate.issues) if review_gate.issues else 'none'}`\n"
69
+ )
70
+
71
+ def finish_cycle(self, cycle: CycleProgress, status: str) -> None:
72
+ cycle.status = status
73
+ cycle.finished_at = datetime.now(UTC).isoformat()
74
+ self._upsert(cycle)
75
+ self.append_report(f"\n### Result\n\n`{status}`\n")
76
+
77
+ def fail_cycle(self, cycle: CycleProgress | None, error: BaseException) -> None:
78
+ if cycle is None:
79
+ self.data["last_error"] = str(error)
80
+ self._write()
81
+ self.append_report(f"\n## Failure\n\n{error}\n")
82
+ return
83
+ cycle.status = "failed"
84
+ cycle.finished_at = datetime.now(UTC).isoformat()
85
+ cycle.issues.append(str(error))
86
+ self._upsert(cycle)
87
+ self.append_report(f"\n### Failure\n\n{error}\n")
88
+
89
+ def next_cycle_index(self) -> int:
90
+ cycles = self.data.get("cycles", [])
91
+ for item in cycles:
92
+ if item.get("status") != "completed":
93
+ return int(item.get("index", 1))
94
+ return len(cycles) + 1
95
+
96
+ def snapshot_diff(self, index: int, phase: str) -> None:
97
+ path = self.cycles_dir / f"{index:03d}-{phase}.diff"
98
+ diff = _git_diff(self.workdir)
99
+ path.write_text(diff or "# No git diff available.\n")
100
+
101
+ def append_report(self, text: str) -> None:
102
+ if not self.report_path.exists():
103
+ self.report_path.write_text("# AiTdd Report\n")
104
+ with self.report_path.open("a") as stream:
105
+ stream.write(text)
106
+ if not text.endswith("\n"):
107
+ stream.write("\n")
108
+
109
+ def _load(self) -> dict[str, Any]:
110
+ if not self.progress_path.exists():
111
+ return {"cycles": []}
112
+ return json.loads(self.progress_path.read_text())
113
+
114
+ def _upsert(self, cycle: CycleProgress) -> None:
115
+ cycles = [item for item in self.data.get("cycles", []) if item.get("index") != cycle.index]
116
+ cycles.append(asdict(cycle))
117
+ cycles.sort(key=lambda item: item["index"])
118
+ self.data["cycles"] = cycles
119
+ self._write()
120
+
121
+ def _write(self) -> None:
122
+ self.progress_path.write_text(json.dumps(self.data, ensure_ascii=False, indent=2) + "\n")
123
+
124
+
125
+ def _phase_dict(result: PhaseTestResult) -> dict[str, Any]:
126
+ return {
127
+ "command": result.command,
128
+ "returncode": result.returncode,
129
+ "stdout_tail": result.stdout[-2000:],
130
+ "stderr_tail": result.stderr[-2000:],
131
+ }
132
+
133
+
134
+ def _git_diff(workdir: Path) -> str:
135
+ in_git = subprocess.run(
136
+ ["git", "-C", str(workdir), "rev-parse", "--is-inside-work-tree"],
137
+ text=True,
138
+ capture_output=True,
139
+ check=False,
140
+ )
141
+ if in_git.returncode != 0:
142
+ return ""
143
+
144
+ tracked = subprocess.run(
145
+ ["git", "-C", str(workdir), "diff", "--binary", "--no-ext-diff"],
146
+ text=True,
147
+ capture_output=True,
148
+ check=False,
149
+ )
150
+ diff = tracked.stdout
151
+ untracked = subprocess.run(
152
+ ["git", "-C", str(workdir), "ls-files", "--others", "--exclude-standard"],
153
+ text=True,
154
+ capture_output=True,
155
+ check=False,
156
+ )
157
+ for relative in untracked.stdout.splitlines():
158
+ file_diff = subprocess.run(
159
+ ["git", "-C", str(workdir), "diff", "--no-index", "--", "/dev/null", relative],
160
+ text=True,
161
+ capture_output=True,
162
+ check=False,
163
+ )
164
+ diff += file_diff.stdout
165
+ return diff
aitdd/review.py ADDED
@@ -0,0 +1,102 @@
1
+ """Structured Codex review gates."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from dataclasses import dataclass, field
7
+ from typing import Any
8
+
9
+ REVIEW_SCHEMA: dict[str, object] = {
10
+ "type": "object",
11
+ "properties": {
12
+ "complete": {"type": "boolean"},
13
+ "reason": {"type": "string"},
14
+ "one_behavior_only": {"type": "boolean"},
15
+ "minimal_green": {"type": "boolean"},
16
+ "tests_unchanged_in_refactor": {"type": "boolean"},
17
+ "acceptance_unit_boundary_ok": {"type": "boolean"},
18
+ "forbidden_respected": {"type": "boolean"},
19
+ "issues": {"type": "array", "items": {"type": "string"}},
20
+ },
21
+ "required": [
22
+ "complete",
23
+ "reason",
24
+ "one_behavior_only",
25
+ "minimal_green",
26
+ "tests_unchanged_in_refactor",
27
+ "acceptance_unit_boundary_ok",
28
+ "forbidden_respected",
29
+ "issues",
30
+ ],
31
+ "additionalProperties": False,
32
+ }
33
+
34
+ PASSING_REVIEW_JSON = (
35
+ '{"complete": true, "reason": "dry run", "one_behavior_only": true, '
36
+ '"minimal_green": true, "tests_unchanged_in_refactor": true, '
37
+ '"acceptance_unit_boundary_ok": true, "forbidden_respected": true, "issues": []}'
38
+ )
39
+
40
+
41
@dataclass(frozen=True)
class ReviewGate:
    """Structured verdict of a Codex review for one TDD cycle.

    ``complete`` and ``reason`` are informational. The five remaining boolean
    checks must all hold for the gate to pass.
    """

    complete: bool
    reason: str
    one_behavior_only: bool
    minimal_green: bool
    tests_unchanged_in_refactor: bool
    acceptance_unit_boundary_ok: bool
    forbidden_respected: bool
    issues: list[str] = field(default_factory=list)

    @classmethod
    def from_text(cls, text: str) -> ReviewGate:
        """Build a gate from review text containing a JSON object.

        Flags fail closed: only a JSON ``true`` counts as true, so a missing
        key or a non-boolean such as the string ``"false"`` cannot pass a
        check. ``issues`` may be a list, ``null``/missing (no issues) or a
        single scalar (treated as one issue).

        Raises:
            ValueError: If the text holds no parseable JSON object.
        """
        value = _parse_json_object(text)

        def flag(key: str) -> bool:
            return value.get(key) is True

        raw_issues = value.get("issues")
        if raw_issues is None:
            issues: list[str] = []
        elif isinstance(raw_issues, (list, tuple)):
            issues = [str(item) for item in raw_issues]
        else:
            # A bare string must stay one issue, not be split into characters.
            issues = [str(raw_issues)]

        return cls(
            complete=flag("complete"),
            reason=str(value.get("reason") or ""),
            one_behavior_only=flag("one_behavior_only"),
            minimal_green=flag("minimal_green"),
            tests_unchanged_in_refactor=flag("tests_unchanged_in_refactor"),
            acceptance_unit_boundary_ok=flag("acceptance_unit_boundary_ok"),
            forbidden_respected=flag("forbidden_respected"),
            issues=issues,
        )

    @property
    def passed(self) -> bool:
        """Return True when all five quality checks are true."""
        return all(
            [
                self.one_behavior_only,
                self.minimal_green,
                self.tests_unchanged_in_refactor,
                self.acceptance_unit_boundary_ok,
                self.forbidden_respected,
            ]
        )

    def failure_message(self) -> str:
        """Describe the failed checks, followed by the issues or the reason."""
        failed = []
        if not self.one_behavior_only:
            failed.append("one_behavior_only=false")
        if not self.minimal_green:
            failed.append("minimal_green=false")
        if not self.tests_unchanged_in_refactor:
            failed.append("tests_unchanged_in_refactor=false")
        if not self.acceptance_unit_boundary_ok:
            failed.append("acceptance_unit_boundary_ok=false")
        if not self.forbidden_respected:
            failed.append("forbidden_respected=false")
        details = "; ".join(self.issues) if self.issues else self.reason
        return f"Codex review gate failed: {', '.join(failed)}. {details}"


def _parse_json_object(text: str) -> dict[str, Any]:
    """Parse the span from the first ``{`` to the last ``}`` as a JSON object.

    Raises:
        ValueError: If no such span exists, it is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError``), or the decoded
            value is not an object.
    """
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end == -1 or end < start:
        raise ValueError("Codex review did not return a JSON object")
    value = json.loads(text[start : end + 1])
    if not isinstance(value, dict):
        raise ValueError("Codex review JSON must be an object")
    return value
aitdd/runner.py ADDED
@@ -0,0 +1,341 @@
1
+ """RED-GREEN-REFACTOR orchestration."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import subprocess
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+
9
+ from .agents import (
10
+ Agent,
11
+ AgentResult,
12
+ CodexSdkAgent,
13
+ CursorCliAgent,
14
+ CursorSdkAgent,
15
+ DryRunAgent,
16
+ )
17
+ from .hook_policy import PhaseTestResult, TddPhase, evaluate_phase
18
+ from .progress import CycleProgress, ProgressStore
19
+ from .review import PASSING_REVIEW_JSON, REVIEW_SCHEMA, ReviewGate
20
+ from .spec import AitddSpec, CycleSpec
21
+
22
+
23
@dataclass(frozen=True)
class TddLoopConfig:
    """Immutable settings for one ``TddLoop`` run."""

    # Free-text goal; used when no spec file is given.
    goal: str
    # Directory the agents and the test command operate in.
    workdir: Path
    test_command: str = "pytest"
    max_cycles: int = 5
    codex_model: str | None = None
    cursor_model: str | None = None
    # "sdk" or "cli"; any other value is rejected when the agent is created.
    cursor_backend: str = "sdk"
    spec_path: Path | None = None
    resume: bool = False
    dry_run: bool = False
35
+
36
+
37
@dataclass(frozen=True)
class CycleResult:
    """Immutable summary of one finished cycle."""

    index: int
    # Test results recorded for each phase of the cycle.
    red: PhaseTestResult
    green: PhaseTestResult
    refactor: PhaseTestResult
    # Whether the loop treated this cycle as the last one.
    complete: bool
    # Raw review text and the gate parsed from it.
    review: str
    review_gate: ReviewGate
46
+
47
+
48
+ class TddLoop:
49
+ def __init__(
50
+ self,
51
+ config: TddLoopConfig,
52
+ planner: Agent | None = None,
53
+ implementer: Agent | None = None,
54
+ ) -> None:
55
+ self.config = config
56
+ self.spec = AitddSpec.from_file(config.spec_path) if config.spec_path else None
57
+ self.progress = ProgressStore(config.workdir)
58
+ self.planner = planner or (
59
+ DryRunAgent("codex-planner")
60
+ if config.dry_run
61
+ else CodexSdkAgent(model=config.codex_model)
62
+ )
63
+ self.implementer = implementer or (
64
+ DryRunAgent("cursor-implementer")
65
+ if config.dry_run
66
+ else self._create_cursor_agent()
67
+ )
68
+
69
+ def _create_cursor_agent(self) -> Agent:
70
+ model = self.config.cursor_model or "composer-latest"
71
+ if self.config.cursor_backend == "cli":
72
+ return CursorCliAgent(model=model)
73
+ if self.config.cursor_backend == "sdk":
74
+ return CursorSdkAgent(model=model)
75
+ raise ValueError(f"Unsupported cursor backend: {self.config.cursor_backend}")
76
+
77
+ def run(self) -> list[CycleResult]:
78
+ results: list[CycleResult] = []
79
+ start_index = self.progress.next_cycle_index() if self.config.resume else 1
80
+ current_progress: CycleProgress | None = None
81
+ try:
82
+ for index in range(start_index, self.config.max_cycles + 1):
83
+ cycle = self._cycle_for(index)
84
+ plan = self._plan(index, cycle)
85
+ current_progress = self.progress.start_cycle(
86
+ index,
87
+ self._behavior(index, cycle),
88
+ plan,
89
+ )
90
+
91
+ self._implement(TddPhase.RED, plan)
92
+ red = self._run_tests(TddPhase.RED, cycle)
93
+ self.progress.record_phase(current_progress, "red", red)
94
+ self._require(TddPhase.RED, red, cycle)
95
+
96
+ self._implement(TddPhase.GREEN, plan)
97
+ green = self._run_tests(TddPhase.GREEN)
98
+ self.progress.record_phase(current_progress, "green", green)
99
+ self._require(TddPhase.GREEN, green)
100
+
101
+ review = self._review(index, plan, green)
102
+ review_gate = ReviewGate.from_text(review)
103
+ self.progress.record_review(current_progress, review_gate)
104
+ self._require_review_gate(review_gate)
105
+
106
+ complete = self._is_complete(review_gate, index)
107
+ if not complete:
108
+ tests_before = self._snapshot_test_files()
109
+ self._implement(TddPhase.REFACTOR, review)
110
+ self._require_refactor_kept_tests(tests_before)
111
+ refactor = self._run_tests(TddPhase.REFACTOR)
112
+ self.progress.record_phase(current_progress, "refactor", refactor)
113
+ self._require(TddPhase.REFACTOR, refactor)
114
+
115
+ if complete:
116
+ self._require_done_when()
117
+
118
+ status = "completed" if complete else "completed"
119
+ self.progress.finish_cycle(current_progress, status)
120
+ results.append(
121
+ CycleResult(index, red, green, refactor, complete, review, review_gate)
122
+ )
123
+ if complete:
124
+ break
125
+ except Exception as exc:
126
+ self.progress.fail_cycle(current_progress, exc)
127
+ raise
128
+ return results
129
+
130
+ def _cycle_for(self, index: int) -> CycleSpec | None:
131
+ if not self.spec or index > len(self.spec.cycles):
132
+ return None
133
+ return self.spec.cycles[index - 1]
134
+
135
+ def _behavior(self, index: int, cycle: CycleSpec | None) -> str:
136
+ return cycle.behavior if cycle else f"cycle {index}"
137
+
138
+ def _plan(self, index: int, cycle: CycleSpec | None) -> str:
139
+ spec_text = self.spec.describe() if self.spec else f"Goal:\n{self.config.goal}"
140
+ cycle_text = (
141
+ self._cycle_text(cycle)
142
+ if cycle
143
+ else "Codex が次の最小 public behavior を 1 つだけ選んでください。"
144
+ )
145
+ prompt = f"""
146
+ あなたは t-wada さんの TDD の進め方を尊重する計画担当です。
147
+ 作業ディレクトリを読み、次の最小の RED を 1 つだけ計画してください。
148
+ 実装やファイル編集は絶対にしないでください。
149
+ 1 サイクルで追加してよい public behavior は 1 つだけです。
150
+ acceptance test と unit test の境界を守ってください。
151
+
152
+ 仕様:
153
+ {spec_text}
154
+
155
+ サイクル: {index}
156
+ 今回の対象:
157
+ {cycle_text}
158
+
159
+ 出力は次を含めてください:
160
+ - 次に追加する最小テスト
161
+ - 期待する失敗理由
162
+ - GREEN で許される最小実装
163
+ - リファクタリング観点
164
+ """.strip()
165
+ return self._run_agent(self.planner, prompt).stdout
166
+
167
+ def _cycle_text(self, cycle: CycleSpec) -> str:
168
+ lines = [f"Behavior: {cycle.behavior}"]
169
+ if cycle.expected_red_failure:
170
+ lines.append("Expected RED failure:")
171
+ lines.append(f"- exit_code: {cycle.expected_red_failure.exit_code}")
172
+ for item in cycle.expected_red_failure.must_include or []:
173
+ lines.append(f"- must_include: {item}")
174
+ for item in cycle.expected_red_failure.must_not_include or []:
175
+ lines.append(f"- must_not_include: {item}")
176
+ if cycle.notes:
177
+ lines.append("Notes:")
178
+ lines.extend(f"- {item}" for item in cycle.notes)
179
+ return "\n".join(lines)
180
+
181
+ def _implement(self, phase: TddPhase, context: str) -> None:
182
+ prompts = {
183
+ TddPhase.RED: (
184
+ "失敗する最小テストだけを書いてください。"
185
+ "プロダクトコードは原則変更しないでください。"
186
+ ),
187
+ TddPhase.GREEN: "今ある失敗を通すための最小実装だけを書いてください。",
188
+ TddPhase.REFACTOR: (
189
+ "テストを通したまま設計を少しだけ良くしてください。"
190
+ "振る舞いは変えないでください。テストファイルは変更しないでください。"
191
+ ),
192
+ }
193
+ prompt = f"""
194
+ あなたは Cursor 実装担当です。t-wada 流の RED-GREEN-REFACTOR を厳守します。
195
+ 現在フェーズ: {phase.value.upper()}
196
+
197
+ 指示:
198
+ {prompts[phase]}
199
+ - 1 サイクルで追加してよい public behavior は 1 つだけです。
200
+ - まだテストされていない先回り実装は禁止です。
201
+ - 受け入れテストとユニットテストの責務を混ぜないでください。
202
+
203
+ 計画またはレビュー:
204
+ {context}
205
+ """.strip()
206
+ self._run_agent(self.implementer, prompt)
207
+
208
+ def _review(self, index: int, plan: str, test_run: PhaseTestResult) -> str:
209
+ if self.config.dry_run and isinstance(self.planner, DryRunAgent):
210
+ return PASSING_REVIEW_JSON
211
+
212
+ goal = self.spec.describe() if self.spec else self.config.goal
213
+ prompt = f"""
214
+ あなたは Codex レビュー担当です。作業ディレクトリを読み、TDD サイクルの品質をレビューしてください。
215
+ 実装やファイル編集は絶対にしないでください。
216
+ 次を厳しく確認してください:
217
+ - 1 サイクルで public behavior が 1 つだけ増えているか
218
+ - GREEN は RED を通す最小差分か
219
+ - REFACTOR でテストが変更されていないか
220
+ - 受け入れテストとユニットテストの境界が守られているか
221
+ - forbidden に触れていないか
222
+
223
+ ゴール:
224
+ {goal}
225
+
226
+ サイクル: {index}
227
+ 計画:
228
+ {plan}
229
+
230
+ テスト結果:
231
+ command: {test_run.command}
232
+ returncode: {test_run.returncode}
233
+
234
+ 最後は必ず JSON schema に従った JSON だけを返してください。
235
+ """.strip()
236
+ if isinstance(self.planner, CodexSdkAgent):
237
+ return self.planner.run(prompt, self.config.workdir, REVIEW_SCHEMA).stdout
238
+ return self._run_agent(self.planner, prompt).stdout
239
+
240
+ def _is_complete(self, review_gate: ReviewGate, index: int) -> bool:
241
+ if self.spec and self.spec.cycles:
242
+ return index >= len(self.spec.cycles)
243
+ return review_gate.complete
244
+
245
+ def _require_done_when(self) -> None:
246
+ if not self.spec or "acceptance_tests_pass" not in self.spec.done_when:
247
+ return
248
+
249
+ command = self.spec.acceptance_test_command
250
+ if not command and self.spec.acceptance_tests:
251
+ command = "pytest " + " ".join(self.spec.acceptance_tests)
252
+ command = command or self.config.test_command
253
+ result = self._run_command(command)
254
+ if result.failed:
255
+ raise RuntimeError("done_when rejected: acceptance_tests_pass failed.")
256
+
257
+ def _require_review_gate(self, review_gate: ReviewGate) -> None:
258
+ if not review_gate.passed:
259
+ raise RuntimeError(review_gate.failure_message())
260
+
261
+ def _run_tests(
262
+ self,
263
+ phase: TddPhase,
264
+ cycle: CycleSpec | None = None,
265
+ ) -> PhaseTestResult:
266
+ if self.config.dry_run:
267
+ code = 1 if phase is TddPhase.RED else 0
268
+ stderr = ""
269
+ if phase is TddPhase.RED and cycle and cycle.expected_red_failure:
270
+ includes = cycle.expected_red_failure.must_include or []
271
+ stderr = f"{includes[0]} [dry-run]\n" if includes else "expected RED [dry-run]\n"
272
+ return PhaseTestResult(
273
+ self.config.test_command,
274
+ code,
275
+ stdout=f"[dry-run:{phase.value}]\n",
276
+ stderr=stderr,
277
+ )
278
+
279
+ completed = subprocess.run(
280
+ self.config.test_command,
281
+ cwd=self.config.workdir,
282
+ shell=True,
283
+ text=True,
284
+ capture_output=True,
285
+ check=False,
286
+ )
287
+ return PhaseTestResult(
288
+ command=self.config.test_command,
289
+ returncode=completed.returncode,
290
+ stdout=completed.stdout,
291
+ stderr=completed.stderr,
292
+ )
293
+
294
+ def _run_command(self, command: str) -> PhaseTestResult:
295
+ completed = subprocess.run(
296
+ command,
297
+ cwd=self.config.workdir,
298
+ shell=True,
299
+ text=True,
300
+ capture_output=True,
301
+ check=False,
302
+ )
303
+ return PhaseTestResult(
304
+ command=command,
305
+ returncode=completed.returncode,
306
+ stdout=completed.stdout,
307
+ stderr=completed.stderr,
308
+ )
309
+
310
+ def _require(
311
+ self,
312
+ phase: TddPhase,
313
+ test_run: PhaseTestResult,
314
+ cycle: CycleSpec | None = None,
315
+ ) -> None:
316
+ expected = cycle.expected_red_failure if cycle else None
317
+ decision = evaluate_phase(phase, test_run, expected)
318
+ if decision.denied:
319
+ raise RuntimeError(decision.reason)
320
+
321
+ def _snapshot_test_files(self) -> dict[Path, str]:
322
+ snapshots: dict[Path, str] = {}
323
+ for path in self.config.workdir.rglob("*"):
324
+ if not path.is_file() or "__pycache__" in path.parts:
325
+ continue
326
+ if path.name.startswith("test_") or path.parent.name in {"tests", "test"}:
327
+ snapshots[path.relative_to(self.config.workdir)] = path.read_text(errors="ignore")
328
+ return snapshots
329
+
330
+ def _require_refactor_kept_tests(self, before: dict[Path, str]) -> None:
331
+ after = self._snapshot_test_files()
332
+ if before != after:
333
+ raise RuntimeError("REFACTOR rejected: test files changed during refactor phase.")
334
+
335
+ def _run_agent(self, agent: Agent, prompt: str) -> AgentResult:
336
+ result = agent.run(prompt, self.config.workdir)
337
+ if not result.ok:
338
+ raise RuntimeError(
339
+ f"{result.role} failed with exit code {result.returncode}\n{result.stderr}"
340
+ )
341
+ return result
aitdd/spec.py ADDED
@@ -0,0 +1,116 @@
1
+ """Specification support for complex TDD loops."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ import yaml
10
+
11
+ from .hook_policy import ExpectedRed
12
+
13
+
14
@dataclass(frozen=True)
class CycleSpec:
    """One iteration of the TDD loop as declared in the spec file."""

    # Description of the behavior this cycle drives out (required).
    behavior: str
    # Optional expectation about how the RED test run should fail.
    expected_red_failure: ExpectedRed | None = None
    # Free-form notes attached to the cycle; a fresh list per instance.
    notes: list[str] = field(default_factory=list)
19
+
20
+
21
@dataclass(frozen=True)
class AitddSpec:
    """Declarative description of a TDD run, typically loaded from a YAML file.

    Only ``goal`` is mandatory; every other field defaults to an empty list,
    ``None`` or, for ``done_when``, ``["all_cycles_complete"]``.
    """

    # Overall goal of the run (required, non-empty when loaded from a file).
    goal: str
    # Design / process constraints.
    constraints: list[str] = field(default_factory=list)
    # Public API surface to grow.
    public_api: list[str] = field(default_factory=list)
    # Things that must not be done (e.g. speculative implementation).
    forbidden: list[str] = field(default_factory=list)
    # Outer acceptance tests.
    acceptance_tests: list[str] = field(default_factory=list)
    # Inner unit tests.
    unit_tests: list[str] = field(default_factory=list)
    # Completion conditions.
    done_when: list[str] = field(default_factory=lambda: ["all_cycles_complete"])
    # Optional command for the acceptance tests.
    acceptance_test_command: str | None = None
    # Ordered TDD cycles, one behavior each.
    cycles: list[CycleSpec] = field(default_factory=list)

    @classmethod
    def from_file(cls, path: Path) -> AitddSpec:
        """Load and validate a spec from the YAML file at ``path``.

        Scalar values given where a list is expected are wrapped into a
        one-element list, and every list item is coerced to ``str``.

        Args:
            path: Location of the YAML spec.

        Returns:
            The parsed ``AitddSpec``.

        Raises:
            ValueError: If the document is not a mapping, ``goal`` is missing
                or blank, or a cycle entry is malformed.
        """
        # Decode explicitly as UTF-8 instead of the locale default, so specs
        # with non-ASCII text load identically on every platform.
        raw = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
        if not isinstance(raw, dict):
            raise ValueError("aitdd spec must be a YAML mapping")

        goal = str(raw.get("goal") or "").strip()
        if not goal:
            raise ValueError("aitdd spec requires a non-empty 'goal'")

        def strings(key: str) -> list[str]:
            """Return the value under ``key`` as a list of strings."""
            return [str(item) for item in _list(raw.get(key))]

        command = raw.get("acceptance_test_command")
        return cls(
            goal=goal,
            constraints=strings("constraints"),
            public_api=strings("public_api"),
            forbidden=strings("forbidden"),
            acceptance_tests=strings("acceptance_tests"),
            unit_tests=strings("unit_tests"),
            # An absent or empty list falls back to the default condition.
            done_when=strings("done_when") or ["all_cycles_complete"],
            acceptance_test_command=None if command is None else str(command),
            cycles=[_cycle_from_raw(item) for item in _list(raw.get("cycles"))],
        )

    def describe(self) -> str:
        """Render the goal and the non-empty list sections as plain text.

        Sections are separated by a blank line. ``done_when``,
        ``acceptance_test_command`` and ``cycles`` are not included.
        """
        sections = [
            f"Goal:\n{self.goal}",
            _format_list("Constraints", self.constraints),
            _format_list("Public API", self.public_api),
            _format_list("Forbidden", self.forbidden),
            _format_list("Acceptance tests", self.acceptance_tests),
            _format_list("Unit tests", self.unit_tests),
        ]
        # Empty lists render as "", which is dropped here.
        return "\n\n".join(section for section in sections if section)
68
+
69
+
70
def _cycle_from_raw(raw: Any) -> CycleSpec:
    """Build a ``CycleSpec`` from one entry of the spec's ``cycles`` list.

    A plain string becomes the cycle's behavior as-is. A mapping must carry
    a non-blank ``behavior`` and may add an expected RED failure and notes.

    Raises:
        ValueError: If ``raw`` is neither a string nor a mapping, or the
            mapping has no usable ``behavior``.
    """
    if isinstance(raw, str):
        return CycleSpec(behavior=raw)

    if isinstance(raw, dict):
        behavior = str(raw.get("behavior") or "").strip()
        if behavior:
            return CycleSpec(
                behavior=behavior,
                expected_red_failure=_expected_red_from_raw(raw),
                notes=[str(note) for note in _list(raw.get("notes"))],
            )
        raise ValueError("cycle mapping requires 'behavior'")

    raise ValueError("each cycle must be a string or mapping")
85
+
86
+
87
def _expected_red_from_raw(raw: dict[str, Any]) -> ExpectedRed | None:
    """Extract the expected RED failure from a cycle mapping.

    The structured ``expected_red`` mapping takes precedence. Otherwise the
    legacy ``expected_red_failure`` value (scalar or list) is used as the
    ``must_include`` list.

    Args:
        raw: The cycle mapping from the spec.

    Returns:
        An ``ExpectedRed``, or ``None`` when neither key yields anything.

    Raises:
        ValueError: If ``expected_red`` is present but not a mapping.
    """
    if "expected_red" in raw:
        value = raw["expected_red"]
        if not isinstance(value, dict):
            raise ValueError("expected_red must be a mapping")

        # Default only when the key is missing / null / empty. A plain
        # truthiness test would also rewrite an explicit ``exit_code: 0``
        # to "nonzero", silently inverting what the user wrote.
        exit_code = value.get("exit_code")
        if exit_code is None or exit_code == "":
            exit_code = "nonzero"

        return ExpectedRed(
            exit_code=str(exit_code),
            must_include=[str(item) for item in _list(value.get("must_include"))],
            must_not_include=[str(item) for item in _list(value.get("must_not_include"))],
        )

    legacy = [str(item) for item in _list(raw.get("expected_red_failure"))]
    if not legacy:
        return None
    return ExpectedRed(must_include=legacy)
102
+
103
+
104
+ def _list(value: Any) -> list[Any]:
105
+ if value is None:
106
+ return []
107
+ if isinstance(value, list):
108
+ return value
109
+ return [value]
110
+
111
+
112
+ def _format_list(title: str, items: list[str]) -> str:
113
+ if not items:
114
+ return ""
115
+ body = "\n".join(f"- {item}" for item in items)
116
+ return f"{title}:\n{body}"
@@ -0,0 +1,143 @@
1
+ Metadata-Version: 2.4
2
+ Name: aitdd
3
+ Version: 0.1.0
4
+ Summary: A small Codex-planned, Cursor-implemented TDD loop with Codex hook guards.
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: codex-hookkit>=0.0.2
7
+ Requires-Dist: pyyaml>=6.0
8
+ Description-Content-Type: text/markdown
9
+
10
+ # AiTdd
11
+
12
+ Codex が計画とレビューを担当し、Cursor が RED / GREEN / REFACTOR の実装を担当する、
13
+ 小さな TDD オーケストレータです。Hook の入出力と policy は
14
+ `codex-hookkit` を使います。
15
+ Codex は Python から公式 `@openai/codex-sdk` を呼びます。
16
+ Cursor は既定で Python から公式 `@cursor/sdk` の Composer を使い、
17
+ 必要なら `cursor-agent` CLI に切り替えられます。
18
+
19
+ ## 役割
20
+
21
+ - Codex: 次に書くべき最小テストの計画、GREEN 後と REFACTOR 後のレビュー、完了判定
22
+ - Cursor: テスト追加、最小実装、リファクタリング
23
+ - Hook: RED ではテスト失敗を要求し、GREEN / REFACTOR ではテスト成功を要求する
24
+
25
+ ## 使い方
26
+
27
+ ```sh
28
+ uv sync --dev
29
+ npm install
30
+ uv run aitdd run "FizzBuzz を t-wada 流 TDD で作る" --test-command "pytest" --max-cycles 5
31
+ ```
32
+
33
+ PyPI から使う場合も、公式 Node SDK は作業ディレクトリに入れてください。
34
+
35
+ ```sh
36
+ pip install aitdd
37
+ npm install @openai/codex-sdk @cursor/sdk
38
+ aitdd run "FizzBuzz を t-wada 流 TDD で作る" --test-command "pytest"
39
+ ```
40
+
41
+ Cursor 実装担当は既定で SDK + Composer alias を使います。
42
+
43
+ ```sh
44
+ uv run aitdd run "..." --cursor-backend sdk --cursor-model composer-latest
45
+ ```
46
+
47
+ CLI fallback を使いたい場合:
48
+
49
+ ```sh
50
+ uv run aitdd run "..." --cursor-backend cli --cursor-model composer-latest
51
+ ```
52
+
53
+ SDK は `CURSOR_API_KEY` があればそれを使い、無い場合は SDK 側の認証解決に任せます。
54
+
55
+ 実行内容だけ確認する場合:
56
+
57
+ ```sh
58
+ uv run aitdd run "TODO アプリの最小モデルを作る" --dry-run
59
+ ```
60
+
61
+ 複雑なクラスや業務要件では `aitdd.yaml` を使って、反復する TDD サイクルを固定できます。
62
+
63
+ ```sh
64
+ uv run aitdd run "ignored when spec exists" \
65
+ --spec examples/aitdd.yaml \
66
+ --test-command "pytest" \
67
+ --max-cycles 5
68
+ ```
69
+
70
+ `aitdd.yaml` では次を指定できます。
71
+
72
+ - `goal`: 全体ゴール
73
+ - `public_api`: 育てる public API
74
+ - `constraints`: 設計・進め方の制約
75
+ - `forbidden`: 先回り実装や禁止事項
76
+ - `acceptance_tests`: 外側の受け入れテスト
77
+ - `unit_tests`: 内側のユニットテスト
78
+ - `cycles`: 1 サイクル 1 public behavior の反復リスト
79
+
80
+ 各 cycle には `expected_red` を置けます。RED では「テストが失敗したか」だけでなく、
81
+ 期待した理由で失敗したか、禁止した壊れ方をしていないかも検証します。
82
+ REFACTOR フェーズではテストファイル変更を拒否します。
83
+
84
+ Codex レビューは JSON schema で機械判定します。各サイクルで次の gate がすべて `true` の場合だけ
85
+ 次に進みます。
86
+
87
+ - `one_behavior_only`
88
+ - `minimal_green`
89
+ - `tests_unchanged_in_refactor`
90
+ - `acceptance_unit_boundary_ok`
91
+ - `forbidden_respected`
92
+
93
+ CLI には cycle ごとの進捗として `red / green / refactor / complete / one_behavior_only /
94
+ minimal_green / boundary_ok` が表示されます。
95
+
96
+ 実行中の状態は最小構成で次に保存されます。
97
+
98
+ - `.aitdd/progress.json`: cycle ごとの `behavior`, `red`, `green`, `refactor`, `review_gate`,
99
+ `issues`, `started_at`, `finished_at`
100
+ - `.aitdd/cycles/001-red.diff`: phase ごとの git diff snapshot
101
+ - `.aitdd/report.md`: TDD の進行ログ
102
+
103
+ 途中から再開する場合:
104
+
105
+ ```sh
106
+ aitdd resume --spec aitdd.yaml --max-cycles 5
107
+ ```
108
+
109
+ Codex hook として使う場合は `.codex/hooks.json` などに次を登録します。
110
+
111
+ ```json
112
+ {
113
+ "hooks": {
114
+ "PostToolUse": [
115
+ {
116
+ "matcher": "*",
117
+ "hooks": [
118
+ {
119
+ "type": "command",
120
+ "command": "uv run python hooks/aitdd_guard.py",
121
+ "timeout": 30
122
+ }
123
+ ]
124
+ }
125
+ ]
126
+ }
127
+ }
128
+ ```
129
+
130
+ `AITDD_PHASE=red|green|refactor` と `AITDD_TEST_COMMAND` を渡すと、そのフェーズの
131
+ 期待に合わない状態を block します。
132
+
133
+ ## ループ
134
+
135
+ 1. Codex が次の最小ステップを計画する
136
+ 2. Cursor が RED として、失敗するテストだけを書く
137
+ 3. テストが失敗しなければ RED をやり直す
138
+ 4. Cursor が GREEN として、通すための最小実装を書く
139
+ 5. テストが通らなければ GREEN をやり直す
140
+ 6. Codex がレビューし、Cursor が必要なら REFACTOR する
141
+ 7. テストが通ることを確認し、Codex が完了判定する
142
+
143
+ `--max-cycles` は安全弁です。完璧を目指しつつ、暴走しないように上限を持たせています。
@@ -0,0 +1,12 @@
1
+ aitdd/__init__.py,sha256=wIuXmq_znuM9FNYift0egMBgax0G72mP5Rup9fqcvDA,142
2
+ aitdd/agents.py,sha256=UUAe2sBzNOr4dKw9Riigld1oCYvjj0vDNl2LH38sIG8,7557
3
+ aitdd/cli.py,sha256=UEawVNzOuEepfEzc_L9HhaEP_21fVhJya1B_5V8Vlas,2658
4
+ aitdd/hook_policy.py,sha256=ruOAiC93Q3A4gdSIpew4c2bhCDkthUTi47mSLpR0zsQ,2502
5
+ aitdd/progress.py,sha256=OeIBdCTvUU6lJrfP8wPsGM6JsijiyahLDmhqoQoBlac,5959
6
+ aitdd/review.py,sha256=PyLUIdsficMSoJy3TqorcIfRll_Nvhxx9_ueBSKiQ8w,3570
7
+ aitdd/runner.py,sha256=GhsejKBBD-w6QLpjAy4vmzg-96xrBIQg2lE0rTswK8E,12830
8
+ aitdd/spec.py,sha256=3R-LokleUJUUC6Ti99PHb5v8t1PLrHq_ea62MZVLNek,4169
9
+ aitdd-0.1.0.dist-info/METADATA,sha256=3_IOOO-S0AtC_yF4Xa2fE_o1OpxJh-BOksxOP1cWM0g,4739
10
+ aitdd-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
11
+ aitdd-0.1.0.dist-info/entry_points.txt,sha256=aBpJeHRgJJYU0h0ngUWrfj-9RQXwgbWcUsNNlSdPDPQ,41
12
+ aitdd-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ aitdd = aitdd.cli:main