leanlab 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- leanlab/__init__.py +1 -0
- leanlab/cli.py +315 -0
- leanlab/core/__init__.py +1 -0
- leanlab/core/agents/__init__.py +10 -0
- leanlab/core/agents/claude.py +38 -0
- leanlab/core/agents/port.py +49 -0
- leanlab/core/agents/protocol.py +64 -0
- leanlab/core/coding/__init__.py +1 -0
- leanlab/core/coding/board.py +335 -0
- leanlab/core/coding/board_dist/assets/index-BBCkNArL.css +1 -0
- leanlab/core/coding/board_dist/assets/index-CNGMDAuO.js +40 -0
- leanlab/core/coding/board_dist/index.html +13 -0
- leanlab/core/coding/engineer.py +304 -0
- leanlab/core/coding/gate.py +63 -0
- leanlab/core/coding/personas.py +23 -0
- leanlab/core/coding/playbook.py +47 -0
- leanlab/core/coding/spec.py +232 -0
- leanlab/core/doctor.py +220 -0
- leanlab/core/init.py +219 -0
- leanlab/core/loop.py +374 -0
- leanlab/core/monitor.py +553 -0
- leanlab/templates/agents/CLAUDE.md +52 -0
- leanlab/templates/agents/critic.md +38 -0
- leanlab/templates/agents/director.md +37 -0
- leanlab/templates/agents/engineer.md +12 -0
- leanlab/templates/agents/reviewer.md +34 -0
- leanlab/templates/agents/techlead.md +7 -0
- leanlab/templates/skill/SKILL.md +99 -0
- leanlab-0.2.1.dist-info/METADATA +273 -0
- leanlab-0.2.1.dist-info/RECORD +33 -0
- leanlab-0.2.1.dist-info/WHEEL +4 -0
- leanlab-0.2.1.dist-info/entry_points.txt +2 -0
- leanlab-0.2.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
"""The engineer loop — implement a spec'd task to a green gate + reviewer sign-off, then merge.
|
|
2
|
+
|
|
3
|
+
Realizes the build-task use case. The engineer edits the worktree; the gate checks it; on a
|
|
4
|
+
green gate the reviewer judges the diff. It loops on gate failures / review feedback, then
|
|
5
|
+
commits and merges the branch into main. `runner` / `ui` are injected for testing.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import hashlib
|
|
11
|
+
import json
|
|
12
|
+
import shlex
|
|
13
|
+
import subprocess
|
|
14
|
+
from datetime import datetime, timezone
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
from ..loop import make_runner
|
|
18
|
+
from .board import log_event
|
|
19
|
+
from .gate import run_gate
|
|
20
|
+
from .personas import spec_text
|
|
21
|
+
from .playbook import read_playbook, update_playbook
|
|
22
|
+
|
|
23
|
+
# Values of a reviewer's "approved" field that count as an approval; the review
# panel tests membership with `in`, so anything else is treated as a rejection.
_APPROVED = (True, "true", "yes", "True")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _git(repo, *args):
    """Run ``git -C <repo> <args...>`` and return the completed process.

    Output is captured as text. A non-zero exit is not raised; callers inspect
    ``returncode`` / ``stdout`` / ``stderr`` on the returned object.
    """
    command = ["git", "-C", str(repo)]
    command.extend(args)
    return subprocess.run(command, capture_output=True, text=True)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _record(repo, rec):
    """Append one build outcome to ``<repo>/.leanlab/coding-results.jsonl``.

    The record is copied, stamped with the current UTC time under ``"ts"``, and
    written as a single JSON line. The ``.leanlab`` directory is created if needed.
    """
    results_file = Path(repo) / ".leanlab" / "coding-results.jsonl"
    results_file.parent.mkdir(parents=True, exist_ok=True)
    stamped = dict(rec)
    stamped["ts"] = datetime.now(timezone.utc).isoformat()
    with results_file.open("a") as sink:
        sink.write(json.dumps(stamped) + "\n")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _load_lock(repo, slug):
    """Read the out-of-tree lock for ``slug`` from ``<repo>/.leanlab/locks/<slug>.json``.

    Returns the parsed JSON, or None when the file is absent, unreadable, or not
    valid JSON (i.e. the task has no usable lock).
    """
    lock_file = Path(repo) / ".leanlab" / "locks" / f"{slug}.json"
    if lock_file.exists():
        try:
            raw = lock_file.read_text()
            return json.loads(raw)
        except (OSError, ValueError):
            pass
    return None
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _is_pristine(lock, wt) -> bool:
|
|
51
|
+
"""Did the engineer leave the locked tests untouched? (missing or changed = tampered)"""
|
|
52
|
+
for it in lock.get("tests", []):
|
|
53
|
+
p = Path(wt) / it["path"]
|
|
54
|
+
if not p.exists() or hashlib.sha256(p.read_bytes()).hexdigest() != it["sha256"]:
|
|
55
|
+
return False
|
|
56
|
+
return True
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _isolated_acceptance(wt, lock, accept_cmd):
    """Re-run the locked acceptance tests using ``accept_cmd`` inside the worktree.

    ``accept_cmd`` is split shell-style and the locked test paths are appended to
    it. Returns ``(ok, output)``:

    * exit code 1 (tests ran and failed) -> ``ok`` is False: the earlier gate pass
      relied on engineer-added test infrastructure;
    * any other exit code -> ``ok`` is True, because a collection/import problem
      means the isolation itself was inconclusive and must not block;
    * the command could not be started or timed out (600 s) -> ``ok`` is True and
      the output is an "(isolation skipped: ...)" note;
    * the lock lists no tests -> ``(True, "")`` without running anything.
    """
    test_paths = [entry["path"] for entry in lock.get("tests", [])]
    if not test_paths:
        return True, ""
    try:
        argv = shlex.split(accept_cmd)
        argv.extend(test_paths)
        done = subprocess.run(argv, cwd=Path(wt), capture_output=True,
                              text=True, timeout=600)
    except Exception as exc:  # noqa: BLE001
        return True, f"(isolation skipped: {exc})"
    combined = done.stdout
    if done.stderr:
        combined += "\n" + done.stderr
    tests_failed = done.returncode == 1
    return (not tests_failed), combined.strip()
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _restore_tests(lock, wt) -> None:
|
|
79
|
+
"""Overwrite the worktree's acceptance tests with the pristine, out-of-tree copies, so the
|
|
80
|
+
gate always runs the ORIGINAL tests no matter what the engineer did to them."""
|
|
81
|
+
for it in lock.get("tests", []):
|
|
82
|
+
p = Path(wt) / it["path"]
|
|
83
|
+
p.parent.mkdir(parents=True, exist_ok=True)
|
|
84
|
+
if p.exists():
|
|
85
|
+
p.chmod(0o644)
|
|
86
|
+
p.write_text(it["content"])
|
|
87
|
+
p.chmod(0o444)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _stage(wt):
    """Stage all changes except gate caches and the lock file.

    The ignore patterns are added to the repository-local exclude file (the path
    git reports for ``info/exclude``) and then ``git add -A`` is run in ``wt``.
    Updating the exclude file is best-effort: if it cannot be read or written the
    staging still happens.
    """
    ep = _git(wt, "rev-parse", "--git-path", "info/exclude").stdout.strip()
    if ep:
        epath = Path(ep) if Path(ep).is_absolute() else Path(wt) / ep
        try:
            epath.parent.mkdir(parents=True, exist_ok=True)
            cur = epath.read_text() if epath.exists() else ""
            # Compare whole lines: a substring test would treat e.g.
            # "build/__pycache__/" as if "__pycache__/" were already listed.
            present = {ln.strip() for ln in cur.splitlines()}
            missing = [pat for pat in ("__pycache__/", ".pytest_cache/", ".leanlab-lock.json")
                       if pat not in present]
            if missing:
                if cur and not cur.endswith("\n"):
                    cur += "\n"  # don't glue the first pattern onto the last existing line
                cur += "".join(pat + "\n" for pat in missing)
                epath.write_text(cur)
        except (OSError, ValueError):
            pass  # best-effort: an unwritable/undecodable exclude file must not stop staging
    _git(wt, "add", "-A")
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _engineer_prompt(spec_md, persona_set, feedback, playbook=""):
    """Assemble the prompt handed to the engineer for one attempt.

    Sections, in order: the engineer persona text for ``persona_set``, the task
    spec, the project playbook (only when non-empty), the fixed implementation
    instructions, and - when ``feedback`` is non-empty - what to fix from the
    previous attempt.
    """
    sections = [
        spec_text("engineer", persona_set),
        "\n\n## The task spec\n",
        spec_md,
        "\n\n",
    ]
    if playbook:
        sections += ["## Project playbook (follow it)\n", playbook, "\n\n"]
    sections.append(
        "Implement the change in this worktree so the gate passes. Read the locked acceptance "
        "tests under the test directory and make them pass — do NOT modify them. Follow the "
        "repository's conventions. Edit files with your tools, then stop."
    )
    if feedback:
        sections += ["\n\n## Fix this (from the last attempt)\n", feedback]
    return "".join(sections)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# Maximum number of diff characters included in a review prompt; longer diffs
# are cut at this length and followed by a truncation notice.
_DIFF_LIMIT = 40000
|
|
121
|
+
|
|
122
|
+
# Each panel reviewer attacks from a distinct angle — diversity catches what one lens misses.
# Every entry is a {"name", "focus"} dict; the focus text is spliced into the review prompt.
REVIEW_LENSES = [
    {"name": lens_name, "focus": lens_focus}
    for lens_name, lens_focus in (
        ("correctness",
         "logic errors, off-by-one, wrong operators, integer division, edge cases, error paths"),
        ("spec-conformance",
         "requirements stated in the spec that the locked tests do NOT check — find one the code gets wrong"),
        ("security",
         "injection, path traversal, unsafe input handling, leaked secrets, resource exhaustion"),
        ("robustness",
         "behaviour on bad/empty/huge input, concurrency, mutable shared state, failure recovery"),
    )
]
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _clip_diff(diff):
    """Return ``diff`` unchanged if it fits in ``_DIFF_LIMIT`` characters.

    Otherwise return its first ``_DIFF_LIMIT`` characters followed by a notice
    stating how many characters were dropped and warning the reviewer not to
    approve code it could not see.
    """
    hidden = len(diff) - _DIFF_LIMIT
    if hidden <= 0:
        return diff
    notice = (f"\n…(diff truncated — {hidden} more chars not shown; "
              "do NOT approve code you could not see)")
    return diff[:_DIFF_LIMIT] + notice
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _lenses_for(n):
    """Pick the lenses for a panel of ``n`` reviewers.

    With ``n <= 1`` the panel is a single general reviewer, represented by
    ``[None]`` (no extra focus). Larger panels cycle through ``REVIEW_LENSES`` in
    order, wrapping around when ``n`` exceeds the number of lenses.
    """
    if n > 1:
        available = len(REVIEW_LENSES)
        return [REVIEW_LENSES[slot % available] for slot in range(n)]
    return [None]
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _review_prompt(spec_md, diff, persona_set, lens=None):
    """Assemble the prompt for one reviewer.

    Starts from the reviewer persona text for ``persona_set``; when a ``lens`` is
    given, adds a section naming it and its focus. The task spec and the
    (possibly truncated) diff inside a ```diff fence follow.
    """
    pieces = [spec_text("reviewer", persona_set)]
    if lens:
        pieces.append(
            f"\n\n## Your lens: {lens['name']}\nWeight your attack toward {lens['focus']}. "
            "Still reject any blocking defect you find outside this lens."
        )
    pieces.append("\n\n## Task spec\n")
    pieces.append(spec_md)
    pieces.append("\n\n## The diff to review\n```diff\n")
    pieces.append(_clip_diff(diff))
    pieces.append("\n```")
    return "".join(pieces)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _review_panel(runner, spec_md, diff, persona_set, lenses):
    """Adversarial quorum: ask one reviewer per lens and combine their verdicts.

    Each reviewer is a ``runner.run_structured`` call requiring the fields
    ``approved`` and ``feedback``. A failed call counts as a rejection with score
    0 and the feedback "(review call failed)"; a missing score defaults to 100 and
    an unparsable one to 0.

    Returns ``(approved, score, feedback, verdicts)`` where ``approved`` is true
    only if there is at least one verdict and all approve, ``score`` is the lowest
    individual score (0.0 for an empty panel), ``feedback`` joins the non-empty
    feedback of every rejecting reviewer labelled by lens, and ``verdicts`` is the
    per-reviewer list of dicts.
    """
    verdicts = []
    for lens in lenses:
        reply = runner.run_structured(_review_prompt(spec_md, diff, persona_set, lens),
                                      ["approved", "feedback"])
        if reply.ok:
            accepted = reply.data.get("approved") in _APPROVED
            try:
                points = float(reply.data.get("score", 100))
            except (TypeError, ValueError):
                points = 0.0
            remarks = str(reply.data.get("feedback", ""))
        else:
            accepted = reply.ok
            points = 0.0
            remarks = "(review call failed)"
        label = lens["name"] if lens else "review"
        verdicts.append({"lens": label, "approved": accepted,
                         "score": points, "feedback": remarks})

    unanimous = bool(verdicts) and all(v["approved"] for v in verdicts)
    harshest = min((v["score"] for v in verdicts), default=0.0)
    blockers = [f"[{v['lens']}] {v['feedback']}"
                for v in verdicts
                if not v["approved"] and v["feedback"]]
    return unanimous, harshest, "\n\n".join(blockers), verdicts
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def build_task(repo, slug, *, runner=None, ui=None, gate_cmds=None,
               persona_set="coding", max_attempts=5, playbook=True, min_quality=0,
               isolate=True, accept_cmd="pytest --noconftest -q", reviewers=1):
    """Run the engineer loop on a spec'd task. Returns a result dict or None.

    Each attempt: the engineer edits the worktree, the locked acceptance tests are
    restored from the out-of-tree lock, the gate runs, and - on a green gate that
    was neither tampered with nor gamed - the review panel judges the staged diff.
    Approval with a score of at least ``min_quality`` triggers commit + merge.

    Args:
        repo: Repository root; the worktree is ``<repo>/.leanlab/worktrees/<slug>``.
        slug: Task identifier; the branch is ``leanlab/<slug>``.
        runner: Agent runner (``run_plain`` / ``run_structured``); built for the
            worktree when omitted.
        ui: Progress reporter; a ``BuildUI`` when omitted.
        gate_cmds: Gate steps passed to ``run_gate`` (its default when None).
        persona_set: Persona set used for the engineer and reviewer prompts.
        max_attempts: Number of engineer attempts before giving up.
        playbook: Whether to refresh the PLAYBOOK after a successful merge.
        min_quality: Minimum panel score required in addition to approval.
        isolate: Whether to re-run the locked tests with ``accept_cmd``.
        accept_cmd: Command used for that isolated re-run.
        reviewers: Size of the review panel.

    Returns:
        None if the worktree does not exist. Otherwise a dict with ``branch``,
        ``attempts`` and ``merged``; it also carries ``quality`` when the review
        was passed (``merged`` is False there if the merge itself failed).
    """
    repo = Path(repo).resolve()
    ui = ui or BuildUI()
    wt = repo / ".leanlab" / "worktrees" / slug
    if not wt.is_dir():
        ui.error(f"no worktree at {wt} — run `leanlab spec` first.")
        return None
    branch = f"leanlab/{slug}"
    spec_md = (wt / "SPEC.md").read_text() if (wt / "SPEC.md").exists() else ""
    pb = read_playbook(repo)
    lock = _load_lock(repo, slug)
    runner = runner or make_runner(wt)

    tamper_note = ("You modified the locked acceptance tests (they were restored). They are "
                   "FROZEN — solve the task without touching them.")
    feedback = None
    for attempt in range(1, max_attempts + 1):
        ui.attempt(attempt, max_attempts)
        with ui.status("Engineer is implementing the change…"):
            runner.run_plain(_engineer_prompt(spec_md, persona_set, feedback, pb))

        # Check for tampering BEFORE restoring, otherwise the evidence is gone.
        tampered = lock is not None and not _is_pristine(lock, wt)
        if lock is not None:
            _restore_tests(lock, wt)  # the gate ALWAYS runs the pristine acceptance tests

        result = run_gate(wt, gate_cmds)
        ui.gate(result)
        log_event(repo, slug, {"event": "attempt", "n": attempt, "gate_passed": result.passed,
                               "failures": [c.name for c in result.failures()]})
        if not result.passed:
            feedback = "The gate failed:\n" + "\n".join(
                f"[{c.name}]\n{c.output[-800:]}" for c in result.failures())
            if tampered:
                # The failure may stem from the restored tests; say so, or the
                # engineer is likely to edit them again.
                feedback += "\n\n" + tamper_note
            continue

        if tampered:
            ui.error("⚠ locked acceptance tests were modified — restored; rejecting this attempt.")
            log_event(repo, slug, {"event": "tamper", "n": attempt})
            feedback = tamper_note
            continue

        if lock is not None and isolate:
            ok_iso, _iso = _isolated_acceptance(wt, lock, accept_cmd)
            if not ok_iso:
                ui.error("⚠ acceptance tests fail without the engineer's fixtures — gamed.")
                log_event(repo, slug, {"event": "isolation", "n": attempt})
                feedback = ("Your change passes only with extra fixtures/conftest. The acceptance "
                            "tests must pass on their own — implement the real behaviour.")
                continue

        _stage(wt)
        diff = _git(wt, "diff", "--cached").stdout
        lenses = _lenses_for(reviewers)
        msg = ("Reviewer is checking the diff…" if len(lenses) == 1
               else f"{len(lenses)} reviewers are attacking the diff…")
        with ui.status(msg):
            approved, score, review_fb, verdicts = _review_panel(
                runner, spec_md, diff, persona_set, lenses)
        log_event(repo, slug, {"event": "review", "n": attempt, "approved": bool(approved),
                               "score": score, "feedback": review_fb[:200],
                               "reviewers": [{"lens": v["lens"], "approved": v["approved"],
                                              "score": v["score"]} for v in verdicts]})
        if approved and score >= min_quality:
            merged = _merge(repo, wt, branch, slug, ui)
            log_event(repo, slug, {"event": "merged", "branch": branch, "merged": merged})
            if merged:
                ui.success(branch, attempt)
                if playbook:
                    update_playbook(repo, slug=slug, ui=ui)  # tech-lead refreshes the PLAYBOOK
            _record(repo, {"slug": slug, "branch": branch, "attempts": attempt,
                           "merged": merged, "quality": score})
            return {"branch": branch, "attempts": attempt, "merged": merged, "quality": score}
        if approved:  # passed review but below the quality bar
            # review_fb only aggregates REJECTING reviewers, so it is empty here;
            # pass on what the approving reviewers wrote so the engineer knows
            # what to improve.
            notes = "\n\n".join(f"[{v['lens']}] {v['feedback']}"
                                for v in verdicts if v["feedback"])
            feedback = (f"Quality {score:.0f} is below the required {min_quality:.0f} — improve it. "
                        + (review_fb or notes))
        else:
            feedback = "The reviewer(s) requested changes:\n" + (review_fb or "(no feedback)")

    ui.error(f"Gave up after {max_attempts} attempts — not merged.")
    log_event(repo, slug, {"event": "gaveup", "attempts": max_attempts})
    _record(repo, {"slug": slug, "branch": branch, "attempts": max_attempts, "merged": False})
    return {"branch": branch, "attempts": max_attempts, "merged": False}
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def _merge(repo, wt, branch, slug, ui) -> bool:
    """Commit the worktree's changes on the task branch and merge it into ``repo``.

    Stages everything in ``wt``, commits it, then runs ``git merge --no-ff`` of
    ``branch`` in ``repo`` (i.e. into whatever is checked out there). Problems are
    reported through ``ui.error``.

    Returns:
        True when the merge command succeeded. False when the merge failed, or
        when the commit failed while staged changes were still pending - merging
        then would report "up to date" without containing the engineer's work.
    """
    _stage(wt)
    commit = _git(wt, "commit", "-m", f"leanlab: {slug}")
    if commit.returncode != 0:
        # A failed commit is harmless only when nothing was left to commit (the
        # branch already holds the work). `git diff --cached --quiet` exits
        # non-zero when staged changes remain.
        pending = _git(wt, "diff", "--cached", "--quiet")
        if pending.returncode != 0:
            ui.error("commit failed (not merged): " + (commit.stderr or commit.stdout).strip())
            return False
    r = _git(repo, "merge", "--no-ff", "-m", f"leanlab: merge {slug}", branch)
    if r.returncode != 0:
        ui.error("merge failed (resolve by hand): " + (r.stderr or r.stdout).strip())
        return False
    return True
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
class BuildUI:
    """Terminal front-end for `leanlab build`, rendered through a rich console.

    Provides the attempt separator, progress spinners, the gate report, plain and
    error messages, and the final merge panel. ``rich`` is imported lazily.
    """

    def __init__(self):
        from rich.console import Console
        self.console = Console()

    def attempt(self, n, total):
        """Draw a horizontal rule announcing attempt ``n`` of ``total``."""
        heading = f"[bold cyan]Attempt {n}/{total}"
        self.console.rule(heading, style="cyan")

    def status(self, message):
        """Return a spinner context manager labelled with ``message``."""
        label = f"[bold cyan]{message}"
        return self.console.status(label, spinner="dots")

    def gate(self, result):
        """Print the pass/fail report for a gate result."""
        from .gate import report
        report(result, self.console)

    def note(self, message):
        """Print ``message`` as-is."""
        self.console.print(message)

    def error(self, message):
        """Print ``message`` in bold red."""
        styled = f"[bold red]{message}[/bold red]"
        self.console.print(styled)

    def success(self, branch, attempts):
        """Show the green completion panel for a merged branch."""
        from rich.panel import Panel
        summary = f"Merged [bold]{branch}[/bold] into main after {attempts} attempt(s)."
        panel = Panel(summary, title="✓ Task complete", border_style="green")
        self.console.print(panel)
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""The gate — the deterministic checks a code change must pass.
|
|
2
|
+
|
|
3
|
+
Objective and binary: every configured command must exit 0 (tests incl. the locked
|
|
4
|
+
acceptance tests, plus optional lint / typecheck). Returns a structured GateResult.
|
|
5
|
+
The LLM quality score (the reviewer) is a separate, later concern.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import shlex
|
|
11
|
+
import subprocess
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
# Gate steps used when the caller supplies none: a single quiet pytest run.
# Each step is a {"name", "cmd"} dict; "cmd" is split shell-style before running.
DEFAULT_GATE = [{"name": "tests", "cmd": "pytest -q"}]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class GateCheck:
    """Outcome of a single gate command."""

    # Step name taken from the gate configuration.
    name: str
    # True when the command exited with status 0.
    ok: bool
    # Exit status of the command; -1 when it could not be run at all.
    code: int
    # Captured stdout (plus stderr, if any), or the reason the command did not run.
    output: str
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class GateResult:
    """Aggregate verdict of a gate run."""

    # True only when every check passed.
    passed: bool
    # The individual check results, in execution order.
    checks: list

    def failures(self):
        """Return the checks that did not pass, preserving their order."""
        failed = []
        for check in self.checks:
            if not check.ok:
                failed.append(check)
        return failed
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def run_gate(worktree, gate_cmds=None, *, timeout=600) -> GateResult:
    """Run every gate command inside ``worktree`` and collect the results.

    ``gate_cmds`` is a list of ``{"name", "cmd"}`` steps (``DEFAULT_GATE`` when
    empty or None). Each command is split shell-style and executed with the given
    ``timeout`` in seconds. A command that cannot be run or times out is recorded
    as a failed check with code -1. The gate passes only if all checks exit 0.
    """
    cwd = Path(worktree)
    steps = gate_cmds or DEFAULT_GATE
    outcomes = []
    for step in steps:
        name, cmd = step["name"], step["cmd"]
        try:
            done = subprocess.run(shlex.split(cmd), cwd=cwd, capture_output=True,
                                  text=True, timeout=timeout)
            text = done.stdout
            if done.stderr:
                text += "\n" + done.stderr
            outcomes.append(GateCheck(name, done.returncode == 0, done.returncode, text.strip()))
        except Exception as exc:  # noqa: BLE001 — couldn't even run it
            outcomes.append(GateCheck(name, False, -1, f"could not run `{cmd}`: {exc}"))
    everything_ok = all(check.ok for check in outcomes)
    return GateResult(passed=everything_ok, checks=outcomes)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def report(result: GateResult, console=None):
    """Print a rich pass/fail report.

    One line per check (mark, name, exit code); for a failed check the last 12
    lines of its output follow in dim style; a final line gives the overall
    verdict. A fresh ``rich`` console is created when ``console`` is None.
    """
    if console is None:
        from rich.console import Console
        console = Console()
    for c in result.checks:
        mark = "[green]✓[/green]" if c.ok else "[red]✗[/red]"
        console.print(f"{mark} [bold]{c.name}[/bold] (exit {c.code})")
        if not c.ok:
            tail = "\n".join(c.output.splitlines()[-12:])
            # Tool output is arbitrary text (tracebacks contain "[...]" all the
            # time): print it literally instead of letting rich parse it as
            # markup or emoji codes, which can drop text or raise.
            console.print(tail, style="dim", markup=False, emoji=False)
    verdict = "[green]GATE PASSED[/green]" if result.passed else "[red]GATE FAILED[/red]"
    console.print(f"\n[bold]{verdict}[/bold]")
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Configurable agent persona sets — which package template each role uses.
|
|
2
|
+
|
|
3
|
+
A lab picks a set ("metric" for the classic Worker/Director/Critic, "coding" for the
|
|
4
|
+
Engineer/Reviewer/Tech-lead). Selectable via lab config and a CLI flag.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from importlib import resources
|
|
10
|
+
|
|
11
|
+
# Persona set -> role -> template file name (looked up under the package's
# templates/agents directory).
PERSONAS = {
    "metric": dict(worker="CLAUDE.md", director="director.md", critic="critic.md"),
    "coding": dict(engineer="engineer.md", reviewer="reviewer.md", techlead="techlead.md"),
}
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def spec_text(role: str, persona_set: str = "coding") -> str:
    """Load the template text for a role in a persona set (shipped as package data).

    Args:
        role: Role name within the set (e.g. "engineer", "reviewer").
        persona_set: Key into ``PERSONAS``.

    Returns:
        The template's contents with surrounding whitespace stripped.

    Raises:
        KeyError: If the persona set or the role within it is unknown.
    """
    try:
        fname = PERSONAS[persona_set][role]
    except KeyError as e:
        raise KeyError(f"no persona '{role}' in set '{persona_set}'") from e
    template = resources.files("leanlab") / "templates" / "agents" / fname
    # Decode explicitly: the packaged templates must read the same on every
    # platform, not in whatever the process locale happens to be.
    return template.read_text(encoding="utf-8").strip()
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""The PLAYBOOK — project knowledge the tech-lead maintains and the engineer reads.
|
|
2
|
+
|
|
3
|
+
`.leanlab/PLAYBOOK.md` accumulates conventions, architecture notes, and pitfalls so each
|
|
4
|
+
task starts smarter — the coding lab's version of memory. The engineer reads it; after a
|
|
5
|
+
successful merge the tech-lead rewrites it. (The test "ratchet" is automatic: each merged
|
|
6
|
+
task's locked acceptance tests join the main branch's suite and stay.)
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from ..loop import make_runner
|
|
14
|
+
from .personas import spec_text
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def playbook_path(repo) -> Path:
    """Return the location of the playbook: ``<repo>/.leanlab/PLAYBOOK.md``."""
    return Path(repo).joinpath(".leanlab", "PLAYBOOK.md")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def read_playbook(repo) -> str:
    """Return the playbook text with surrounding whitespace stripped, or "" if absent."""
    location = playbook_path(repo)
    if not location.exists():
        return ""
    return location.read_text().strip()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def update_playbook(repo, *, slug=None, runner=None, ui=None) -> None:
    """Ask the tech-lead agent to rewrite ``.leanlab/PLAYBOOK.md`` from recent changes.

    A runner rooted at ``repo`` is created when none is injected. When ``ui`` is
    given, the agent call is wrapped in its status spinner. When ``slug`` is
    given, a ``"playbook"`` event is logged for that task so the update appears
    on its timeline.
    """
    agent = runner or make_runner(Path(repo))
    instructions = (
        "Study the recent merged changes (use `git log -p -5` and read key files), then write a "
        "concise `.leanlab/PLAYBOOK.md`: conventions to follow, the architecture map, and "
        "pitfalls already hit, as guidance for the next tasks. Create the `.leanlab` directory if "
        "needed. Write ONLY that file, then stop."
    )
    prompt = spec_text("techlead", "coding") + "\n\n" + instructions
    if ui is None:
        agent.run_plain(prompt)
    else:
        with ui.status("Tech-lead is updating the PLAYBOOK…"):
            agent.run_plain(prompt)
    if slug:
        from .board import log_event  # lazy: board imports playbook, so import here
        log_event(repo, slug, {"event": "playbook"})
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
"""Spec a coding task — the spec-writer drafts a spec + acceptance tests in an isolated
|
|
2
|
+
git worktree, loops on the operator's feedback, then LOCKS the tests as the frozen
|
|
3
|
+
criteria the engineer is judged by (and can't change).
|
|
4
|
+
|
|
5
|
+
Realizes the spec-task use case. `runner` / `ui` are injected so it's testable without
|
|
6
|
+
Claude or a terminal.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import hashlib
|
|
12
|
+
import json
|
|
13
|
+
import re
|
|
14
|
+
import subprocess
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
from ..loop import make_runner
|
|
18
|
+
from .board import log_event
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Filler words dropped from slugs so the name leads with what matters.
_SLUG_STOP = {
    "a", "an", "the",
    "to", "of", "for", "in", "on",
    "and",
}


def _slug(task: str, max_len: int = 50) -> str:
    """Derive a short, readable, deterministic slug from a task description.

    How the name is kept meaningful (instead of a blind character chop):
      1. Only the first sentence/line is considered (split at ``.``, ``!``, ``?``
         or a newline).
      2. Filler words ("a", "the", "to", …) are dropped - unless that would leave
         nothing, in which case all words are kept.
      3. Words are joined kebab-case and the slug stops at the last WORD that
         still fits in ``max_len``; a single over-long first word is truncated.
    Falls back to ``"task"`` when no usable word remains.
    """
    head = re.split(r"[.\n!?]", task.strip(), maxsplit=1)[0]
    tokens = re.findall(r"[a-z0-9]+", head.lower())
    meaningful = [tok for tok in tokens if tok not in _SLUG_STOP] or tokens

    chosen = []
    used = 0
    for tok in meaningful:
        needed = len(tok) + (1 if chosen else 0)  # +1 for the joining dash
        if used + needed > max_len:
            break
        chosen.append(tok)
        used += needed
    slug = "-".join(chosen)

    if not slug and meaningful:  # a single first word longer than max_len
        slug = meaningful[0][:max_len]
    return slug or "task"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _git(repo, *args):
    """Run ``git -C <repo> <args...>``, capturing text output, and return the process.

    Failures are not raised; callers check ``returncode`` on the result.
    """
    argv = ["git", "-C", str(repo)]
    argv.extend(args)
    return subprocess.run(argv, capture_output=True, text=True)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _is_git_repo(repo) -> bool:
    """Return True when ``git rev-parse --is-inside-work-tree`` succeeds in ``repo``."""
    probe = _git(repo, "rev-parse", "--is-inside-work-tree")
    return probe.returncode == 0
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _create_worktree(repo, slug):
    """Create (or reuse) an isolated worktree + branch for this task.

    Ensures ``.leanlab/worktrees/`` is listed in the repository's ``.gitignore``,
    then returns ``(worktree_path, branch_name)``. An existing worktree directory
    is reused as-is. Otherwise a worktree is added on a new ``leanlab/<slug>``
    branch, falling back to attaching to that branch if it already exists.

    Raises:
        RuntimeError: If both ``git worktree add`` attempts fail.
    """
    wt = Path(repo) / ".leanlab" / "worktrees" / slug
    branch = f"leanlab/{slug}"
    gi = Path(repo) / ".gitignore"
    line = ".leanlab/worktrees/"
    # Read once, BEFORE opening for append: opening creates the file, which would
    # otherwise make a brand-new .gitignore look like one lacking a final newline
    # and earn it a leading blank line.
    existing = gi.read_text() if gi.exists() else ""
    if line not in existing:
        separator = "" if not existing or existing.endswith("\n") else "\n"
        with gi.open("a") as f:
            f.write(separator + line + "\n")
    if wt.exists():
        return wt, branch
    wt.parent.mkdir(parents=True, exist_ok=True)
    r = _git(repo, "worktree", "add", "-b", branch, str(wt))
    if r.returncode != 0:  # branch may already exist — attach to it
        r2 = _git(repo, "worktree", "add", str(wt), branch)
        if r2.returncode != 0:
            # Report the failure that actually stopped us; the first attempt's
            # message is only the fallback.
            raise RuntimeError("git worktree add failed: " + (r2.stderr or r.stderr).strip())
    return wt, branch
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _merged_branches(repo):
    """Return the set of branch names listed by ``git branch --merged`` in ``repo``."""
    listing = _git(repo, "branch", "--merged").stdout
    names = set()
    for row in listing.splitlines():
        if not row.strip():
            continue
        # `git branch` prefixes each name with a 2-character marker column:
        # "* " for the current branch, "+ " for one checked out in another
        # worktree, two spaces otherwise.
        names.add(row[2:].strip())
    return names
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def clean_worktrees(repo, slug=None, *, remove_all=False) -> list[str]:
    """Remove task worktrees, their branches and their lock files.

    With ``slug`` only that task is targeted (if its worktree directory exists).
    Without it, every worktree is targeted when ``remove_all`` is set, otherwise
    only those whose ``leanlab/<slug>`` branch is already merged.

    Returns the list of slugs processed ([] when there is no worktrees directory).
    """
    repo = Path(repo).resolve()
    wtroot = repo / ".leanlab" / "worktrees"
    if not wtroot.is_dir():
        return []
    merged = _merged_branches(repo)

    if slug:
        targets = [slug] if (wtroot / slug).is_dir() else []
    else:
        candidates = [entry.name for entry in sorted(wtroot.iterdir()) if entry.is_dir()]
        if remove_all:
            targets = candidates
        else:
            targets = [name for name in candidates if f"leanlab/{name}" in merged]

    removed = []
    for name in targets:
        branch = f"leanlab/{name}"
        force_branch = remove_all or bool(slug) or branch not in merged
        delete_flag = "-D" if force_branch else "-d"
        # always --force the worktree: real task worktrees carry an untracked .leanlab-lock.json
        # that would otherwise block removal. Branch deletion stays safe (-d) unless forced.
        _git(repo, "worktree", "remove", "--force", str(wtroot / name))
        _git(repo, "branch", delete_flag, branch)
        lock_file = repo / ".leanlab" / "locks" / f"{name}.json"
        lock_file.unlink(missing_ok=True)
        removed.append(name)
    return removed
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _spec_prompt(task: str, feedback: str | None) -> str:
    """Build the spec-writer prompt for ``task``.

    The base prompt explains the role, embeds the task, and demands a JSON reply
    with ``spec_md`` and ``tests``. When ``feedback`` is non-empty the prompt is
    prefixed with the operator's feedback and a request to revise.
    """
    role = (
        "You are the SPEC-WRITER for a coding lab. Turn the task below into a precise spec and "
        "a set of ACCEPTANCE TESTS that define 'done'. A different agent (the engineer) will be "
        "judged ONLY by these tests and must not change them — so make them concrete, fair, and "
        "runnable.\n\n"
    )
    task_section = f"TASK:\n{task}\n\n"
    reply_rules = (
        "Study the repository in the current directory (read files) to match its language, test "
        "framework, and conventions. Do NOT create or edit any files — return everything in the "
        "JSON. Use one or more acceptance test files as needed. Reply with ONLY this JSON object: "
        '{"spec_md": "<the spec, as markdown>", '
        '"tests": [{"path": "<relative test file path>", "content": "<full file contents>"}]}'
    )
    base = role + task_section + reply_rules
    if not feedback:
        return base
    return (f"The operator gave feedback on your previous draft:\n\n{feedback}\n\n"
            f"Revise the spec and tests accordingly. {base}")
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _safe_test_files(wt, tests):
    """Keep only well-formed test entries whose path stays inside the worktree."""
    root = Path(wt).resolve()
    safe = []
    for t in tests if isinstance(tests, list) else []:
        if not (isinstance(t, dict) and t.get("path") and "content" in t):
            continue
        if not isinstance(t["path"], str) or not isinstance(t["content"], str):
            continue
        # The path comes from the model: refuse absolute paths and ".." escapes,
        # which would otherwise be written outside the task's worktree.
        target = (root / t["path"]).resolve()
        if root not in target.parents:
            continue
        safe.append(t)
    return safe


def spec_task(repo, task, *, runner=None, ui=None, yes=False):
    """Draft → approve → lock the acceptance tests for a task. Returns a dict or None.

    The spec-writer's draft (spec + test files) is written into the task's
    worktree and shown to the operator, who approves it, cancels, or gives
    feedback for another round. On approval the tests are made read-only in the
    worktree and a pristine copy with SHA-256 digests is stored out-of-tree in
    ``<repo>/.leanlab/locks/<slug>.json``.

    Args:
        repo: Repository root (must be a git repository).
        task: Free-text task description; also determines the slug.
        runner: Agent runner; built for the worktree when omitted.
        ui: Interaction front-end; a ``SpecUI`` when omitted.
        yes: True auto-approves the first draft (headless, for an agent driving
            leanlab).

    Returns:
        ``{"worktree", "branch", "test_paths"}`` on approval; None when ``repo``
        is not a git repository, drafting failed, no usable test file was
        returned, or the operator cancelled.
    """
    repo = Path(repo).resolve()
    ui = ui or SpecUI()
    if not _is_git_repo(repo):
        ui.error("not a git repository — coding labs need git for worktree isolation")
        return None

    slug = _slug(task)
    wt, branch = _create_worktree(repo, slug)
    runner = runner or make_runner(wt)  # the spec-writer works inside the worktree

    feedback = None
    while True:
        with ui.status("Spec-writer is drafting the spec + acceptance tests…"):
            res = runner.run_structured(_spec_prompt(task, feedback), ["spec_md", "tests"])
        if not res.ok:
            ui.error("could not draft the spec — aborting.")
            return None
        drafted = res.data["tests"]
        files = _safe_test_files(wt, drafted)
        if not files:
            ui.error("the spec-writer returned no acceptance test files — aborting.")
            return None
        if isinstance(drafted, list) and len(files) < len(drafted):
            ui.note(f"Ignored {len(drafted) - len(files)} test file(s) with an unusable path "
                    "or content.")
        (wt / "SPEC.md").write_text(res.data["spec_md"])
        for t in files:
            p = wt / t["path"]
            p.parent.mkdir(parents=True, exist_ok=True)
            if p.exists():
                p.chmod(0o644)  # a prior spec run may have locked this file
            # Write the exact UTF-8 bytes the lock's sha256 is computed over, so
            # the file hashes as pristine regardless of locale or newline rules.
            p.write_bytes(t["content"].encode())

        ui.spec(res.data["spec_md"])
        if yes:
            action, text = "approve", None
        else:
            action, text = ui.decide("\n\n".join(f"# {t['path']}\n{t['content']}" for t in files))
        if action == "approve":
            # Store the lock + a PRISTINE copy OUTSIDE the worktree, where the engineer (which
            # works inside the worktree) cannot reach it. The build step restores from here.
            locked = [{"path": t["path"], "content": t["content"],
                       "sha256": hashlib.sha256(t["content"].encode()).hexdigest()} for t in files]
            for t in files:
                (wt / t["path"]).chmod(0o444)  # in-tree lock is a cosmetic guardrail only
            locks = repo / ".leanlab" / "locks"
            locks.mkdir(parents=True, exist_ok=True)
            (locks / f"{slug}.json").write_text(json.dumps({"tests": locked}))
            log_event(repo, slug, {"event": "spec", "tests": [t["path"] for t in files]})
            break
        if action == "cancel":
            ui.note("Cancelled — worktree kept, tests not locked.")
            return None
        feedback = text

    ui.success(wt, branch)
    return {"worktree": str(wt), "branch": branch, "test_paths": [t["path"] for t in files]}
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
class SpecUI:
    """Terminal front-end for `leanlab spec`, rendered through a rich console.

    Provides the drafting spinner, the proposed-spec panel, the arrow-key
    approve/view/feedback/cancel menu, and the final confirmation panel.
    ``rich`` and ``questionary`` are imported lazily.
    """

    def __init__(self):
        from rich.console import Console
        self.console = Console()

    def status(self, message):
        """Return a spinner context manager labelled with ``message``."""
        label = f"[bold cyan]{message}"
        return self.console.status(label, spinner="dots")

    def note(self, message):
        """Print ``message`` as-is."""
        self.console.print(message)

    def error(self, message):
        """Print ``message`` in bold red."""
        styled = f"[bold red]{message}[/bold red]"
        self.console.print(styled)

    def spec(self, spec_md):
        """Render the drafted spec as markdown inside a titled panel."""
        from rich.markdown import Markdown
        from rich.panel import Panel
        panel = Panel(Markdown(spec_md), title="Proposed spec", border_style="magenta")
        self.console.print(panel)

    def decide(self, test_code):
        """Ask the operator what to do with the draft.

        Returns ``("approve", None)``, ``("cancel", None)`` (also when the prompt
        is aborted), or ``("feedback", text)``. Choosing "view" prints
        ``test_code`` with syntax highlighting and asks again.
        """
        import questionary
        from rich.syntax import Syntax
        approve = "✓ Approve & lock the acceptance tests"
        view = "👁 View the acceptance tests"
        feedback = "✍ Give feedback (revise)"
        cancel = "✖ Cancel"
        while True:
            choice = questionary.select("What now?", choices=[approve, view, feedback, cancel]).ask()
            if choice is None or choice == cancel:
                return ("cancel", None)
            if choice == approve:
                return ("approve", None)
            if choice != view:
                reply = questionary.text("Your feedback for the spec-writer:").ask()
                return ("feedback", reply or "")
            listing = Syntax(test_code, "python", theme="ansi_dark",
                             line_numbers=True, word_wrap=True)
            self.console.print(listing)

    def success(self, worktree, branch):
        """Show the green confirmation panel naming the worktree and branch."""
        from rich.panel import Panel
        body = (f"Spec locked in [bold]{worktree}[/bold]\nbranch [bold]{branch}[/bold]\n\n"
                "Acceptance tests are frozen — the engineer will implement against them.")
        self.console.print(Panel(body, title="✓ Spec ready", border_style="green"))
|