sembl-stack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. sembl_stack/__init__.py +3 -0
  2. sembl_stack/adapters/__init__.py +0 -0
  3. sembl_stack/adapters/_redact.py +19 -0
  4. sembl_stack/adapters/base.py +179 -0
  5. sembl_stack/adapters/codegraph_cbm.py +95 -0
  6. sembl_stack/adapters/deploy_vercel.py +215 -0
  7. sembl_stack/adapters/execute_aider.py +115 -0
  8. sembl_stack/adapters/execute_claude.py +114 -0
  9. sembl_stack/adapters/execute_mock.py +53 -0
  10. sembl_stack/adapters/execute_opencode.py +114 -0
  11. sembl_stack/adapters/merge_git.py +107 -0
  12. sembl_stack/adapters/postdeploy_http.py +82 -0
  13. sembl_stack/adapters/review_coderabbit.py +215 -0
  14. sembl_stack/adapters/review_llm.py +142 -0
  15. sembl_stack/adapters/review_mock.py +42 -0
  16. sembl_stack/adapters/sandbox_worktree.py +79 -0
  17. sembl_stack/adapters/spec_sembl.py +91 -0
  18. sembl_stack/adapters/verify_sembl.py +77 -0
  19. sembl_stack/artifacts.py +207 -0
  20. sembl_stack/cli.py +759 -0
  21. sembl_stack/config.py +87 -0
  22. sembl_stack/contextgraph.py +154 -0
  23. sembl_stack/doctor.py +111 -0
  24. sembl_stack/loop.py +380 -0
  25. sembl_stack/onboarding.py +272 -0
  26. sembl_stack/presets.py +114 -0
  27. sembl_stack/profile.py +193 -0
  28. sembl_stack/reconciliation.py +138 -0
  29. sembl_stack/registry.py +91 -0
  30. sembl_stack/rsi.py +188 -0
  31. sembl_stack/runner.py +134 -0
  32. sembl_stack/session.py +86 -0
  33. sembl_stack/specgraph.py +146 -0
  34. sembl_stack/store.py +112 -0
  35. sembl_stack/tracing.py +51 -0
  36. sembl_stack/transport/__init__.py +0 -0
  37. sembl_stack/transport/mcp_client.py +58 -0
  38. sembl_stack/tui.py +86 -0
  39. sembl_stack/views.py +74 -0
  40. sembl_stack/wizard.py +233 -0
  41. sembl_stack-0.1.0.dist-info/METADATA +165 -0
  42. sembl_stack-0.1.0.dist-info/RECORD +45 -0
  43. sembl_stack-0.1.0.dist-info/WHEEL +4 -0
  44. sembl_stack-0.1.0.dist-info/entry_points.txt +2 -0
  45. sembl_stack-0.1.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,3 @@
1
+ """sembl-stack — an open, swappable spec-driven coding factory."""
2
+
3
+ __version__ = "0.1.0"
File without changes
@@ -0,0 +1,19 @@
1
+ """Redaction helper for adapter artifacts.
2
+
3
+ Third-party process output (HTTP health bodies, CLI stdout/stderr, reviewer output) can carry
4
+ debug pages, stack traces, env-shaped values, diff snippets, or auth errors. Persisting it raw
5
+ into `.sembl/runs/<id>/` would violate the no-secrets-in-artifacts invariant. We keep only a
6
+ non-reversible fingerprint: byte count + sha256. That preserves "output existed / did it change"
7
+ signal without ever serializing the content.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import hashlib
12
+
13
+
14
def summarize(text) -> dict:
    """Fingerprint third-party output as ``{"bytes", "sha256"}`` without keeping it.

    ``None`` yields a zero-length record with no digest. ``bytes`` are hashed as
    given; any other value is stringified and UTF-8 encoded (unencodable
    characters replaced) before hashing.
    """
    if text is None:
        return {"bytes": 0, "sha256": None}
    if isinstance(text, bytes):
        payload = text
    else:
        payload = str(text).encode("utf-8", "replace")
    digest = hashlib.sha256(payload).hexdigest()
    return {"bytes": len(payload), "sha256": digest}
@@ -0,0 +1,179 @@
1
+ """The platform contract.
2
+
3
+ The data types are the canonical artifacts (see `sembl_stack/artifacts.py`); the
4
+ Protocols below are what an adapter must satisfy to be swappable into a layer. Re-exported
5
+ here so adapters import everything they need from one place.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ import re
11
+ import subprocess
12
+ from typing import Protocol, runtime_checkable
13
+
14
+ from ..artifacts import ( # noqa: F401 (re-exported for adapters)
15
+ Bounds,
16
+ Change,
17
+ Context,
18
+ Delivery,
19
+ ExecutionResult,
20
+ MergeRecord,
21
+ ReconciliationReport,
22
+ ReviewReport,
23
+ SpecGraph,
24
+ Task,
25
+ Trace,
26
+ Verdict,
27
+ )
28
+
29
+
30
+ # --- Shared adapter helpers ---------------------------------------------------
31
+
32
def changed_files_from_diff(diff: str) -> list[str]:
    """List the paths a unified git diff touches, first-seen order, no duplicates.

    Two sources are unioned:
      * `diff --git a/… b/…` headers — these name a file even when it carries no
        `+++` hunk (for example an empty new file), so nothing is silently dropped;
      * `+++ b/…` markers — the fallback for a fragment that arrives without a
        full header.
    The `/dev/null` add/delete sentinel is ignored. All executor adapters share
    this parser so they report changed files the same way.
    """
    # A dict keeps insertion order and gives de-duplication for free.
    ordered: dict[str, None] = {}

    for raw in diff.splitlines():
        if raw.startswith("diff --git "):
            # Everything after the first " b/" is the post-image path.
            candidate = raw.partition(" b/")[2]
        elif raw.startswith("+++ "):
            candidate = raw[4:]
            if candidate.startswith("b/"):
                candidate = candidate[2:]
            # Drop a trailing tab-separated timestamp if one is present.
            candidate = candidate.split("\t", 1)[0]
        else:
            continue

        candidate = candidate.strip()
        if candidate and candidate != "/dev/null":
            ordered.setdefault(candidate, None)

    return list(ordered)
64
+
65
+
66
# Env-var names whose values are credentials; a secret only ever lives in the
# environment, so an executor CLI echoing one (e.g. in an auth error) is the one
# path it could reach a persisted run artifact. Scrubbed by value below.
_SECRET_ENV_NAME = re.compile(r"(API_KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL)S?$", re.IGNORECASE)
# Generic provider-key shapes (sk-ant-…, sk-proj-…, sk-or-v1-…) as a second net.
_SECRET_TOKEN = re.compile(r"sk-[A-Za-z0-9_\-]{8,}")


def scrub_secrets(text: str) -> str:
    """Redact anything secret-shaped before it reaches a run artifact.

    Executor stdout/stderr is persisted into `.sembl/runs/<id>/change.json` for
    debuggability; the security invariant (no key value ever stored) must hold even
    when a CLI misbehaves and echoes a credential. Env values are compared in memory
    only — nothing read here is ever written anywhere except as its redaction marker.

    Args:
        text: Captured process output; falsy input is returned unchanged.

    Returns:
        The text with every credential-named env value (8+ characters) replaced by
        `[redacted:<ENV_NAME>]` and every `sk-…` shaped token by `[redacted:key]`.
    """
    if not text:
        return text
    secrets = [
        (name, value)
        for name, value in os.environ.items()
        if len(value) >= 8 and _SECRET_ENV_NAME.search(name)
    ]
    # Longest value first: if one secret is a prefix/substring of another, redacting
    # the short one first would split the long one and leave its remainder in the
    # text. The sort is stable, so equal-length values keep environment order.
    secrets.sort(key=lambda item: len(item[1]), reverse=True)
    for name, value in secrets:
        text = text.replace(value, f"[redacted:{name}]")
    return _SECRET_TOKEN.sub("[redacted:key]", text)
88
+
89
+
90
def run_executor(cmd: list[str], cwd: str, timeout: int, **run_kwargs):
    """Run an executor subprocess and report a timeout as data, not an exception.

    Returns ``(returncode, stdout, stderr, timed_out)``. When the process exceeds
    `timeout`, the `subprocess.TimeoutExpired` is absorbed here: whatever partial
    output was captured is kept, the return code is ``-1`` and `timed_out` is
    ``True``, so the caller can record it instead of aborting the loop.
    Extra keyword arguments are forwarded to `subprocess.run`.
    """

    def _as_text(chunk) -> str:
        # Partial output on a timeout may still be raw bytes (or None).
        chunk = chunk or ""
        if isinstance(chunk, bytes):
            return chunk.decode("utf-8", "replace")
        return chunk

    try:
        # Decode explicitly as UTF-8 with replacement: agents emit box-drawing
        # characters, emoji and the like, and the locale default (cp1252 on Windows)
        # would break the capture instead of merely degrading a character.
        completed = subprocess.run(
            cmd,
            cwd=cwd,
            capture_output=True,
            text=True,
            timeout=timeout,
            encoding="utf-8",
            errors="replace",
            **run_kwargs,
        )
    except subprocess.TimeoutExpired as expired:
        return -1, _as_text(expired.stdout), _as_text(expired.stderr), True
    return completed.returncode, completed.stdout or "", completed.stderr or "", False
114
+
115
+
116
+ # --- Layer interfaces (Protocols) ---------------------------------------------
117
+
118
class Sandbox(Protocol):
    """Handle to an open sandbox (produced by the L4 sandbox layer)."""

    # Directory the executor works in.
    workdir: str

    def diff(self) -> str:
        """Return the sandbox's current changes as a diff string."""
        ...

    def close(self) -> None:
        """Release the sandbox."""
        ...
122
+
123
+
124
@runtime_checkable
class SpecAdapter(Protocol):
    """L2 spec layer: turn a Task into the Bounds it must stay within."""

    def plan(self, task: Task) -> Bounds:
        ...
127
+
128
+
129
@runtime_checkable
class SandboxAdapter(Protocol):
    """L4 sandbox layer: open a contained Sandbox for a repository."""

    def open(self, repo: str) -> Sandbox:
        ...
132
+
133
+
134
@runtime_checkable
class ExecuteAdapter(Protocol):
    """L3 execute layer: Task + Bounds (+ optional feedback) in, ExecutionResult out."""

    def run(
        self,
        task: Task,
        bounds: Bounds,
        sandbox: Sandbox,
        feedback: str | None,
    ) -> ExecutionResult:
        ...
138
+
139
+
140
@runtime_checkable
class VerifyAdapter(Protocol):
    """L5 verify layer: judge an ExecutionResult against its Bounds, yielding a Verdict."""

    def verify(
        self,
        bounds: Bounds,
        result: ExecutionResult,
        strict: bool,
    ) -> Verdict:
        ...
144
+
145
+
146
@runtime_checkable
class ReconcileAdapter(Protocol):
    """L5.5 reconcile layer: compare a SpecGraph with a code graph and report."""

    def reconcile(
        self,
        spec_graph: SpecGraph,
        code_graph: dict,
    ) -> ReconciliationReport:
        ...
150
+
151
+
152
@runtime_checkable
class MergeAdapter(Protocol):
    """L6.5 merge layer: after a PASS verdict, merge and return a MergeRecord."""

    def merge(
        self,
        repo: str,
        *,
        into: str = "main",
        source: str = "HEAD",
        no_ff: bool = True,
        message: str | None = None,
    ) -> MergeRecord:
        ...
157
+
158
+
159
@runtime_checkable
class DeployAdapter(Protocol):
    """L7 deploy layer: after a PASS verdict, produce a Delivery; `rollback` reverts it."""

    def deploy(
        self,
        repo: str,
        *,
        production: bool = False,
        prebuilt: bool = False,
    ) -> Delivery:
        ...

    def rollback(
        self,
        repo: str,
        *,
        to: str | None = None,
    ) -> Delivery:
        ...
167
+
168
+
169
@runtime_checkable
class PostDeployAdapter(Protocol):
    """L8 post-deploy layer: check a Delivery and return a Verdict."""

    def verify(
        self,
        delivery: Delivery,
        *,
        health_path: str = "/",
        timeout_s: float = 10.0,
    ) -> Verdict:
        ...
174
+
175
+
176
@runtime_checkable
class ReviewAdapter(Protocol):
    """L5.5 quality layer: review a diff and return an advisory ReviewReport."""

    def review(
        self,
        diff: str,
        *,
        reviewer_hint: str = "",
    ) -> ReviewReport:
        ...
@@ -0,0 +1,95 @@
1
+ """L5.5 code-graph source — drive codebase-memory-mcp (CBM) headlessly.
2
+
3
+ reconcile (S9) compares a SpecGraph against a code graph. Previously the code graph was a
4
+ hand-passed JSON file; this adapter produces it LIVE from a real CBM index so a per-PR reconcile
5
+ needs no manual step. CBM is driven via its single-shot CLI (`cbm cli <tool> <json-args>`) — the
6
+ same subprocess containment as the symgraph adapter, never a package dependency. Advisory only: a
7
+ failure returns an empty graph (reconcile then reports UNKNOWN), never an exception that could be
8
+ mistaken for a gate.
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import json
13
+ import shutil
14
+ import subprocess
15
+ from pathlib import Path
16
+
17
+
18
class CbmCodeGraph:
    """Drives the codebase-memory-mcp binary to export a code graph for reconciliation.

    Every failure mode (binary missing, launch error, timeout, unparseable or
    unexpectedly shaped output) degrades to an empty result rather than raising,
    because this source is advisory and must never be mistaken for a gate.
    """

    def __init__(self, binary: str = "codebase-memory-mcp", timeout: int = 600,
                 limit: int = 5000):
        self.binary = binary    # executable name looked up on PATH
        self.timeout = timeout  # seconds allowed per CLI invocation
        self.limit = limit      # `limit` sent with the search_graph query

    def _exe(self) -> str | None:
        """Return the resolved path of the binary, or None when it is not on PATH."""
        return shutil.which(self.binary)

    def available(self) -> bool:
        """Report whether the binary can be found on PATH."""
        return self._exe() is not None

    def _run(self, tool: str, payload: dict) -> dict:
        """Invoke `<binary> cli <tool> <json-payload>` and return its parsed stdout.

        Returns `{}` when the binary is missing, cannot be launched, or times out.
        """
        exe = self._exe()
        if not exe:
            return {}
        try:
            proc = subprocess.run(
                [exe, "cli", tool, json.dumps(payload)],
                capture_output=True, text=True, encoding="utf-8", errors="replace",
                timeout=self.timeout)
        except (OSError, subprocess.TimeoutExpired):
            return {}
        return _parse_json(proc.stdout)

    def _project_slug(self, repo: str) -> str | None:
        """Find the CBM project name whose `root_path` matches `repo`.

        The listing comes from an external process, so its shape is validated: a
        missing/null/non-list `projects` value, non-dict entries, or non-string
        fields are skipped instead of raising. Returns None when nothing matches.
        """
        listing = self._run("list_projects", {})
        projects = listing.get("projects")
        if not isinstance(projects, list) or not projects:
            return None
        target = _norm(str(Path(repo).resolve()))
        for proj in projects:
            if not isinstance(proj, dict):
                continue
            root = proj.get("root_path")
            name = proj.get("name")
            if isinstance(root, str) and isinstance(name, str) and name \
                    and _norm(root) == target:
                return name
        return None

    def code_graph(self, repo: str, *, index: bool = True) -> dict:
        """Return a CBM code-graph payload `{"results":[...]}` reconcile can consume.

        Indexes the repo (idempotent refresh), resolves the project slug via CBM's own
        list_projects mapping, then pulls every node with a broad pattern. Returns `{}` on any
        failure — reconcile degrades to UNKNOWN, never blocks.
        """
        if index:
            # CBM's tool contract requires `repo_path` (`path` is silently rejected);
            # `mode: fast` skips similarity/semantic edges — reconcile only needs symbols.
            self._run("index_repository",
                      {"repo_path": str(Path(repo).resolve()), "mode": "fast"})
        slug = self._project_slug(repo)
        if not slug:
            return {}
        return self._run(
            "search_graph",
            {"project": slug, "name_pattern": ".", "limit": self.limit})
72
+
73
+
74
def _norm(p: str) -> str:
    """Canonicalize a path for comparison: `/` separators, trimmed, no trailing `/`, lowercase."""
    forward = p.replace("\\", "/")
    trimmed = forward.strip().rstrip("/")
    return trimmed.lower()
76
+
77
+
78
def _parse_json(text: str | None) -> dict:
    """Parse CBM stdout into a dict, tolerating a leading `level=info ...` log line.

    The whole text is tried first. If that is not valid JSON, the first line that
    starts with `{` and parses decides the result. Anything that is not a JSON
    object (or nothing parseable at all) yields `{}`.
    """
    if not text:
        return {}

    def _dict_or_empty(candidate) -> dict:
        return candidate if isinstance(candidate, dict) else {}

    try:
        return _dict_or_empty(json.loads(text))
    except json.JSONDecodeError:
        pass  # fall through to the per-line scan

    for raw in text.splitlines():
        stripped = raw.strip()
        if not stripped.startswith("{"):
            continue
        try:
            return _dict_or_empty(json.loads(stripped))
        except json.JSONDecodeError:
            continue
    return {}
@@ -0,0 +1,215 @@
1
+ """L7 deploy adapter for Vercel CLI.
2
+
3
+ The stage owns the Delivery artifact, not the hosting mechanism. Credentials are
4
+ left to the local Vercel CLI environment (`vercel login`, `VERCEL_TOKEN`, or linked
5
+ project config) and are never copied into the artifact.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import re
10
+ import shutil
11
+ import subprocess
12
+ import time
13
+ from pathlib import Path
14
+
15
+ from ._redact import summarize
16
+ from .base import Delivery
17
+
18
+ _URL_RE = re.compile(r"https://[^\s]+")
19
+
20
+
21
class VercelDeployAdapter:
    """L7 deploy adapter that shells out to the local Vercel CLI.

    Every outcome — success, non-zero exit, timeout, CLI missing, or the process
    failing to launch — is reported as a `Delivery`; raw CLI output is never stored,
    only its `summarize()` fingerprint.
    """

    def __init__(self, timeout: int = 1800, yes: bool = True):
        self.timeout = timeout  # seconds allowed per CLI invocation
        self.yes = yes          # append `--yes` so the CLI never waits on a prompt

    def available(self) -> bool:
        """Report whether a launchable Vercel CLI was found on PATH."""
        return bool(_resolve_vercel())

    def deploy(self, repo: str, *, production: bool = False,
               prebuilt: bool = False) -> Delivery:
        """Run `vercel deploy` in `repo` and describe the result.

        Args:
            repo: Repository directory the CLI runs in.
            production: Add `--prod`.
            prebuilt: Add `--prebuilt`.

        Returns:
            A Delivery with status "deployed" only when the CLI exits 0 and a URL
            was found in its output; otherwise "failed".
        """
        repo_path = str(Path(repo).resolve())
        cmd = _resolve_vercel()
        if not cmd:
            return Delivery(target="vercel", status="failed",
                            data={"reason": "vercel CLI not found on PATH"})
        # Name the subcommand explicitly in every mode so the executed (and recorded)
        # command does not depend on the CLI's implicit default.
        cmd.append("deploy")
        if prebuilt:
            cmd.append("--prebuilt")
        if production:
            cmd.append("--prod")
        if self.yes:
            cmd.append("--yes")

        t0 = time.perf_counter()
        proc, failure = self._invoke(cmd, repo_path, t0, failed_status="failed")
        if proc is None:
            return failure

        stdout = proc.stdout or ""
        stderr = proc.stderr or ""
        url = _last_url(stdout) or _last_url(stderr)
        status = "deployed" if proc.returncode == 0 and url else "failed"
        return Delivery(
            target="vercel",
            url=url,
            status=status,
            data={
                "production": production,
                "prebuilt": prebuilt,
                "returncode": proc.returncode,
                "latency_s": round(time.perf_counter() - t0, 3),
                "command": _safe_command(cmd),
                "stdout": summarize(stdout),
                "stderr": summarize(stderr),
            },
        )

    def rollback(self, repo: str, *, to: str | None = None) -> Delivery:
        """Promote the previous production deployment (Vercel rollback).

        Mechanism only: the decision to roll back is the caller's (the L8 gate Verdict).
        `to` optionally names a specific deployment URL/id to roll back to. When omitted,
        this looks up the immediately previous production deployment and rolls back to it
        explicitly — verified live against a real deploy: bare `vercel rollback` (no target)
        only reports in-progress rollback *status* on current CLI versions ("No deployment
        rollback in progress", exit 0) rather than performing one, so the old
        target-less/omitted call silently never rolled anything back.
        """
        repo_path = str(Path(repo).resolve())
        launcher = _resolve_vercel()  # resolved once and reused for the command below
        if not launcher:
            return Delivery(target="vercel", status="rollback_failed",
                            data={"reason": "vercel CLI not found on PATH"})
        if not to:
            to = self._previous_production_url(repo_path)
            if not to:
                return Delivery(
                    target="vercel",
                    status="rollback_failed",
                    data={"reason": "no previous production deployment found"},
                )
        cmd = launcher + ["rollback", to]
        if self.yes:
            cmd.append("--yes")

        t0 = time.perf_counter()
        proc, failure = self._invoke(cmd, repo_path, t0, failed_status="rollback_failed")
        if proc is None:
            return failure

        stdout = proc.stdout or ""
        stderr = proc.stderr or ""
        url = _last_url(stdout) or _last_url(stderr)
        status = "rolled_back" if proc.returncode == 0 else "rollback_failed"
        return Delivery(
            target="vercel",
            url=url,
            status=status,
            data={
                "rolled_back_to": to,
                "returncode": proc.returncode,
                "latency_s": round(time.perf_counter() - t0, 3),
                "command": _safe_command(cmd),
                "stdout": summarize(stdout),
                "stderr": summarize(stderr),
            },
        )

    def _invoke(self, cmd: list[str], repo_path: str, t0: float, *, failed_status: str):
        """Run the CLI; return `(proc, None)` or `(None, failure Delivery)`.

        A timeout keeps only fingerprints of the partial output. An `OSError`
        (for example an unusable working directory) is reported as a failure
        instead of escaping to the caller as a crash.
        """
        try:
            proc = subprocess.run(
                cmd, cwd=repo_path, capture_output=True, text=True,
                encoding="utf-8", errors="replace", timeout=self.timeout)
        except subprocess.TimeoutExpired as exc:
            return None, Delivery(
                target="vercel",
                status=failed_status,
                data={
                    "reason": "timeout",
                    "latency_s": round(time.perf_counter() - t0, 3),
                    "command": _safe_command(cmd),
                    "stdout": summarize(exc.stdout),
                    "stderr": summarize(exc.stderr),
                },
            )
        except OSError as exc:
            # Record only the error kind/description, not the full exception text.
            detail = exc.strerror or type(exc).__name__
            return None, Delivery(
                target="vercel",
                status=failed_status,
                data={
                    "reason": f"could not launch vercel CLI: {detail}",
                    "latency_s": round(time.perf_counter() - t0, 3),
                    "command": _safe_command(cmd),
                },
            )
        return proc, None

    def _previous_production_url(self, repo_path: str) -> str | None:
        """The production deployment immediately before the current one.

        `vercel ls --prod` prints a scriptable bare-URL list at the end of its output,
        newest first — index 0 is the (bad) deployment we're rolling back FROM, index 1 is
        the one to roll back TO. Returns None if there's no prior production deployment,
        or if the listing cannot be obtained (CLI missing, launch error, timeout).
        """
        launcher = _resolve_vercel()
        if not launcher:
            return None
        try:
            proc = subprocess.run(
                launcher + ["ls", "--prod"], cwd=repo_path,
                capture_output=True, text=True, encoding="utf-8", errors="replace",
                timeout=self.timeout)
        except (OSError, subprocess.TimeoutExpired):
            return None
        urls = [ln.strip() for ln in (proc.stdout or "").splitlines()
                if ln.strip().startswith("https://")]
        return urls[1] if len(urls) > 1 else None
161
+
162
+
163
def _resolve_vercel() -> list[str]:
    """Return the argv prefix that launches the Vercel CLI, or `[]` if it is not on PATH.

    On Windows, npm installs `vercel` as a `.cmd`/`.ps1` shim, which cannot be started
    directly without a shell, so the shim is routed through its own interpreter
    (`cmd /c` or `powershell -File`). Anything else is treated as directly
    executable and returned as a one-element list.
    """
    located = shutil.which("vercel")
    if not located:
        return []

    lowered = located.lower()
    if lowered.endswith(".ps1"):
        return ["powershell", "-NoProfile", "-ExecutionPolicy", "Bypass", "-File", located]
    if lowered.endswith((".cmd", ".bat")):
        return ["cmd", "/c", located]
    return [located]
181
+
182
+
183
_DEPLOYMENT_HOST_RE = re.compile(r"^https://[^/\s]+\.vercel\.app(?:/|$)")


def _last_url(text: str | None) -> str | None:
    """The last *deployment* URL Vercel CLI printed — never a dashboard/API link.

    Live-proof finding: the CLI interleaves other `https://` links into stdout alongside
    the human-facing deployment URL — an `https://vercel.com/<team>/<project>/<id>`
    dashboard "Inspect" link, and on some versions an `https://api.vercel.com/v13/
    deployments/...` internal status-poll call. Picking the textually-last URL without
    filtering returns one of those instead, which then silently breaks every downstream
    health check against it. Every real deployment/preview/production URL Vercel serves
    lives on the `*.vercel.app` domain (never `vercel.com`/`api.vercel.com`), so prefer
    matches on that host; fall back to any match only if that's all there is.

    Returns None when `text` is empty/None or contains no `https://` URL.
    """
    # Strip trailing sentence/quote punctuation BEFORE the host filter: the filter
    # requires `.vercel.app` to be followed by `/` or end-of-string, so a URL captured
    # as `https://x.vercel.app.` would otherwise be rejected and a later dashboard
    # link picked in its place.
    urls = [u.rstrip(".,)\"'") for u in _URL_RE.findall(text or "")]
    preferred = [u for u in urls if _DEPLOYMENT_HOST_RE.match(u)]
    picked = preferred or urls
    return picked[-1] if picked else None
202
+
203
+
204
def _safe_command(cmd: list[str]) -> list[str]:
    """Return a copy of `cmd` that is safe to persist, with any token value masked.

    Handles all three spellings of the option: `--token <value>` and `-t <value>`
    (the following argument becomes `<redacted>`) and `--token=<value>` (rewritten
    to `--token=<redacted>`). All other arguments are copied unchanged; the input
    list is not modified.
    """
    safe: list[str] = []
    redact_next = False
    for part in cmd:
        if redact_next:
            safe.append("<redacted>")
            redact_next = False
        elif part in ("--token", "-t"):
            safe.append(part)
            redact_next = True
        elif part.startswith("--token="):
            safe.append("--token=<redacted>")
        else:
            safe.append(part)
    return safe
@@ -0,0 +1,115 @@
1
+ """L3 executor: Aider (OSS) driven headless in the sandbox.
2
+
3
+ Hands the task (plus the gate's feedback on retry, and the in-scope file list) to a
4
+ non-interactive `aider --message ...` run inside the worktree, then reads back the diff.
5
+ Aider resolves its own model credentials from the environment (e.g. OPENAI_API_BASE +
6
+ OPENAI_API_KEY for an OpenAI-compatible router, or ANTHROPIC_API_KEY) — sembl-stack never
7
+ handles a token. Requires `aider` on PATH.
8
+
9
+ Why the flags:
10
+ --message <prompt> run one instruction non-interactively and exit (headless).
11
+ --yes-always auto-confirm (create files, apply edits) — no TTY prompts.
12
+ --no-auto-commits leave edits in the WORKING TREE so the sandbox's `git diff`
13
+ captures them; otherwise aider commits and the diff looks empty.
14
+ --no-stream / --no-check-update / --no-show-model-warnings
15
+ quiet, deterministic, non-blocking startup.
16
+ The model is a one-line config (`options.execute.model`), e.g. an `openai/<name>` route.
17
+ """
18
+ from __future__ import annotations
19
+
20
+ import glob
21
+ import shutil
22
+ import subprocess
23
+ from pathlib import Path
24
+
25
+ from .base import (
26
+ Bounds,
27
+ ExecutionResult,
28
+ Sandbox,
29
+ Task,
30
+ changed_files_from_diff as _changed_files,
31
+ run_executor,
32
+ scrub_secrets,
33
+ )
34
+
35
+
36
class AiderExecutor:
    """L3 executor that runs `aider` headlessly inside the sandbox worktree."""

    def __init__(self, model: str | None = None, timeout: int = 900):
        self.model = model      # optional `--model` value; omitted when falsy
        self.timeout = timeout  # seconds before the run is reported as timed out

    def run(self, task: Task, bounds: Bounds, sandbox: Sandbox,
            feedback: str | None) -> ExecutionResult:
        """Run one non-interactive aider pass and return the resulting diff and report.

        Raises:
            RuntimeError: if `aider` is not on PATH.
        """
        aider = shutil.which("aider")
        if not aider:
            raise RuntimeError(
                "L3: `aider` not found on PATH. `pip install aider-chat`, or set execute: mock.")

        argv = [
            aider,
            "--yes-always",
            "--no-auto-commits",
            "--no-stream",
            "--no-check-update",
            "--no-show-model-warnings",
            "--no-gitignore",
        ]
        if self.model:
            argv.extend(["--model", self.model])
        argv.extend(["--message", self._prompt(task, bounds, feedback)])
        argv.extend(_file_targets(bounds))  # focus aider on the in-scope files

        exit_code, stdout, stderr, timed_out = run_executor(
            argv, cwd=sandbox.workdir, timeout=self.timeout, stdin=subprocess.DEVNULL)

        # Remove aider's own `.aider*` files before the diff is read back.
        _clean_aider_scratch(sandbox.workdir)
        diff = sandbox.diff()

        report = {
            "files_modified": _changed_files(diff),
            "agent": "aider",
            "model": self.model,
            "exit_code": exit_code,
            # Only the scrubbed tail of each stream is kept.
            "output": scrub_secrets(stdout)[-2000:],
            "stderr": scrub_secrets(stderr)[-1000:],
        }
        if timed_out:
            # Recorded in the report so the gate can block instead of the loop crashing.
            report.update({"error": "timeout", "timed_out": True})
        return ExecutionResult(diff=diff, report=report, workdir=sandbox.workdir)

    @staticmethod
    def _prompt(task: Task, bounds: Bounds, feedback: str | None) -> str:
        """Compose the instruction: task text, path constraints, then retry feedback."""
        parts = [task.text, ""]
        if bounds.editable_paths:
            allowed = ", ".join(bounds.editable_paths)
            parts.append("You may ONLY edit these paths: " + allowed)
        if bounds.forbidden_areas:
            parts.append("Never touch: " + ", ".join(bounds.forbidden_areas))
        if feedback:
            parts.extend(["", feedback])
        return "\n".join(parts)
84
+
85
+
86
def _clean_aider_scratch(workdir: str) -> None:
    """Remove aider's own working files (`.aider*`) before the diff is captured.

    Aider writes `.aider.chat.history.md`, `.aider.input.history`, and a
    `.aider.tags.cache.v4/` dir into the working directory. With `--no-gitignore` these are
    untracked clutter that the sandbox diff would otherwise pick up as out-of-scope edits.
    They are aider internals, never part of the change, so we delete them in the disposable
    cage before gating. Best-effort; failures are non-fatal.
    """
    try:
        # Path.glob applies the pattern only to entry names, so a workdir whose own
        # path contains glob metacharacters (`[`, `*`, `?`) is still matched; building
        # one pattern string for glob.glob would silently match nothing there.
        # Materialize first because entries are deleted while we iterate.
        leftovers = list(Path(workdir).glob(".aider*"))
    except OSError:
        return
    for path in leftovers:
        try:
            if path.is_dir() and not path.is_symlink():
                shutil.rmtree(path, ignore_errors=True)
            else:
                # Regular files, and symlinks (rmtree refuses to operate on a link).
                path.unlink(missing_ok=True)
        except OSError:
            pass
104
+
105
+
106
def _file_targets(bounds: Bounds) -> list[str]:
    """Pick the editable paths to hand to aider as file arguments.

    Paths are normalized to `/` separators. An entry is kept only when it does not
    end in `/` (a directory prefix) and its last segment contains a `.` (it looks
    like a file with an extension).
    """
    normalized = (raw.replace("\\", "/") for raw in bounds.editable_paths)
    return [
        candidate
        for candidate in normalized
        if not candidate.endswith("/") and "." in candidate.rsplit("/", 1)[-1]
    ]