sembl-stack 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sembl_stack/__init__.py +3 -0
- sembl_stack/adapters/__init__.py +0 -0
- sembl_stack/adapters/_redact.py +19 -0
- sembl_stack/adapters/base.py +179 -0
- sembl_stack/adapters/codegraph_cbm.py +95 -0
- sembl_stack/adapters/deploy_vercel.py +215 -0
- sembl_stack/adapters/execute_aider.py +115 -0
- sembl_stack/adapters/execute_claude.py +114 -0
- sembl_stack/adapters/execute_mock.py +53 -0
- sembl_stack/adapters/execute_opencode.py +114 -0
- sembl_stack/adapters/merge_git.py +107 -0
- sembl_stack/adapters/postdeploy_http.py +82 -0
- sembl_stack/adapters/review_coderabbit.py +215 -0
- sembl_stack/adapters/review_llm.py +142 -0
- sembl_stack/adapters/review_mock.py +42 -0
- sembl_stack/adapters/sandbox_worktree.py +79 -0
- sembl_stack/adapters/spec_sembl.py +91 -0
- sembl_stack/adapters/verify_sembl.py +77 -0
- sembl_stack/artifacts.py +207 -0
- sembl_stack/cli.py +759 -0
- sembl_stack/config.py +87 -0
- sembl_stack/contextgraph.py +154 -0
- sembl_stack/doctor.py +111 -0
- sembl_stack/loop.py +380 -0
- sembl_stack/onboarding.py +272 -0
- sembl_stack/presets.py +114 -0
- sembl_stack/profile.py +193 -0
- sembl_stack/reconciliation.py +138 -0
- sembl_stack/registry.py +91 -0
- sembl_stack/rsi.py +188 -0
- sembl_stack/runner.py +134 -0
- sembl_stack/session.py +86 -0
- sembl_stack/specgraph.py +146 -0
- sembl_stack/store.py +112 -0
- sembl_stack/tracing.py +51 -0
- sembl_stack/transport/__init__.py +0 -0
- sembl_stack/transport/mcp_client.py +58 -0
- sembl_stack/tui.py +86 -0
- sembl_stack/views.py +74 -0
- sembl_stack/wizard.py +233 -0
- sembl_stack-0.1.0.dist-info/METADATA +165 -0
- sembl_stack-0.1.0.dist-info/RECORD +45 -0
- sembl_stack-0.1.0.dist-info/WHEEL +4 -0
- sembl_stack-0.1.0.dist-info/entry_points.txt +2 -0
- sembl_stack-0.1.0.dist-info/licenses/LICENSE +201 -0
sembl_stack/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Redaction helper for adapter artifacts.
|
|
2
|
+
|
|
3
|
+
Third-party process output (HTTP health bodies, CLI stdout/stderr, reviewer output) can carry
|
|
4
|
+
debug pages, stack traces, env-shaped values, diff snippets, or auth errors. Persisting it raw
|
|
5
|
+
into `.sembl/runs/<id>/` would violate the no-secrets-in-artifacts invariant. We keep only a
|
|
6
|
+
non-reversible fingerprint: byte count + sha256. That preserves "output existed / did it change"
|
|
7
|
+
signal without ever serializing the content.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import hashlib
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def summarize(text) -> dict:
    """Fingerprint third-party output as ``{"bytes", "sha256"}`` without keeping it.

    ``None`` yields a zero byte count and a ``None`` digest. ``bytes`` input is
    hashed as-is; anything else is converted with ``str()`` and encoded as UTF-8
    (unencodable characters replaced) before hashing.
    """
    if text is None:
        return {"bytes": 0, "sha256": None}

    if isinstance(text, bytes):
        payload = text
    else:
        payload = str(text).encode("utf-8", "replace")

    digest = hashlib.sha256(payload).hexdigest()
    return {"bytes": len(payload), "sha256": digest}
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
"""The platform contract.
|
|
2
|
+
|
|
3
|
+
The data types are the canonical artifacts (see `sembl_stack/artifacts.py`); the
|
|
4
|
+
Protocols below are what an adapter must satisfy to be swappable into a layer. Re-exported
|
|
5
|
+
here so adapters import everything they need from one place.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
import re
|
|
11
|
+
import subprocess
|
|
12
|
+
from typing import Protocol, runtime_checkable
|
|
13
|
+
|
|
14
|
+
from ..artifacts import ( # noqa: F401 (re-exported for adapters)
|
|
15
|
+
Bounds,
|
|
16
|
+
Change,
|
|
17
|
+
Context,
|
|
18
|
+
Delivery,
|
|
19
|
+
ExecutionResult,
|
|
20
|
+
MergeRecord,
|
|
21
|
+
ReconciliationReport,
|
|
22
|
+
ReviewReport,
|
|
23
|
+
SpecGraph,
|
|
24
|
+
Task,
|
|
25
|
+
Trace,
|
|
26
|
+
Verdict,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# --- Shared adapter helpers ---------------------------------------------------
|
|
31
|
+
|
|
32
|
+
def changed_files_from_diff(diff: str) -> list[str]:
    """Return the paths named by a unified git diff, first-seen order, no repeats.

    Two sources are unioned:
      * ``diff --git a/... b/...`` headers — the text after the first `` b/`` —
        so a file with no ``+++`` hunk (e.g. an empty new file) is still listed;
      * ``+++ `` markers, with an optional leading ``b/`` and any trailing
        tab-separated timestamp removed, for fragments without a full header.
    Blank paths and the ``/dev/null`` sentinel are ignored.
    """
    # A dict is used as an insertion-ordered set.
    ordered: dict[str, None] = {}

    for raw in diff.splitlines():
        if raw.startswith("diff --git "):
            candidate = raw.partition(" b/")[2]
            if not candidate:
                continue
        elif raw.startswith("+++ "):
            candidate = raw[4:]
            if candidate.startswith("b/"):
                candidate = candidate[2:]
            # drop a trailing tab-timestamp if present
            candidate = candidate.split("\t", 1)[0]
        else:
            continue

        candidate = candidate.strip()
        if candidate and candidate != "/dev/null":
            ordered.setdefault(candidate, None)

    return list(ordered)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# Env-var names whose values are credentials; a secret only ever lives in the
# environment, so an executor CLI echoing one (e.g. in an auth error) is the one
# path it could reach a persisted run artifact. Scrubbed by value below.
_SECRET_ENV_NAME = re.compile(r"(API_KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL)S?$", re.IGNORECASE)
# Generic provider-key shapes (sk-ant-…, sk-proj-…, sk-or-v1-…) as a second net.
_SECRET_TOKEN = re.compile(r"sk-[A-Za-z0-9_\-]{8,}")


def scrub_secrets(text: str) -> str:
    """Redact anything secret-shaped before it reaches a run artifact.

    Executor stdout/stderr is persisted into `.sembl/runs/<id>/change.json` for
    debuggability; the security invariant (no key value ever stored) must hold even
    when a CLI misbehaves and echoes a credential. Env values are compared in memory
    only — nothing read here is ever written anywhere except as its redaction marker.

    Args:
        text: Captured output to sanitise. Falsy input (``""``/``None``) is
            returned unchanged.

    Returns:
        ``text`` with every occurrence of a credential-named environment value
        (at least 8 characters long) replaced by ``[redacted:<ENV_NAME>]`` and
        every ``sk-…`` shaped token replaced by ``[redacted:key]``.
    """
    if not text:
        return text

    candidates = [
        (name, value)
        for name, value in os.environ.items()
        if len(value) >= 8 and _SECRET_ENV_NAME.search(name)
    ]
    # Longest value first: if one secret is a prefix/substring of another,
    # replacing the short one first would leave the rest of the longer secret
    # in the output. The sort is stable, so equal-length values keep env order.
    candidates.sort(key=lambda item: len(item[1]), reverse=True)

    for name, value in candidates:
        text = text.replace(value, f"[redacted:{name}]")
    return _SECRET_TOKEN.sub("[redacted:key]", text)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def run_executor(cmd: list[str], cwd: str, timeout: int, **run_kwargs):
    """Run an executor command and report a timeout as data instead of raising.

    Returns a 4-tuple ``(returncode, stdout, stderr, timed_out)``. On
    ``subprocess.TimeoutExpired`` the return code is ``-1``, ``timed_out`` is
    ``True`` and whatever partial output the exception carries is returned.
    Extra keyword arguments are forwarded to ``subprocess.run``.
    """

    def _as_text(chunk) -> str:
        # The timeout exception may carry None, bytes, or str.
        if not chunk:
            return ""
        if isinstance(chunk, bytes):
            return chunk.decode("utf-8", "replace")
        return chunk

    try:
        # Decode explicitly as UTF-8 with replacement: the locale codec that
        # text=True would otherwise pick (cp1252 on Windows) can fail on agent
        # output such as box-drawing characters or emoji.
        completed = subprocess.run(
            cmd,
            cwd=cwd,
            capture_output=True,
            text=True,
            timeout=timeout,
            encoding="utf-8",
            errors="replace",
            **run_kwargs,
        )
    except subprocess.TimeoutExpired as expired:
        return -1, _as_text(expired.stdout), _as_text(expired.stderr), True

    return completed.returncode, completed.stdout or "", completed.stderr or "", False
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# --- Layer interfaces (Protocols) ---------------------------------------------
|
|
117
|
+
|
|
118
|
+
class Sandbox(Protocol):  # an open sandbox handle (from L4)
    """Structural type for an open sandbox handle.

    Declares only the members below; what an implementation does behind them
    is not visible in this module.
    """

    # Working directory of the sandbox.
    workdir: str
    # Returns the sandbox's changes as diff text (format defined by the implementation).
    def diff(self) -> str: ...
    # Closes the handle; presumably releases the sandbox — confirm against the implementation.
    def close(self) -> None: ...
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@runtime_checkable
class SpecAdapter(Protocol):  # L2: Task -> Bounds
    """Layer L2 interface: turn a ``Task`` into the ``Bounds`` for it.

    Marked ``runtime_checkable`` so ``isinstance`` can test for the method's presence.
    """

    # Produce the Bounds for the given task.
    def plan(self, task: Task) -> Bounds: ...
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
@runtime_checkable
class SandboxAdapter(Protocol):  # L4: Change -> Change (contained)
    """Layer L4 interface: open a ``Sandbox`` handle for a repository."""

    # `repo` is a string identifying the repository (presumably a filesystem path — confirm).
    def open(self, repo: str) -> Sandbox: ...
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
@runtime_checkable
class ExecuteAdapter(Protocol):  # L3: Task+Bounds(+Context) -> Change
    """Layer L3 interface: run a task within its bounds inside a sandbox."""

    # `feedback` is optional text passed to the executor; None when there is none.
    # Returns an ExecutionResult describing what the run produced.
    def run(self, task: Task, bounds: Bounds, sandbox: Sandbox,
            feedback: str | None) -> ExecutionResult: ...
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
@runtime_checkable
class VerifyAdapter(Protocol):  # L5: Change+Bounds -> Verdict
    """Layer L5 interface: judge an ``ExecutionResult`` against its ``Bounds``."""

    # `strict` selects a stricter mode; its exact effect is defined by the implementation.
    def verify(self, bounds: Bounds, result: ExecutionResult,
               strict: bool) -> Verdict: ...
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
@runtime_checkable
class ReconcileAdapter(Protocol):  # L5.5: SpecGraph+CodeGraph -> report
    """Layer L5.5 interface: compare a ``SpecGraph`` with a code graph."""

    # `code_graph` is a plain dict; its schema is not defined in this module.
    def reconcile(self, spec_graph: SpecGraph, code_graph: dict) -> ReconciliationReport:
        ...
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
@runtime_checkable
class MergeAdapter(Protocol):  # L6.5: Verdict(PASS) -> MergeRecord
    """Layer L6.5 interface: perform a merge and describe it as a ``MergeRecord``."""

    # All options are keyword-only. Defaults: into="main", source="HEAD",
    # no_ff=True, message=None.
    # NOTE(review): names suggest git merge semantics (target branch, source ref,
    # --no-ff, commit message) — confirm against the concrete adapter.
    def merge(self, repo: str, *, into: str = "main", source: str = "HEAD",
              no_ff: bool = True, message: str | None = None) -> MergeRecord:
        ...
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
@runtime_checkable
class DeployAdapter(Protocol):  # L7: Verdict(PASS) -> Delivery; rollback reverts it
    """Layer L7 interface: deploy a repository and roll a deployment back.

    Both operations report their outcome as a ``Delivery``.
    """

    # Keyword-only flags, both False by default: `production` and `prebuilt`.
    def deploy(self, repo: str, *, production: bool = False,
               prebuilt: bool = False) -> Delivery:
        ...

    # `to` optionally names the rollback target; None leaves the choice to the adapter.
    def rollback(self, repo: str, *, to: str | None = None) -> Delivery:
        ...
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
@runtime_checkable
class PostDeployAdapter(Protocol):  # L8: Delivery -> Verdict
    """Layer L8 interface: check a ``Delivery`` after deployment and return a ``Verdict``."""

    # Keyword-only options: health_path defaults to "/", timeout_s to 10.0.
    # NOTE(review): timeout_s is presumably in seconds — confirm in the implementation.
    def verify(self, delivery: Delivery, *, health_path: str = "/",
               timeout_s: float = 10.0) -> Verdict:
        ...
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
@runtime_checkable
class ReviewAdapter(Protocol):  # L5.5 quality: a diff -> ReviewReport (advisory)
    """Layer L5.5 quality interface: review a diff and return an advisory ``ReviewReport``."""

    # `reviewer_hint` is optional free text for the reviewer; empty by default.
    def review(self, diff: str, *, reviewer_hint: str = "") -> ReviewReport:
        ...
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""L5.5 code-graph source — drive codebase-memory-mcp (CBM) headlessly.
|
|
2
|
+
|
|
3
|
+
reconcile (S9) compares a SpecGraph against a code graph. Previously the code graph was a
|
|
4
|
+
hand-passed JSON file; this adapter produces it LIVE from a real CBM index so a per-PR reconcile
|
|
5
|
+
needs no manual step. CBM is driven via its single-shot CLI (`cbm cli <tool> <json-args>`) — the
|
|
6
|
+
same subprocess containment as the symgraph adapter, never a package dependency. Advisory only: a
|
|
7
|
+
failure returns an empty graph (reconcile then reports UNKNOWN), never an exception that could be
|
|
8
|
+
mistaken for a gate.
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
import shutil
|
|
14
|
+
import subprocess
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class CbmCodeGraph:
    """Drives the codebase-memory-mcp binary to export a code graph for reconciliation.

    Every call goes through the binary's single-shot CLI
    (``<binary> cli <tool> <json-args>``). All failures degrade to an empty
    result rather than raising.
    """

    def __init__(self, binary: str = "codebase-memory-mcp", timeout: int = 600,
                 limit: int = 5000):
        """Store the binary name, per-call timeout (seconds) and search result limit."""
        self.binary = binary
        self.timeout = timeout
        self.limit = limit

    def _exe(self) -> str | None:
        """Resolve the binary on PATH; ``None`` when it is not installed."""
        return shutil.which(self.binary)

    def available(self) -> bool:
        """Whether the CBM binary can be found on PATH."""
        return self._exe() is not None

    def _run(self, tool: str, payload: dict) -> dict:
        """Invoke one CBM tool and return its parsed JSON object.

        Returns ``{}`` when the binary is missing, cannot be launched, times
        out, or prints nothing parseable as a JSON object.
        """
        exe = self._exe()
        if not exe:
            return {}
        try:
            proc = subprocess.run(
                [exe, "cli", tool, json.dumps(payload)],
                capture_output=True, text=True, encoding="utf-8", errors="replace",
                timeout=self.timeout)
        except (OSError, subprocess.TimeoutExpired):
            return {}
        return _parse_json(proc.stdout)

    def _project_slug(self, repo: str) -> str | None:
        """Find the CBM project name whose root path matches ``repo``.

        Paths are compared after normalisation. A malformed ``list_projects``
        payload (``projects`` missing, null or not a list; entries that are not
        objects; a non-string ``root_path``) is skipped instead of raising, so
        the "never an exception" contract holds.
        """
        target = _norm(str(Path(repo).resolve()))
        projects = self._run("list_projects", {}).get("projects")
        if not isinstance(projects, list):
            return None
        for proj in projects:
            if not isinstance(proj, dict):
                continue
            root = proj.get("root_path")
            name = proj.get("name")
            if isinstance(root, str) and name and _norm(root) == target:
                return name
        return None

    def code_graph(self, repo: str, *, index: bool = True) -> dict:
        """Return a CBM code-graph payload `{"results":[...]}` reconcile can consume.

        Indexes the repo first when ``index`` is true, resolves the project
        slug via CBM's own list_projects mapping, then pulls every node with a
        broad pattern. Returns `{}` on any failure — reconcile degrades to
        UNKNOWN, never blocks.
        """
        if index:
            # CBM's tool contract requires `repo_path` (`path` is silently rejected);
            # `mode: fast` skips similarity/semantic edges — reconcile only needs symbols.
            self._run("index_repository",
                      {"repo_path": str(Path(repo).resolve()), "mode": "fast"})
        slug = self._project_slug(repo)
        if not slug:
            return {}
        return self._run(
            "search_graph",
            {"project": slug, "name_pattern": ".", "limit": self.limit})
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _norm(p: str) -> str:
|
|
75
|
+
return p.replace("\\", "/").strip().rstrip("/").lower()
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _parse_json(text: str | None) -> dict:
|
|
79
|
+
"""Parse CBM stdout, tolerating a leading `level=info ...` log line."""
|
|
80
|
+
if not text:
|
|
81
|
+
return {}
|
|
82
|
+
try:
|
|
83
|
+
out = json.loads(text)
|
|
84
|
+
return out if isinstance(out, dict) else {}
|
|
85
|
+
except json.JSONDecodeError:
|
|
86
|
+
pass
|
|
87
|
+
for line in text.splitlines():
|
|
88
|
+
line = line.strip()
|
|
89
|
+
if line.startswith("{"):
|
|
90
|
+
try:
|
|
91
|
+
out = json.loads(line)
|
|
92
|
+
return out if isinstance(out, dict) else {}
|
|
93
|
+
except json.JSONDecodeError:
|
|
94
|
+
continue
|
|
95
|
+
return {}
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
"""L7 deploy adapter for Vercel CLI.
|
|
2
|
+
|
|
3
|
+
The stage owns the Delivery artifact, not the hosting mechanism. Credentials are
|
|
4
|
+
left to the local Vercel CLI environment (`vercel login`, `VERCEL_TOKEN`, or linked
|
|
5
|
+
project config) and are never copied into the artifact.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import re
|
|
10
|
+
import shutil
|
|
11
|
+
import subprocess
|
|
12
|
+
import time
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
from ._redact import summarize
|
|
16
|
+
from .base import Delivery
|
|
17
|
+
|
|
18
|
+
_URL_RE = re.compile(r"https://[^\s]+")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class VercelDeployAdapter:
    """L7 deploy adapter that shells out to the local Vercel CLI.

    Outcomes are reported as ``Delivery`` artifacts. CLI stdout/stderr are
    stored only as byte-count + sha256 fingerprints (``summarize``), and the
    recorded command line goes through ``_safe_command``.
    """

    def __init__(self, timeout: int = 1800, yes: bool = True):
        """Store the per-command timeout (seconds) and whether to pass ``--yes``."""
        self.timeout = timeout
        self.yes = yes

    def available(self) -> bool:
        """Whether a Vercel CLI launcher can be resolved on PATH."""
        return bool(_resolve_vercel())

    def deploy(self, repo: str, *, production: bool = False,
               prebuilt: bool = False) -> Delivery:
        """Run ``vercel deploy`` in ``repo`` and describe the result.

        Args:
            repo: Repository directory; resolved and used as the CLI's cwd.
            production: Add ``--prod``.
            prebuilt: Add ``--prebuilt``.

        Returns:
            A ``Delivery`` with status ``"deployed"`` when the CLI exits 0 and
            a URL was found in its output, otherwise ``"failed"`` (also for a
            missing CLI or a timeout, with ``data["reason"]`` set).
        """
        repo_path = str(Path(repo).resolve())
        cmd = _resolve_vercel()
        if not cmd:
            return Delivery(target="vercel", status="failed",
                            data={"reason": "vercel CLI not found on PATH"})
        # The subcommand is always explicit; only the flag depends on `prebuilt`.
        # Previously `deploy` was appended under `if prebuilt:`, so the argv
        # shape depended on an unrelated option.
        cmd.append("deploy")
        if prebuilt:
            cmd.append("--prebuilt")
        if production:
            cmd.append("--prod")
        if self.yes:
            cmd.append("--yes")

        t0 = time.perf_counter()
        try:
            proc = subprocess.run(
                cmd, cwd=repo_path, capture_output=True, text=True,
                encoding="utf-8", errors="replace", timeout=self.timeout)
        except subprocess.TimeoutExpired as exc:
            return Delivery(
                target="vercel",
                status="failed",
                data={
                    "reason": "timeout",
                    "latency_s": round(time.perf_counter() - t0, 3),
                    "command": _safe_command(cmd),
                    "stdout": summarize(exc.stdout),
                    "stderr": summarize(exc.stderr),
                },
            )

        stdout = proc.stdout or ""
        stderr = proc.stderr or ""
        url = _last_url(stdout) or _last_url(stderr)
        # A zero exit without a URL is still a failure: nothing downstream can check it.
        status = "deployed" if proc.returncode == 0 and url else "failed"
        return Delivery(
            target="vercel",
            url=url,
            status=status,
            data={
                "production": production,
                "prebuilt": prebuilt,
                "returncode": proc.returncode,
                "latency_s": round(time.perf_counter() - t0, 3),
                "command": _safe_command(cmd),
                "stdout": summarize(stdout),
                "stderr": summarize(stderr),
            },
        )

    def rollback(self, repo: str, *, to: str | None = None) -> Delivery:
        """Promote the previous production deployment (Vercel rollback).

        Mechanism only: the decision to roll back is the caller's (the L8 gate Verdict).
        `to` optionally names a specific deployment URL/id to roll back to. When omitted,
        this looks up the immediately previous production deployment and rolls back to it
        explicitly — verified live against a real deploy: bare `vercel rollback` (no target)
        only reports in-progress rollback *status* on current CLI versions ("No deployment
        rollback in progress", exit 0) rather than performing one, so the old
        target-less/omitted call silently never rolled anything back.

        Returns:
            A ``Delivery`` with status ``"rolled_back"`` when the CLI exits 0,
            otherwise ``"rollback_failed"`` (also for a missing CLI, no
            previous deployment, or a timeout).
        """
        repo_path = str(Path(repo).resolve())
        if not _resolve_vercel():
            return Delivery(target="vercel", status="rollback_failed",
                            data={"reason": "vercel CLI not found on PATH"})
        if not to:
            to = self._previous_production_url(repo_path)
            if not to:
                return Delivery(
                    target="vercel",
                    status="rollback_failed",
                    data={"reason": "no previous production deployment found"},
                )
        cmd = _resolve_vercel() + ["rollback", to]
        if self.yes:
            cmd.append("--yes")

        t0 = time.perf_counter()
        try:
            proc = subprocess.run(
                cmd, cwd=repo_path, capture_output=True, text=True,
                encoding="utf-8", errors="replace", timeout=self.timeout)
        except subprocess.TimeoutExpired as exc:
            return Delivery(
                target="vercel",
                status="rollback_failed",
                data={
                    "reason": "timeout",
                    "latency_s": round(time.perf_counter() - t0, 3),
                    "command": _safe_command(cmd),
                    "stdout": summarize(exc.stdout),
                    "stderr": summarize(exc.stderr),
                },
            )

        stdout = proc.stdout or ""
        stderr = proc.stderr or ""
        url = _last_url(stdout) or _last_url(stderr)
        status = "rolled_back" if proc.returncode == 0 else "rollback_failed"
        return Delivery(
            target="vercel",
            url=url,
            status=status,
            data={
                "rolled_back_to": to,
                "returncode": proc.returncode,
                "latency_s": round(time.perf_counter() - t0, 3),
                "command": _safe_command(cmd),
                "stdout": summarize(stdout),
                "stderr": summarize(stderr),
            },
        )

    def _previous_production_url(self, repo_path: str) -> str | None:
        """The production deployment immediately before the current one.

        `vercel ls --prod` prints a scriptable bare-URL list at the end of its output,
        newest first — index 0 is the (bad) deployment we're rolling back FROM, index 1 is
        the one to roll back TO. Returns None if there's no prior production deployment
        or the listing times out.
        """
        try:
            proc = subprocess.run(
                _resolve_vercel() + ["ls", "--prod"], cwd=repo_path,
                capture_output=True, text=True, encoding="utf-8", errors="replace",
                timeout=self.timeout)
        except subprocess.TimeoutExpired:
            return None
        # Only lines that are (after trimming) a bare https:// URL count.
        urls = [ln.strip() for ln in (proc.stdout or "").splitlines()
                if ln.strip().startswith("https://")]
        return urls[1] if len(urls) > 1 else None
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _resolve_vercel() -> list[str]:
|
|
164
|
+
"""Return the argv prefix that actually launches the Vercel CLI.
|
|
165
|
+
|
|
166
|
+
On Windows, npm installs `vercel` as a `.cmd`/`.ps1` shim (pure-JS package, no vendored
|
|
167
|
+
native binary like opencode has) — `subprocess.run(["vercel", ...])` without a shell
|
|
168
|
+
raises `FileNotFoundError` because CreateProcess can't launch a batch file directly.
|
|
169
|
+
Route through the shim's own interpreter instead. On POSIX `which` already returns the
|
|
170
|
+
real (shebang'd) executable, so it needs no wrapping.
|
|
171
|
+
"""
|
|
172
|
+
exe = shutil.which("vercel")
|
|
173
|
+
if not exe:
|
|
174
|
+
return []
|
|
175
|
+
low = exe.lower()
|
|
176
|
+
if low.endswith((".cmd", ".bat")):
|
|
177
|
+
return ["cmd", "/c", exe]
|
|
178
|
+
if low.endswith(".ps1"):
|
|
179
|
+
return ["powershell", "-NoProfile", "-ExecutionPolicy", "Bypass", "-File", exe]
|
|
180
|
+
return [exe]
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
_DEPLOYMENT_HOST_RE = re.compile(r"^https://[^/\s]+\.vercel\.app(?:/|$)")


def _last_url(text: str | None) -> str | None:
    """The last *deployment* URL Vercel CLI printed — never a dashboard/API link.

    Live-proof finding: the CLI interleaves other `https://` links into stdout alongside
    the human-facing deployment URL — an `https://vercel.com/<team>/<project>/<id>`
    dashboard "Inspect" link, and on some versions an `https://api.vercel.com/v13/
    deployments/...` internal status-poll call. Picking the textually-last URL without
    filtering returns one of those instead, which then silently breaks every downstream
    health check against it. Every real deployment/preview/production URL Vercel serves
    lives on the `*.vercel.app` domain (never `vercel.com`/`api.vercel.com`), so prefer
    matches on that host; fall back to any match only if that's all there is.

    Args:
        text: CLI output to scan; ``None`` is treated as empty.

    Returns:
        The chosen URL with trailing ``. , ) " '`` removed, or ``None`` when
        the text contains no ``https://`` URL.
    """
    # Strip trailing punctuation BEFORE the host filter: the URL regex grabs
    # everything up to whitespace, so "https://x.vercel.app." or
    # "(https://x.vercel.app)" would otherwise fail the `(?:/|$)` anchor and
    # lose to a dashboard link.
    urls = [u.rstrip(".,)\"'") for u in _URL_RE.findall(text or "")]
    preferred = [u for u in urls if _DEPLOYMENT_HOST_RE.match(u)]
    picked = preferred or urls
    return picked[-1] if picked else None
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _safe_command(cmd: list[str]) -> list[str]:
|
|
205
|
+
safe: list[str] = []
|
|
206
|
+
redact_next = False
|
|
207
|
+
for part in cmd:
|
|
208
|
+
if redact_next:
|
|
209
|
+
safe.append("<redacted>")
|
|
210
|
+
redact_next = False
|
|
211
|
+
continue
|
|
212
|
+
safe.append(part)
|
|
213
|
+
if part == "--token":
|
|
214
|
+
redact_next = True
|
|
215
|
+
return safe
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""L3 executor: Aider (OSS) driven headless in the sandbox.
|
|
2
|
+
|
|
3
|
+
Hands the task (plus the gate's feedback on retry, and the in-scope file list) to a
|
|
4
|
+
non-interactive `aider --message ...` run inside the worktree, then reads back the diff.
|
|
5
|
+
Aider resolves its own model credentials from the environment (e.g. OPENAI_API_BASE +
|
|
6
|
+
OPENAI_API_KEY for an OpenAI-compatible router, or ANTHROPIC_API_KEY) — sembl-stack never
|
|
7
|
+
handles a token. Requires `aider` on PATH.
|
|
8
|
+
|
|
9
|
+
Why the flags:
|
|
10
|
+
--message <prompt> run one instruction non-interactively and exit (headless).
|
|
11
|
+
--yes-always auto-confirm (create files, apply edits) — no TTY prompts.
|
|
12
|
+
--no-auto-commits leave edits in the WORKING TREE so the sandbox's `git diff`
|
|
13
|
+
captures them; otherwise aider commits and the diff looks empty.
|
|
14
|
+
--no-stream / --no-check-update / --no-show-model-warnings
|
|
15
|
+
quiet, deterministic, non-blocking startup.
|
|
16
|
+
The model is a one-line config (`options.execute.model`), e.g. an `openai/<name>` route.
|
|
17
|
+
"""
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import glob
|
|
21
|
+
import shutil
|
|
22
|
+
import subprocess
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
|
|
25
|
+
from .base import (
|
|
26
|
+
Bounds,
|
|
27
|
+
ExecutionResult,
|
|
28
|
+
Sandbox,
|
|
29
|
+
Task,
|
|
30
|
+
changed_files_from_diff as _changed_files,
|
|
31
|
+
run_executor,
|
|
32
|
+
scrub_secrets,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class AiderExecutor:
    """L3 executor that runs ``aider`` once, non-interactively, inside the sandbox."""

    def __init__(self, model: str | None = None, timeout: int = 900):
        """Remember the optional model name and the run timeout."""
        self.model = model
        self.timeout = timeout

    def run(self, task: Task, bounds: Bounds, sandbox: Sandbox,
            feedback: str | None) -> ExecutionResult:
        """Run aider on the task in ``sandbox.workdir`` and return diff + report.

        Raises ``RuntimeError`` when ``aider`` is not on PATH. A timeout does
        not raise: it is recorded in the report (``error`` / ``timed_out``).
        Captured output is passed through ``scrub_secrets`` and only its tail
        is kept (last 2000 characters of stdout, last 1000 of stderr).
        """
        aider_bin = shutil.which("aider")
        if not aider_bin:
            raise RuntimeError(
                "L3: `aider` not found on PATH. `pip install aider-chat`, or set execute: mock.")

        instruction = self._prompt(task, bounds, feedback)
        model_flags = ["--model", self.model] if self.model else []
        argv = [
            aider_bin,
            "--yes-always",
            "--no-auto-commits",
            "--no-stream",
            "--no-check-update",
            "--no-show-model-warnings",
            "--no-gitignore",
            *model_flags,
            "--message",
            instruction,
            *_file_targets(bounds),  # focus aider on the in-scope files
        ]
        exit_code, stdout, stderr, timed_out = run_executor(
            argv, cwd=sandbox.workdir, timeout=self.timeout, stdin=subprocess.DEVNULL)

        # Remove aider's own .aider* files before the diff is taken.
        _clean_aider_scratch(sandbox.workdir)
        diff = sandbox.diff()
        report = {
            "files_modified": _changed_files(diff),
            "agent": "aider",
            "model": self.model,
            "exit_code": exit_code,
            "output": scrub_secrets(stdout)[-2000:],
            "stderr": scrub_secrets(stderr)[-1000:],
        }
        if timed_out:
            # surfaced to the gate as a BLOCK, not a crash
            report.update(error="timeout", timed_out=True)
        return ExecutionResult(diff=diff, report=report, workdir=sandbox.workdir)

    @staticmethod
    def _prompt(task: Task, bounds: Bounds, feedback: str | None) -> str:
        """Compose the message: task text, path rules, then any feedback."""
        sections = [task.text, ""]
        editable = bounds.editable_paths
        if editable:
            sections.append(f"You may ONLY edit these paths: {', '.join(editable)}")
        forbidden = bounds.forbidden_areas
        if forbidden:
            sections.append(f"Never touch: {', '.join(forbidden)}")
        if feedback:
            sections.extend(("", feedback))
        return "\n".join(sections)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _clean_aider_scratch(workdir: str) -> None:
|
|
87
|
+
"""Remove aider's own working files (`.aider*`) before the diff is captured.
|
|
88
|
+
|
|
89
|
+
Aider writes `.aider.chat.history.md`, `.aider.input.history`, and a
|
|
90
|
+
`.aider.tags.cache.v4/` dir into the working directory. With `--no-gitignore` these are
|
|
91
|
+
untracked clutter that the sandbox diff would otherwise pick up as out-of-scope edits.
|
|
92
|
+
They are aider internals, never part of the change, so we delete them in the disposable
|
|
93
|
+
cage before gating. Best-effort; failures are non-fatal.
|
|
94
|
+
"""
|
|
95
|
+
for p in glob.glob(str(Path(workdir) / ".aider*")):
|
|
96
|
+
path = Path(p)
|
|
97
|
+
try:
|
|
98
|
+
if path.is_dir():
|
|
99
|
+
shutil.rmtree(path, ignore_errors=True)
|
|
100
|
+
else:
|
|
101
|
+
path.unlink(missing_ok=True)
|
|
102
|
+
except OSError:
|
|
103
|
+
pass
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _file_targets(bounds: Bounds) -> list[str]:
|
|
107
|
+
"""The concrete files aider should add to the chat (skip directory bounds)."""
|
|
108
|
+
out = []
|
|
109
|
+
for p in bounds.editable_paths:
|
|
110
|
+
p = p.replace("\\", "/")
|
|
111
|
+
if p.endswith("/"):
|
|
112
|
+
continue # a directory prefix, not a file target
|
|
113
|
+
if "." in p.rsplit("/", 1)[-1]: # looks like a file (has an extension)
|
|
114
|
+
out.append(p)
|
|
115
|
+
return out
|