sembl-stack 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sembl_stack/__init__.py +3 -0
- sembl_stack/adapters/__init__.py +0 -0
- sembl_stack/adapters/_redact.py +19 -0
- sembl_stack/adapters/base.py +179 -0
- sembl_stack/adapters/codegraph_cbm.py +95 -0
- sembl_stack/adapters/deploy_vercel.py +215 -0
- sembl_stack/adapters/execute_aider.py +115 -0
- sembl_stack/adapters/execute_claude.py +114 -0
- sembl_stack/adapters/execute_mock.py +53 -0
- sembl_stack/adapters/execute_opencode.py +114 -0
- sembl_stack/adapters/merge_git.py +107 -0
- sembl_stack/adapters/postdeploy_http.py +82 -0
- sembl_stack/adapters/review_coderabbit.py +215 -0
- sembl_stack/adapters/review_llm.py +142 -0
- sembl_stack/adapters/review_mock.py +42 -0
- sembl_stack/adapters/sandbox_worktree.py +79 -0
- sembl_stack/adapters/spec_sembl.py +91 -0
- sembl_stack/adapters/verify_sembl.py +77 -0
- sembl_stack/artifacts.py +207 -0
- sembl_stack/cli.py +759 -0
- sembl_stack/config.py +87 -0
- sembl_stack/contextgraph.py +154 -0
- sembl_stack/doctor.py +111 -0
- sembl_stack/loop.py +380 -0
- sembl_stack/onboarding.py +272 -0
- sembl_stack/presets.py +114 -0
- sembl_stack/profile.py +193 -0
- sembl_stack/reconciliation.py +138 -0
- sembl_stack/registry.py +91 -0
- sembl_stack/rsi.py +188 -0
- sembl_stack/runner.py +134 -0
- sembl_stack/session.py +86 -0
- sembl_stack/specgraph.py +146 -0
- sembl_stack/store.py +112 -0
- sembl_stack/tracing.py +51 -0
- sembl_stack/transport/__init__.py +0 -0
- sembl_stack/transport/mcp_client.py +58 -0
- sembl_stack/tui.py +86 -0
- sembl_stack/views.py +74 -0
- sembl_stack/wizard.py +233 -0
- sembl_stack-0.1.0.dist-info/METADATA +165 -0
- sembl_stack-0.1.0.dist-info/RECORD +45 -0
- sembl_stack-0.1.0.dist-info/WHEEL +4 -0
- sembl_stack-0.1.0.dist-info/entry_points.txt +2 -0
- sembl_stack-0.1.0.dist-info/licenses/LICENSE +201 -0
sembl_stack/loop.py
ADDED
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
"""L6 orchestration: the short loop as a state machine.
|
|
2
|
+
|
|
3
|
+
plan (L2) -> execute (L3, in a fresh sandbox L4) -> verify (L5) ->
|
|
4
|
+
BLOCK & attempts left? feed the gate's reasons back and retry
|
|
5
|
+
PASS/WARN? accept.
|
|
6
|
+
|
|
7
|
+
Driven by LangGraph when installed (real retry graph + checkpointable), with a
|
|
8
|
+
built-in fallback runner of identical semantics so the loop boots with zero extra
|
|
9
|
+
installs. Every node is wrapped in a Langfuse span via the tracer.
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import subprocess
|
|
14
|
+
import time
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from typing import Any, TypedDict
|
|
17
|
+
|
|
18
|
+
from .adapters.base import Task, Verdict
|
|
19
|
+
from .artifacts import Change, Trace, bind_verdict
|
|
20
|
+
from .config import StackConfig
|
|
21
|
+
from .specgraph import build_spec_graph
|
|
22
|
+
from .store import RunStore
|
|
23
|
+
from .tracing import get_tracer
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class LoopState(TypedDict, total=False):
    """The state threaded through the graph (last-value channels).

    Every key is optional (``total=False``): the initial state only carries
    ``attempt``/``feedback``/``history`` and each node adds the keys it returns.
    """

    attempt: int  # completed attempts so far; `verify` increments it
    feedback: str | None  # gate feedback passed to the next `execute` call
    history: list  # [(attempt, status), ...] appended by `verify`
    bounds: Any  # Bounds artifact returned by `plan`
    sandbox: Any  # sandbox opened by the most recent `execute`
    result: Any  # Change returned by the most recent `execute`
    verdict: Any  # Verdict returned by the most recent `verify`
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class LoopResult:
    """What `run` hands back to the caller once the loop has finished."""

    verdict: Verdict  # final verdict (forced to BLOCK on an isolation breach)
    attempts: int  # number of execute/verify rounds performed
    history: list = field(default_factory=list)  # [(attempt, status), ...]
    workdir: str | None = None  # workdir of the last sandbox, if any
    engine: str = "fallback"  # "langgraph" or "fallback"
    run_id: str | None = None  # id of the persisted run record
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _is_empty_change(change) -> bool:
    """True when the executor produced no substantive change.

    Not just "no diff": an executor that errored or hit a dead model often *creates an
    empty file*, which has a `diff --git` header but no content — a no-op in substance. So
    the real signal is the absence of added/removed content (or a structural rename/delete/
    copy).

    `+++`/`---` lines are treated as file markers only *outside* a hunk. Inside a hunk
    (after an `@@` header) every `+`/`-` line is content, so an added line such as `++i;`
    (rendered `+++i;`) or a removed SQL comment `-- x` (rendered `--- x`) is not mistaken
    for a file header.
    """
    diff = getattr(change, "diff", "") or ""
    in_hunk = False
    for line in diff.splitlines():
        s = line.rstrip()
        if s.startswith("diff --git"):
            in_hunk = False  # a new file section starts with headers again
            continue
        if s.startswith("@@"):
            in_hunk = True
            continue
        if not in_hunk and s.startswith(("+++", "---")):
            continue  # file header markers, not content
        if s.startswith(("+", "-")):
            return False  # real content added or removed
        if s.startswith(("rename ", "deleted file", "copy ")):
            return False  # structural change with no +/- body
    return True
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _execution_error(change) -> str | None:
    """A hard executor failure (timeout / crash) recorded by the adapter, or None.

    The adapters convert a `TimeoutExpired` / internal crash into `report["error"]`
    instead of letting it abort the loop; this reads that signal back so the verify
    stage can BLOCK rather than the loop raising.

    Returns the error as a string, or None when the report is missing/empty or carries
    no truthy `error` entry.
    """
    report = getattr(change, "report", {}) or {}
    err = report.get("error")
    return str(err) if err else None
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _nonzero_exit(change) -> int | None:
    """The executor's non-zero process exit code, if any (else None).

    A non-zero exit means the agent process did not finish cleanly. The gate verifies the
    *change* (bounds + claim integrity), not process health — so a non-zero exit that still
    produced an in-scope diff would otherwise PASS silently. The loop surfaces it instead.

    Only an integer `report["exit_code"]` other than 0 is returned; a missing,
    non-integer or zero value yields None.
    """
    report = getattr(change, "report", {}) or {}
    rc = report.get("exit_code")
    return rc if isinstance(rc, int) and rc != 0 else None
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _usage_tokens(report: dict):
    """Total tokens an executor reported, if any (C1.3) — best-effort, never required.

    Accepts a few shapes, tried in this order: `usage.total_tokens`, `usage.tokens`, or a
    bare `tokens`. A `usage` dict that carries neither token key (e.g. only a prompt/
    completion split) no longer hides a bare `tokens` entry. Returns None when the
    executor didn't surface usage (the common case for the OAuth/CLI agents).
    """
    usage = report.get("usage")
    if isinstance(usage, dict):
        found = usage.get("total_tokens") or usage.get("tokens")
        if found is not None:
            return found
    return report.get("tokens")
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _maybe_expand(cfg: StackConfig, task: Task, bounds, tracer) -> None:
    """L1 context stage (in-loop): widen `bounds.editable_paths` along the coupling closure.

    Opt-in via `loop.expand_bounds`. This makes the running loop the fuller pipeline
    (L1→L2→L3→L4→L5) instead of only the `bounds --expand` CLI. It is a no-op — and so leaves
    the gate exactly as strict — when the flag is off or no context adapter is
    configured/available. Mutates `bounds` in place (one hop, closure-capped; EXP-05).

    Tuning is read from `options.context` in the raw config: `hops` (default 1),
    `min_strength` (default 0) and `max_fraction` (default 0.4).
    """
    if not (cfg.raw.get("loop", {}) or {}).get("expand_bounds"):
        return
    g = cfg.context
    # An adapter without an `available()` method is treated as unavailable.
    if g is None or not getattr(g, "available", lambda: False)():
        return
    # Imported lazily so the context graph module is only loaded when expansion is on.
    from .contextgraph import expand_bounds as _eb

    opts = (cfg.raw.get("options", {}) or {}).get("context", {}) or {}
    with tracer.span("L1.context"):
        g.index(task.repo)
        fg = g.file_graph(task.repo)
        bounds.editable_paths = _eb(
            list(bounds.editable_paths), fg, hops=opts.get("hops", 1),
            min_strength=opts.get("min_strength", 0),
            max_fraction=opts.get("max_fraction", 0.4))
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# --- L4 isolation guard (defense-in-depth) -----------------------------------
#
# The sandbox (L4) clones the repo so the executor (L3) edits ONLY a disposable copy; the
# gate verifies that copy's diff. But a swapped-in executor can ignore the cage and edit the
# SOURCE tree instead (this happened live 2026-06-20: `opencode` ignored the inherited cwd
# and wrote into the source repo until `--dir <sandbox.workdir>` was passed, commit 4a76163).
# That leak was caught only by eye. These helpers assert — cheaply, once before and once
# after the run — that the source working tree is left untouched, so a future regression
# fails LOUD (forced BLOCK) instead of slipping through.

_STORE_PREFIX = ".sembl/"  # the run store writes here BY DESIGN; never a breach
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _source_tree_status(repo: str) -> set[str] | None:
    """Snapshot the source repo's dirty working tree, EXCLUDING the `.sembl/` run store.

    Returns the set of `git status --porcelain` lines for paths outside `.sembl/` (which
    `store.py` writes into the source repo on every run, by design). Returns None — so the
    caller skips the guard gracefully — when `repo` is not a git repo or git is unavailable.

    Untracked files are listed individually (`--untracked-files=all`). With the default
    mode an untracked directory collapses to a single `?? dir/` line, so a new file written
    into an already-untracked directory would look identical before and after the run.

    NOTE: the snapshot is status lines only. Further edits to a file that was already
    dirty before the run keep the same status line and are not detected here.
    """
    try:
        proc = subprocess.run(
            ["git", "status", "--porcelain", "--untracked-files=all"], cwd=repo,
            capture_output=True, text=True, encoding="utf-8", errors="replace")
    except (OSError, ValueError):
        return None  # git missing / bad path: can't guard
    if proc.returncode != 0:
        return None  # not a git repo: nothing to guard
    lines: set[str] = set()
    for line in proc.stdout.splitlines():
        if not line.strip():
            continue
        path = line[3:].strip()  # porcelain: "XY <path>"
        if " -> " in path:  # a rename: "old -> new"
            path = path.split(" -> ", 1)[1]
        path = path.strip().strip('"')  # git quotes paths with special characters
        if path == _STORE_PREFIX.rstrip("/") or path.startswith(_STORE_PREFIX):
            continue  # run-store writes are expected
        lines.add(line)
    return lines
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _isolation_breach(before: set[str] | None, after: set[str] | None) -> str | None:
    """A human reason if the source tree changed during the run (the cage leaked), else None.

    `before`/`after` are `_source_tree_status` snapshots (or None when unguardable). Any
    difference outside `.sembl/` means the executor wrote into the SOURCE repo instead of the
    disposable clone.

    The message names at most five of the affected paths (sorted), followed by an ellipsis
    when there are more.
    """
    if before is None or after is None or before == after:
        return None
    # Status lines present in exactly one snapshot; keep the new name of a rename.
    paths = sorted({line[3:].strip().split(" -> ")[-1].strip().strip('"')
                    for line in (before ^ after)})
    shown = ", ".join(paths[:5]) + (" …" if len(paths) > 5 else "")
    return ("sandbox isolation breach: the executor modified the source repo "
            f"(unexpected working-tree changes outside {_STORE_PREFIX}: {shown})")
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def _nodes(cfg: StackConfig, task: Task, tracer, run, holder: dict | None = None):
    """Build the four loop callables — `plan`, `execute`, `verify`, `route` — for one run.

    Each node takes the current state dict and returns only the keys it updates; `route`
    returns "retry" or "done". `run` is the run record that artifacts are persisted to.
    `holder`, when given, always carries the most recently opened sandbox under
    "sandbox" so the caller can close it if a node raises.
    """

    def plan(state: dict) -> dict:
        with tracer.span("L2.plan"):
            bounds = cfg.spec.plan(task)
        run.put(build_spec_graph(task, bounds))
        _maybe_expand(cfg, task, bounds, tracer)  # L1: widen along the context graph
        run.put(bounds)  # persist Bounds artifact (post-expansion)
        return {"bounds": bounds}

    def execute(state: dict) -> dict:
        n = state["attempt"] + 1
        prev = state.get("sandbox")
        if prev is not None:
            prev.close()  # fresh cage per attempt
        sandbox = cfg.sandbox.open(task.repo)
        if holder is not None:
            holder["sandbox"] = sandbox  # so run() can close it on a crash
        t0 = time.perf_counter()
        with tracer.span("L3.execute", attempt=n):
            try:
                result = cfg.execute.run(task, state["bounds"], sandbox,
                                         state.get("feedback"))
            except Exception as exc:
                # An executor that crashes (or whose subprocess raises past the adapter's
                # own timeout handling) must NOT abort the loop, leak the sandbox, or skip
                # the persisted verdict. Convert the failure into a recorded Change so the
                # verify stage turns it into a BLOCK and the run still completes cleanly.
                diff = ""
                try:
                    diff = sandbox.diff()
                except Exception:
                    pass  # best-effort: the crash is already being recorded below
                result = Change(
                    diff=diff, workdir=getattr(sandbox, "workdir", ""),
                    report={"error": "executor-crashed", "exit_code": -1,
                            "detail": repr(exc)})
        latency_s = round(time.perf_counter() - t0, 3)

        # C1.3: record cost+latency per attempt. Latency is always measured here (wall
        # clock around the executor); tokens/cost ride along only when the adapter reported
        # usage. Stamp latency onto the Change report too so a single artifact is enough.
        report = dict(getattr(result, "report", {}) or {})
        report.setdefault("latency_s", latency_s)
        result.report = report
        run.put(result, name=f"change-{n}")  # persist Change per attempt
        run.record_attempt(
            n, latency_s=latency_s, agent=report.get("agent"), model=report.get("model"),
            exit_code=report.get("exit_code"), tokens=_usage_tokens(report),
            cost=report.get("cost"))
        return {"sandbox": sandbox, "result": result}

    def verify(state: dict) -> dict:
        change = state["result"]
        exec_err = _execution_error(change)
        if exec_err is not None:
            # C1 hardening: a hard executor failure (timeout / crash) is not a verdict the
            # gate can issue — the gate checks a *change*, not process health. Block directly
            # so a timed-out or crashed run never sails through as PASS, and hand the executor
            # actionable feedback on retry. (No gate call: there's nothing trustworthy to
            # verify.)
            verdict = Verdict(
                status="BLOCK",
                reasons=[f"executor failed ({exec_err}) — the task was not implemented; "
                         "check the executor/model output"],
                raw={"execution_error": exec_err, "report": getattr(change, "report", {})})
        elif _is_empty_change(change):
            # C1 hardening: a no-op execution (empty diff — e.g. the executor errored, hit a
            # dead model, or wrote nothing) must NOT pass. The gate verifies a *change*; with
            # no change there is nothing that satisfies the task, so block and tell the
            # executor it produced nothing (actionable feedback on retry).
            verdict = Verdict(
                status="BLOCK",
                reasons=["executor produced no changes (empty diff) — the task was not "
                         "implemented; check the executor/model output"],
                raw={"empty_diff": True, "report": getattr(change, "report", {})})
        else:
            with tracer.span("L5.verify"):
                verdict = cfg.verify.verify(state["bounds"], change, cfg.strict)
            rc = _nonzero_exit(change)
            if rc is not None and verdict.status == "PASS":
                # The change passed the gate but the executor process exited non-zero — it
                # did not complete cleanly. Don't report an unqualified PASS: downgrade to
                # WARN and record why, so a half-finished run is never mistaken for success.
                verdict = Verdict(
                    status="WARN",
                    reasons=list(verdict.reasons)
                    + [f"executor exited non-zero (exit_code={rc}); the change was applied "
                       "but the run did not complete cleanly"],
                    raw={**(getattr(verdict, "raw", {}) or {}), "exit_code": rc})
        # Bind the verdict to the exact diff it judged (also for BLOCKs — harmless),
        # so merge/apply can later refuse a verdict issued for a different change.
        bind_verdict(verdict, getattr(change, "diff", "") or "")
        attempt = state["attempt"] + 1
        run.put(verdict, name=f"verdict-{attempt}")
        return {
            "verdict": verdict,
            "attempt": attempt,
            "feedback": verdict.feedback(),
            "history": state.get("history", []) + [(attempt, verdict.status)],
        }

    def route(state: dict) -> str:
        # PASS/WARN are accepted; a BLOCK retries only while attempts remain.
        if state["verdict"].status in ("PASS", "WARN"):
            return "done"
        return "retry" if state["attempt"] < cfg.max_attempts else "done"

    return plan, execute, verify, route
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def run(cfg: StackConfig, task: Task) -> LoopResult:
    """Run the short loop for `task` and return its outcome.

    Prefers the LangGraph engine and falls back to the built-in runner when LangGraph
    cannot be imported. Persists the final change, verdict, trace and run status to the
    run store, and forces the final verdict to BLOCK if the source working tree changed
    outside `.sembl/` during the run.

    Raises whatever a node raised (after closing the open sandbox and marking the run
    "failed"); executor crashes are already converted to a BLOCK inside the execute node.
    """
    tracer = get_tracer(cfg.langfuse)
    run_rec = RunStore(task.repo).new_run(task)
    # L4 isolation guard: snapshot the source tree BEFORE any sandbox/executor runs.
    tree_before = _source_tree_status(task.repo)
    holder: dict = {"sandbox": None}
    plan, execute, verify, route = _nodes(cfg, task, tracer, run_rec, holder)
    init = {"attempt": 0, "feedback": None, "history": []}

    # Tracks whether the graph actually started. An ImportError is only a "LangGraph is
    # not installed" signal if it happens before the first node runs; one raised later
    # comes from a node (e.g. an adapter's lazy import) and must not silently restart the
    # whole loop on the fallback engine, which would repeat side effects and leak the
    # first sandbox.
    entered = False

    def tracked_plan(state: dict) -> dict:
        nonlocal entered
        entered = True
        return plan(state)

    try:
        try:
            final, engine = _run_langgraph(tracked_plan, execute, verify, route, init)
        except ImportError:
            if entered:
                raise
            final, engine = _run_fallback(plan, execute, verify, route, init), "fallback"
    except Exception as exc:
        # A crash in plan/verify (executor crashes are already converted in-node) must not
        # leave the run stuck at "started" with an open sandbox on disk. Close the cage,
        # record the failure, then re-raise so the caller still sees the real error.
        sb = holder.get("sandbox")
        if sb is not None:
            try:
                sb.close()
            except Exception:
                pass  # best-effort cleanup; the original error is re-raised below
        run_rec.set_status("failed", error=repr(exc)[:500])
        tracer.flush()
        raise

    sandbox = final.get("sandbox")
    workdir = getattr(sandbox, "workdir", None) if sandbox else None
    if sandbox is not None:
        sandbox.close()
    tracer.flush()

    # L4 isolation guard: re-snapshot the source tree now that the run is over. If it
    # changed (outside .sembl/), the executor escaped the sandbox and edited the SOURCE repo
    # — a containment breach the gate can't see. Fail LOUD: force the final verdict to BLOCK
    # so the breach is never mistaken for a clean PASS/WARN.
    verdict = final["verdict"]
    breach = _isolation_breach(tree_before, _source_tree_status(task.repo))
    if breach is not None:
        verdict = Verdict(
            status="BLOCK",
            reasons=[breach, *getattr(verdict, "reasons", [])],
            raw={**(getattr(verdict, "raw", {}) or {}), "isolation_breach": True})

    # Persist the final accepted change under a stable name, then the final verdict, a
    # trace, and the run status. Per-attempt artifacts remain as change-1/verdict-1...
    run_rec.put(final["result"], name="change")
    run_rec.put(verdict)
    run_rec.put(Trace(steps=[{"attempt": a, "status": s} for a, s in final["history"]]))
    log = run_rec.manifest().get("attempts_log", [])  # C1.3 per-attempt metrics
    total_latency_s = round(sum(e.get("latency_s", 0) for e in log), 3)
    run_rec.set_status(verdict.status,
                       attempts=final["attempt"], engine=engine,
                       total_latency_s=total_latency_s)

    return LoopResult(
        verdict=verdict, attempts=final["attempt"],
        history=final["history"], workdir=workdir, engine=engine,
        run_id=run_rec.id,
    )
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def _run_langgraph(plan, execute, verify, route, init, recursion_limit: int = 50):
    """Drive the loop with LangGraph: plan -> execute -> verify -> (retry | done).

    Returns `(final_state, "langgraph")`. Raises ImportError when LangGraph is not
    installed, which the caller uses as the signal to fall back to `_run_fallback`.

    `recursion_limit` is passed to LangGraph as the step budget for the invocation. Each
    attempt uses an execute and a verify step, so a caller allowing many attempts can raise
    it; the default keeps the previous fixed value of 50.
    """
    from langgraph.graph import StateGraph, END  # raises ImportError if absent

    g = StateGraph(LoopState)
    g.add_node("plan", plan)
    g.add_node("execute", execute)
    g.add_node("verify", verify)
    g.set_entry_point("plan")
    g.add_edge("plan", "execute")
    g.add_edge("execute", "verify")
    # `route` returns "retry" or "done"; map those labels onto graph targets.
    g.add_conditional_edges("verify", route, {"retry": "execute", "done": END})
    app = g.compile()
    return app.invoke(init, {"recursion_limit": recursion_limit}), "langgraph"
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def _run_fallback(plan, execute, verify, route, init):
    """Built-in runner with the same semantics as the LangGraph graph.

    Runs `plan` once, then repeats execute -> verify until `route` returns "done". Works
    on a shallow copy of `init` (the caller's dict is not modified), merges each node's
    returned keys into it, and returns the final state dict.
    """
    state = dict(init)
    state.update(plan(state))
    while True:
        state.update(execute(state))
        state.update(verify(state))
        if route(state) == "done":
            return state
|
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
"""Phase-1 first-run onboarding TUI for the BYO profile.
|
|
2
|
+
|
|
3
|
+
This module is a thin Textual guide over `profile.py`: welcome, choose how model
|
|
4
|
+
calls are paid for, capture a few preferences, persist `~/.sembl/profile.json`,
|
|
5
|
+
then return control to the Phase-0 stage rail. It is deliberately tolerant of bad
|
|
6
|
+
state and keeps all credential decisions in the deterministic profile core.
|
|
7
|
+
Textual is optional; callers use `available()` before launching.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import re
|
|
12
|
+
|
|
13
|
+
from . import presets, profile
|
|
14
|
+
from .session import resume_or_new, save as save_session
|
|
15
|
+
|
|
16
|
+
try:
|
|
17
|
+
from textual.app import App, ComposeResult
|
|
18
|
+
from textual.containers import Vertical
|
|
19
|
+
from textual.widgets import (
|
|
20
|
+
Button,
|
|
21
|
+
Checkbox,
|
|
22
|
+
Collapsible,
|
|
23
|
+
Footer,
|
|
24
|
+
Header,
|
|
25
|
+
Input,
|
|
26
|
+
Select,
|
|
27
|
+
Static,
|
|
28
|
+
)
|
|
29
|
+
_HAVE_TEXTUAL = True
|
|
30
|
+
except ImportError: # textual not installed - degrade gracefully
|
|
31
|
+
_HAVE_TEXTUAL = False
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Runner id -> (button label shown in the TUI, default executor for that runner).
RUNNER_CHOICES = {
    "claude-login": ("Use my Claude Code login", "claude"),
    "api-key": ("Use my API key", "claude"),
    "local": ("Use a local model", "opencode"),
    "mock": ("Preview the mechanics (no AI)", "mock"),
}

# Shape of an environment variable NAME: a letter or underscore, then letters, digits or
# underscores.
_ENV_NAME = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def available() -> bool:
    """Return True when Textual imported successfully, i.e. the TUI can be launched."""
    return _HAVE_TEXTUAL
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def launch(repo: str = ".") -> "profile.Profile | None":
    """Launch first-run onboarding. Caller launches `wizard.launch(repo)` after.

    Returns whatever the app exits with: the saved Profile when the user finishes the
    flow (the app exits with it), otherwise the app's default exit result.

    Raises:
        RuntimeError: if Textual is not installed (check `available()` first).
    """
    if not _HAVE_TEXTUAL:
        raise RuntimeError("textual not installed - `pip install \"sembl-stack[tui]\"`")
    return OnboardingApp(repo=repo).run()
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def env_var_options() -> list[str]:
    """Return the API-key environment variable names offered in the picker.

    A fresh list copied from `profile._KEY_ENV_VARS`, so callers may mutate it freely.
    """
    return list(profile._KEY_ENV_VARS)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def normalize_env_var_name(name: str) -> str:
    """Strip `name` and check that it has the shape of an environment variable name.

    Returns the stripped name.

    Raises:
        ValueError: if the stripped text is empty or is not a valid variable name — the
            usual cause being a pasted API key value instead of the variable's name.
    """
    cleaned = name.strip()
    if not _ENV_NAME.match(cleaned):
        raise ValueError("enter an environment variable name, not an API key value")
    return cleaned
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def api_key_source(selected: str | None, custom: str | None) -> str:
    """Build the `env:<NAME>` key source from the picker and the custom field.

    A non-blank `custom` name takes precedence over the `selected` one.

    Raises:
        ValueError: if the chosen name is not a valid environment variable name
            (including when both inputs are empty).
    """
    custom_name = (custom or "").strip()
    name = normalize_env_var_name(custom_name or (selected or ""))
    return f"env:{name}"
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def profile_for_runner(
    runner: str,
    *,
    key_env: str | None = None,
    custom_key_env: str | None = None,
    executor: str | None = None,
    model: str | None = None,
    strict: bool = True,
    preset: str | None = None,
) -> profile.Profile:
    """Build a Profile from widget state without reading any API key value.

    Only the "api-key" runner gets a `key_source` (an `env:<NAME>` reference built from
    `key_env`/`custom_key_env`); the key inputs are ignored for every other runner. A
    missing `executor` falls back to the runner's default from `RUNNER_CHOICES`; a blank
    `model` or `preset` is stored as None.

    Raises:
        ValueError: if `runner` is unknown, or the API-key variable name is invalid.
    """
    if runner not in RUNNER_CHOICES:
        raise ValueError(f"unknown runner: {runner}")
    default_executor = RUNNER_CHOICES[runner][1]
    key_source = None
    if runner == "api-key":
        key_source = api_key_source(key_env, custom_key_env)
    model = (model or "").strip() or None
    preset = preset or None
    return profile.Profile(
        runner=runner,
        executor=executor or default_executor,
        model=model,
        key_source=key_source,
        strict=strict,
        preset=preset,
    )
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def first_fix_hint(candidate: profile.Profile) -> tuple[bool, str]:
    """Run the profile preflight for `candidate` and return `profile.ready(...)` of it.

    Callers in this module unpack the result as `(ok, hint)` and show `hint` to the user
    when `ok` is false.
    """
    checks = profile.preflight(candidate)
    return profile.ready(checks)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
if _HAVE_TEXTUAL:

    class OnboardingApp(App):
        """First-run BYO setup: welcome -> runner choice -> preferences -> profile.

        Three stacked "screens" (plain containers) are toggled with the `active` CSS
        class. On success the app saves the profile, records the repo mode on the
        session, and exits with the saved Profile.
        """

        TITLE = "sembl-stack"
        SUB_TITLE = "first-run setup"
        BINDINGS = [("q", "quit", "Quit")]
        CSS = """
        #onboarding { padding: 1 2; }
        .screen { display: none; }
        .active { display: block; }
        .choice { width: 100%; margin: 0 0 1 0; }
        #mock-choice { opacity: 70%; }
        #byo-hint { color: $error; margin: 1 0 0 0; }
        #prefs-error { color: $error; margin: 1 0 0 0; }
        Select, Input { margin: 0 0 1 0; }
        Button { margin: 0 1 1 0; }
        """

        def __init__(self, repo: str = "."):
            super().__init__()
            self.repo = repo
            detected = profile.detect()
            # `compose` indexes RUNNER_CHOICES with this value, so an unrecognised
            # detection result must not reach it. Fall back to the no-AI preview; the
            # user picks the real runner on the next screen anyway.
            self._runner = detected.runner if detected.runner in RUNNER_CHOICES else "mock"
            self._key_env = (
                detected.key_source.removeprefix("env:")
                if detected.key_source and detected.key_source.startswith("env:")
                else profile._KEY_ENV_VARS[0]
            )

        def compose(self) -> "ComposeResult":
            yield Header()
            with Vertical(id="onboarding"):
                with Vertical(id="welcome-screen", classes="screen active"):
                    yield Static(
                        "sembl-stack puts an accountability gate around an AI coding loop.",
                        id="welcome-copy",
                    )
                    yield Static(
                        "It runs on your own keys, local model, or Claude Code login.",
                        id="keys-copy",
                    )
                    yield Button("Continue", id="welcome-next", variant="primary")

                with Vertical(id="byo-screen", classes="screen"):
                    yield Static("Choose how sembl-stack should run AI work.", id="byo-title")
                    yield Button(RUNNER_CHOICES["claude-login"][0], id="claude-choice",
                                 classes="choice")
                    yield Button(RUNNER_CHOICES["api-key"][0], id="api-choice", classes="choice")
                    env_names = env_var_options()
                    yield Select(
                        [(name, name) for name in env_names],
                        value=self._key_env if self._key_env in env_names else env_names[0],
                        id="api-env",
                    )
                    yield Input(placeholder="OTHER_API_KEY_ENV", id="api-env-custom")
                    yield Button(RUNNER_CHOICES["local"][0], id="local-choice", classes="choice")
                    yield Button(RUNNER_CHOICES["mock"][0], id="mock-choice", classes="choice")
                    yield Static("", id="byo-hint")

                with Vertical(id="prefs-screen", classes="screen"):
                    yield Static("Preferences", id="prefs-title")
                    yield Select([("Existing repo", "existing"), ("New repo", "new")],
                                 value="existing", id="repo-mode")
                    yield Checkbox("Strict gate", value=True, id="strict")
                    yield Select([("No preset", "")] + [(name, name) for name in presets.names()],
                                 value="", id="preset")
                    with Collapsible(title="Advanced", collapsed=True, id="advanced"):
                        yield Select(
                            [(name, name) for name in ("claude", "opencode", "aider", "mock")],
                            value=RUNNER_CHOICES[self._runner][1], id="executor")
                        yield Input(placeholder="Model (optional)", id="model")
                    yield Button("Go", id="finish", variant="primary")
                    yield Static("", id="prefs-error")
            yield Footer()

        def on_mount(self) -> None:
            self._mark_selected()

        def on_button_pressed(self, event) -> None:
            """Dispatch a button press to the matching step of the flow."""
            button_id = event.button.id
            if button_id == "welcome-next":
                self._show("byo")
            elif button_id in {"claude-choice", "api-choice", "local-choice", "mock-choice"}:
                runner = {
                    "claude-choice": "claude-login",
                    "api-choice": "api-key",
                    "local-choice": "local",
                    "mock-choice": "mock",
                }[button_id]
                self._choose_runner(runner)
            elif button_id == "finish":
                self._finish()

        @staticmethod
        def _select_str(select, fallback: str = "") -> str:
            """Return the Select's chosen value if it is a string, else `fallback`.

            Every option value in this app is a string. A cleared Select holds a
            non-string "blank" sentinel instead, which `str(value or "")` would turn
            into a bogus non-empty string; only real selections are let through here.
            """
            value = select.value
            return value if isinstance(value, str) else fallback

        def _show(self, name: str) -> None:
            """Make `<name>-screen` the only visible screen."""
            for screen_id in ("welcome-screen", "byo-screen", "prefs-screen"):
                screen = self.query_one(f"#{screen_id}")
                screen.remove_class("active")
            self.query_one(f"#{name}-screen").add_class("active")

        def _choose_runner(self, runner: str) -> None:
            """Select `runner`; advance to preferences only if its preflight is ready."""
            self._runner = runner
            self._mark_selected()
            try:
                candidate = self._candidate_from_inputs()
            except ValueError as exc:
                self.query_one("#byo-hint", Static).update(str(exc))
                return
            ok, hint = first_fix_hint(candidate)
            if not ok:
                self.query_one("#byo-hint", Static).update(hint)
                return
            self.query_one("#byo-hint", Static).update("")
            self.query_one("#executor", Select).value = candidate.executor
            self._show("prefs")

        def _finish(self) -> None:
            """Validate the full form, save the profile and session, then exit."""
            try:
                candidate = self._candidate_from_inputs(include_preferences=True)
            except ValueError as exc:
                self.query_one("#prefs-error", Static).update(str(exc))
                return
            ok, hint = first_fix_hint(candidate)
            if not ok:
                self.query_one("#prefs-error", Static).update(hint)
                return
            try:
                profile.save(candidate)
            except ValueError as exc:  # e.g. an API key pasted into the Model field
                self.query_one("#prefs-error", Static).update(str(exc))
                return
            session = resume_or_new(self.repo)
            session.mode = (
                self._select_str(self.query_one("#repo-mode", Select), "existing") or "existing"
            )
            save_session(session)
            self.exit(candidate)

        def _candidate_from_inputs(self, *, include_preferences: bool = False) -> profile.Profile:
            """Build a Profile from the widgets.

            Without `include_preferences` only the runner and key inputs are read and the
            remaining fields use their defaults; with it, the preferences screen
            (executor, model, strict, preset) is read as well.

            Raises:
                ValueError: propagated from `profile_for_runner` for invalid input.
            """
            key_env = self._select_str(self.query_one("#api-env", Select))
            custom_key_env = self.query_one("#api-env-custom", Input).value
            executor = RUNNER_CHOICES[self._runner][1]
            model = None
            strict = True
            preset = None
            if include_preferences:
                executor = self._select_str(self.query_one("#executor", Select), executor) or executor
                model = self.query_one("#model", Input).value
                strict = bool(self.query_one("#strict", Checkbox).value)
                preset = self._select_str(self.query_one("#preset", Select)) or None
            return profile_for_runner(
                self._runner,
                key_env=key_env,
                custom_key_env=custom_key_env,
                executor=executor,
                model=model,
                strict=strict,
                preset=preset,
            )

        def _mark_selected(self) -> None:
            """Highlight the button of the current runner and reset the others."""
            ids = {
                "claude-login": "claude-choice",
                "api-key": "api-choice",
                "local": "local-choice",
                "mock": "mock-choice",
            }
            for runner, button_id in ids.items():
                button = self.query_one(f"#{button_id}", Button)
                button.variant = "primary" if runner == self._runner else "default"
|