sembl-stack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. sembl_stack/__init__.py +3 -0
  2. sembl_stack/adapters/__init__.py +0 -0
  3. sembl_stack/adapters/_redact.py +19 -0
  4. sembl_stack/adapters/base.py +179 -0
  5. sembl_stack/adapters/codegraph_cbm.py +95 -0
  6. sembl_stack/adapters/deploy_vercel.py +215 -0
  7. sembl_stack/adapters/execute_aider.py +115 -0
  8. sembl_stack/adapters/execute_claude.py +114 -0
  9. sembl_stack/adapters/execute_mock.py +53 -0
  10. sembl_stack/adapters/execute_opencode.py +114 -0
  11. sembl_stack/adapters/merge_git.py +107 -0
  12. sembl_stack/adapters/postdeploy_http.py +82 -0
  13. sembl_stack/adapters/review_coderabbit.py +215 -0
  14. sembl_stack/adapters/review_llm.py +142 -0
  15. sembl_stack/adapters/review_mock.py +42 -0
  16. sembl_stack/adapters/sandbox_worktree.py +79 -0
  17. sembl_stack/adapters/spec_sembl.py +91 -0
  18. sembl_stack/adapters/verify_sembl.py +77 -0
  19. sembl_stack/artifacts.py +207 -0
  20. sembl_stack/cli.py +759 -0
  21. sembl_stack/config.py +87 -0
  22. sembl_stack/contextgraph.py +154 -0
  23. sembl_stack/doctor.py +111 -0
  24. sembl_stack/loop.py +380 -0
  25. sembl_stack/onboarding.py +272 -0
  26. sembl_stack/presets.py +114 -0
  27. sembl_stack/profile.py +193 -0
  28. sembl_stack/reconciliation.py +138 -0
  29. sembl_stack/registry.py +91 -0
  30. sembl_stack/rsi.py +188 -0
  31. sembl_stack/runner.py +134 -0
  32. sembl_stack/session.py +86 -0
  33. sembl_stack/specgraph.py +146 -0
  34. sembl_stack/store.py +112 -0
  35. sembl_stack/tracing.py +51 -0
  36. sembl_stack/transport/__init__.py +0 -0
  37. sembl_stack/transport/mcp_client.py +58 -0
  38. sembl_stack/tui.py +86 -0
  39. sembl_stack/views.py +74 -0
  40. sembl_stack/wizard.py +233 -0
  41. sembl_stack-0.1.0.dist-info/METADATA +165 -0
  42. sembl_stack-0.1.0.dist-info/RECORD +45 -0
  43. sembl_stack-0.1.0.dist-info/WHEEL +4 -0
  44. sembl_stack-0.1.0.dist-info/entry_points.txt +2 -0
  45. sembl_stack-0.1.0.dist-info/licenses/LICENSE +201 -0
sembl_stack/loop.py ADDED
@@ -0,0 +1,380 @@
1
+ """L6 orchestration: the short loop as a state machine.
2
+
3
+ plan (L2) -> execute (L3, in a fresh sandbox L4) -> verify (L5) ->
4
+ BLOCK & attempts left? feed the gate's reasons back and retry
5
+ PASS/WARN? accept.
6
+
7
+ Driven by LangGraph when installed (real retry graph + checkpointable), with a
8
+ built-in fallback runner of identical semantics so the loop boots with zero extra
9
+ installs. Every node is wrapped in a Langfuse span via the tracer.
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import subprocess
14
+ import time
15
+ from dataclasses import dataclass, field
16
+ from typing import Any, TypedDict
17
+
18
+ from .adapters.base import Task, Verdict
19
+ from .artifacts import Change, Trace, bind_verdict
20
+ from .config import StackConfig
21
+ from .specgraph import build_spec_graph
22
+ from .store import RunStore
23
+ from .tracing import get_tracer
24
+
25
+
26
class LoopState(TypedDict, total=False):
    """The state threaded through the graph (last-value channels).

    Declared with ``total=False`` so every key is optional: the initial state passed
    by ``run()`` carries only ``attempt``/``feedback``/``history``, and each node
    returns a partial dict holding just the keys it produced.
    """
    attempt: int  # completed verify passes so far (0 before the first attempt)
    feedback: str | None  # `verdict.feedback()` of the last verdict, passed to the executor
    history: list  # [(attempt, verdict status), ...] appended by the verify node
    bounds: Any  # result of `cfg.spec.plan(task)` (set by the plan node)
    sandbox: Any  # sandbox opened for the latest attempt (set by the execute node)
    result: Any  # Change produced by the latest execute pass
    verdict: Any  # verdict issued by the latest verify pass
35
+
36
+
37
@dataclass
class LoopResult:
    """Outcome of one `run()` call, as returned to the caller."""
    verdict: Verdict  # final verdict (replaced by a BLOCK in `run()` on an isolation breach)
    attempts: int  # value of the state's `attempt` counter when the loop stopped
    history: list = field(default_factory=list)  # [(attempt, status), ...]
    workdir: str | None = None  # `workdir` attribute of the last sandbox, if any
    engine: str = "fallback"  # "langgraph" or "fallback": which runner drove the loop
    run_id: str | None = None  # `id` of the run record created for this run
45
+
46
+
47
+ def _is_empty_change(change) -> bool:
48
+ """True when the executor produced no substantive change.
49
+
50
+ Not just "no diff": an executor that errored or hit a dead model often *creates an
51
+ empty file*, which has a `diff --git` header but no content — a no-op in substance. So
52
+ the real signal is the absence of added/removed content (or a structural rename/delete/
53
+ copy). `+++`/`---` file markers are skipped; any other `+`/`-` line is real content.
54
+ """
55
+ diff = getattr(change, "diff", "") or ""
56
+ for line in diff.splitlines():
57
+ s = line.rstrip()
58
+ if s.startswith(("+++", "---")):
59
+ continue
60
+ if s.startswith(("+", "-")):
61
+ return False # real content added or removed
62
+ if s.startswith(("rename ", "deleted file", "copy ")):
63
+ return False # structural change with no +/- body
64
+ return True
65
+
66
+
67
+ def _execution_error(change) -> str | None:
68
+ """A hard executor failure (timeout / crash) recorded by the adapter, or None.
69
+
70
+ The adapters convert a `TimeoutExpired` / internal crash into `report["error"]`
71
+ instead of letting it abort the loop; this reads that signal back so the verify
72
+ stage can BLOCK rather than the loop raising.
73
+ """
74
+ report = getattr(change, "report", {}) or {}
75
+ err = report.get("error")
76
+ return str(err) if err else None
77
+
78
+
79
+ def _nonzero_exit(change) -> int | None:
80
+ """The executor's non-zero process exit code, if any (else None).
81
+
82
+ A non-zero exit means the agent process did not finish cleanly. The gate verifies the
83
+ *change* (bounds + claim integrity), not process health — so a non-zero exit that still
84
+ produced an in-scope diff would otherwise PASS silently. The loop surfaces it instead.
85
+ """
86
+ report = getattr(change, "report", {}) or {}
87
+ rc = report.get("exit_code")
88
+ return rc if isinstance(rc, int) and rc != 0 else None
89
+
90
+
91
+ def _usage_tokens(report: dict):
92
+ """Total tokens an executor reported, if any (C1.3) — best-effort, never required.
93
+
94
+ Accepts a few shapes: `usage.total_tokens`, `usage.tokens`, or a bare `tokens`. Returns
95
+ None when the executor didn't surface usage (the common case for the OAuth/CLI agents).
96
+ """
97
+ usage = report.get("usage")
98
+ if isinstance(usage, dict):
99
+ return usage.get("total_tokens") or usage.get("tokens")
100
+ return report.get("tokens")
101
+
102
+
103
+ def _maybe_expand(cfg: StackConfig, task: Task, bounds, tracer) -> None:
104
+ """L1 context stage (in-loop): widen `bounds.editable_paths` along the coupling closure.
105
+
106
+ Opt-in via `loop.expand_bounds`. This makes the running loop the fuller pipeline
107
+ (L1→L2→L3→L4→L5) instead of only the `bounds --expand` CLI. It is a no-op — and so leaves
108
+ the gate exactly as strict — when no context adapter is configured/available or the seed
109
+ has no indexed files. Mutates `bounds` in place (one hop, closure-capped; EXP-05).
110
+ """
111
+ if not (cfg.raw.get("loop", {}) or {}).get("expand_bounds"):
112
+ return
113
+ g = cfg.context
114
+ if g is None or not getattr(g, "available", lambda: False)():
115
+ return
116
+ from .contextgraph import expand_bounds as _eb
117
+
118
+ opts = (cfg.raw.get("options", {}) or {}).get("context", {}) or {}
119
+ with tracer.span("L1.context"):
120
+ g.index(task.repo)
121
+ fg = g.file_graph(task.repo)
122
+ bounds.editable_paths = _eb(
123
+ list(bounds.editable_paths), fg, hops=opts.get("hops", 1),
124
+ min_strength=opts.get("min_strength", 0), max_fraction=opts.get("max_fraction", 0.4))
125
+
126
+
127
+ # --- L4 isolation guard (defense-in-depth) -----------------------------------
128
+ #
129
+ # The sandbox (L4) clones the repo so the executor (L3) edits ONLY a disposable copy; the
130
+ # gate verifies that copy's diff. But a swapped-in executor can ignore the cage and edit the
131
+ # SOURCE tree instead (this happened live 2026-06-20: `opencode` ignored the inherited cwd
132
+ # and wrote into the source repo until `--dir <sandbox.workdir>` was passed, commit 4a76163).
133
+ # That leak was caught only by eye. These helpers assert — cheaply, once before and once
134
+ # after the run — that the source working tree is left untouched, so a future regression
135
+ # fails LOUD (forced BLOCK) instead of slipping through.
136
+
137
+ _STORE_PREFIX = ".sembl/" # the run store writes here BY DESIGN; never a breach
138
+
139
+
140
+ def _source_tree_status(repo: str) -> set[str] | None:
141
+ """Snapshot the source repo's dirty working tree, EXCLUDING the `.sembl/` run store.
142
+
143
+ Returns the set of `git status --porcelain` lines for paths outside `.sembl/` (which
144
+ `store.py` writes into the source repo on every run, by design). Returns None — so the
145
+ caller skips the guard gracefully — when `repo` is not a git repo or git is unavailable.
146
+ """
147
+ try:
148
+ proc = subprocess.run(
149
+ ["git", "status", "--porcelain"], cwd=repo,
150
+ capture_output=True, text=True, encoding="utf-8", errors="replace")
151
+ except (OSError, ValueError):
152
+ return None # git missing / bad path: can't guard
153
+ if proc.returncode != 0:
154
+ return None # not a git repo: nothing to guard
155
+ lines: set[str] = set()
156
+ for line in proc.stdout.splitlines():
157
+ if not line.strip():
158
+ continue
159
+ path = line[3:].strip() # porcelain: "XY <path>"
160
+ if " -> " in path: # a rename: "old -> new"
161
+ path = path.split(" -> ", 1)[1]
162
+ path = path.strip().strip('"')
163
+ if path == _STORE_PREFIX.rstrip("/") or path.startswith(_STORE_PREFIX):
164
+ continue # run-store writes are expected
165
+ lines.add(line)
166
+ return lines
167
+
168
+
169
+ def _isolation_breach(before: set[str] | None, after: set[str] | None) -> str | None:
170
+ """A human reason if the source tree changed during the run (the cage leaked), else None.
171
+
172
+ `before`/`after` are `_source_tree_status` snapshots (or None when unguardable). Any
173
+ difference outside `.sembl/` means the executor wrote into the SOURCE repo instead of the
174
+ disposable clone.
175
+ """
176
+ if before is None or after is None or before == after:
177
+ return None
178
+ paths = sorted({line[3:].strip().split(" -> ")[-1].strip().strip('"')
179
+ for line in (before ^ after)})
180
+ shown = ", ".join(paths[:5]) + (" …" if len(paths) > 5 else "")
181
+ return ("sandbox isolation breach: the executor modified the source repo "
182
+ f"(unexpected working-tree changes outside {_STORE_PREFIX}: {shown})")
183
+
184
+
185
def _nodes(cfg: StackConfig, task: Task, tracer, run, holder: dict | None = None):
    """Build the four loop callables — plan, execute, verify, route — as closures.

    The closures share `cfg`, `task`, `tracer` and the run record `run`. Each node takes
    the current state dict and returns a PARTIAL dict holding only the keys it produced;
    `route` returns the name of the next edge ("retry" or "done").

    Args:
        cfg: stack configuration; provides the `spec`, `sandbox`, `execute` and `verify`
            adapters plus `strict` and `max_attempts`.
        task: the task being run (its `repo` is what the sandbox is opened on).
        tracer: object whose `span(name, **attrs)` is used as a context manager.
        run: run record; artifacts are persisted via `put` / `record_attempt`.
        holder: optional dict; when given, `holder["sandbox"]` always points at the most
            recently opened sandbox so the caller can close it after a crash.

    Returns:
        The tuple `(plan, execute, verify, route)`.
    """
    def plan(state: dict) -> dict:
        """Plan node: compute bounds for the task and persist them."""
        with tracer.span("L2.plan"):
            bounds = cfg.spec.plan(task)
            run.put(build_spec_graph(task, bounds))
            _maybe_expand(cfg, task, bounds, tracer)  # L1: widen along the context graph
            run.put(bounds)  # persist Bounds artifact (post-expansion)
        return {"bounds": bounds}

    def execute(state: dict) -> dict:
        """Execute node: run the executor once in a freshly opened sandbox."""
        n = state["attempt"] + 1  # 1-based number of the attempt being started
        prev = state.get("sandbox")
        if prev is not None:
            prev.close()  # fresh cage per attempt
        sandbox = cfg.sandbox.open(task.repo)
        if holder is not None:
            holder["sandbox"] = sandbox  # so run() can close it on a crash
        t0 = time.perf_counter()
        with tracer.span("L3.execute", attempt=n):
            try:
                result = cfg.execute.run(task, state["bounds"], sandbox,
                                         state.get("feedback"))
            except Exception as exc:
                # An executor that crashes (or whose subprocess raises past the adapter's
                # own timeout handling) must NOT abort the loop, leak the sandbox, or skip
                # the persisted verdict. Convert the failure into a recorded Change so the
                # verify stage turns it into a BLOCK and the run still completes cleanly.
                diff = ""
                try:
                    diff = sandbox.diff()
                except Exception:
                    # Best-effort: if even the diff can't be read, keep the empty diff.
                    pass
                result = Change(
                    diff=diff, workdir=getattr(sandbox, "workdir", ""),
                    report={"error": "executor-crashed", "exit_code": -1,
                            "detail": repr(exc)})
        latency_s = round(time.perf_counter() - t0, 3)

        # C1.3: record cost+latency per attempt. Latency is always measured here (wall
        # clock around the executor); tokens/cost ride along only when the adapter reported
        # usage. Stamp latency onto the Change report too so a single artifact is enough.
        report = dict(getattr(result, "report", {}) or {})
        report.setdefault("latency_s", latency_s)  # an adapter-supplied latency wins
        result.report = report
        run.put(result, name=f"change-{n}")  # persist Change per attempt
        run.record_attempt(
            n, latency_s=latency_s, agent=report.get("agent"), model=report.get("model"),
            exit_code=report.get("exit_code"), tokens=_usage_tokens(report),
            cost=report.get("cost"))
        return {"sandbox": sandbox, "result": result}

    def verify(state: dict) -> dict:
        """Verify node: turn the latest Change into a verdict and advance the counter.

        Executor failures and empty changes are blocked here without calling the gate;
        everything else goes to `cfg.verify.verify`.
        """
        change = state["result"]
        exec_err = _execution_error(change)
        if exec_err is not None:
            # C1 hardening: a hard executor failure (timeout / crash) is not a verdict the
            # gate can issue — the gate checks a *change*, not process health. Block directly
            # so a timed-out or crashed run never sails through as PASS, and hand the executor
            # actionable feedback on retry. (No gate call: there's nothing trustworthy to
            # verify.)
            verdict = Verdict(
                status="BLOCK",
                reasons=[f"executor failed ({exec_err}) — the task was not implemented; "
                         "check the executor/model output"],
                raw={"execution_error": exec_err, "report": getattr(change, "report", {})})
        elif _is_empty_change(change):
            # C1 hardening: a no-op execution (empty diff — e.g. the executor errored, hit a
            # dead model, or wrote nothing) must NOT pass. The gate verifies a *change*; with
            # no change there is nothing that satisfies the task, so block and tell the
            # executor it produced nothing (actionable feedback on retry).
            verdict = Verdict(
                status="BLOCK",
                reasons=["executor produced no changes (empty diff) — the task was not "
                         "implemented; check the executor/model output"],
                raw={"empty_diff": True, "report": getattr(change, "report", {})})
        else:
            with tracer.span("L5.verify"):
                verdict = cfg.verify.verify(state["bounds"], change, cfg.strict)
            rc = _nonzero_exit(change)
            if rc is not None and verdict.status == "PASS":
                # The change passed the gate but the executor process exited non-zero — it
                # did not complete cleanly. Don't report an unqualified PASS: downgrade to
                # WARN and record why, so a half-finished run is never mistaken for success.
                verdict = Verdict(
                    status="WARN",
                    reasons=list(verdict.reasons)
                    + [f"executor exited non-zero (exit_code={rc}); the change was applied "
                       "but the run did not complete cleanly"],
                    raw={**(getattr(verdict, "raw", {}) or {}), "exit_code": rc})
        # Bind the verdict to the exact diff it judged (also for BLOCKs — harmless),
        # so merge/apply can later refuse a verdict issued for a different change.
        bind_verdict(verdict, getattr(change, "diff", "") or "")
        attempt = state["attempt"] + 1
        run.put(verdict, name=f"verdict-{attempt}")
        return {
            "verdict": verdict,
            "attempt": attempt,
            "feedback": verdict.feedback(),
            "history": state.get("history", []) + [(attempt, verdict.status)],
        }

    def route(state: dict) -> str:
        """Pick the next edge: stop on PASS/WARN or when attempts are exhausted."""
        if state["verdict"].status in ("PASS", "WARN"):
            return "done"
        return "retry" if state["attempt"] < cfg.max_attempts else "done"

    return plan, execute, verify, route
292
+
293
+
294
def run(cfg: StackConfig, task: Task) -> LoopResult:
    """Run the short loop for `task` and return its final outcome.

    Creates a run record, builds the plan/execute/verify/route nodes, drives them with
    LangGraph when it is importable (otherwise with the built-in fallback runner), then
    persists the final change, verdict, trace and status.

    The engine is chosen BEFORE any node runs. An `ImportError` raised later from inside
    a node (for example a lazily imported adapter dependency) is therefore reported as a
    failure of this run. It is not mistaken for "langgraph is not installed", which would
    silently start a second full run under the fallback engine and duplicate executor
    runs and per-attempt artifacts.

    Args:
        cfg: stack configuration (adapters, `langfuse`, `strict`, `max_attempts`).
        task: the task to run; `task.repo` is the source repository.

    Returns:
        A `LoopResult` with the final verdict, attempt count, history, the last
        sandbox's workdir, the engine name and the run id.

    Raises:
        Exception: whatever a node or engine raised. The sandbox is closed and the run
            is marked "failed" before the original exception is re-raised.
    """
    tracer = get_tracer(cfg.langfuse)
    run_rec = RunStore(task.repo).new_run(task)
    # L4 isolation guard: snapshot the source tree BEFORE any sandbox/executor runs.
    tree_before = _source_tree_status(task.repo)
    holder: dict = {"sandbox": None}
    plan, execute, verify, route = _nodes(cfg, task, tracer, run_rec, holder)
    init = {"attempt": 0, "feedback": None, "history": []}

    # Probe with the exact import `_run_langgraph` performs, so "not installed" (or an
    # incompatible version missing these names) is detected up front and nowhere else.
    try:
        from langgraph.graph import StateGraph, END  # noqa: F401  (availability probe)
    except ImportError:
        use_langgraph = False
    else:
        use_langgraph = True

    try:
        if use_langgraph:
            final, engine = _run_langgraph(plan, execute, verify, route, init)
        else:
            final, engine = _run_fallback(plan, execute, verify, route, init), "fallback"
    except Exception as exc:
        # A crash in plan/verify (executor crashes are already converted in-node) must not
        # leave the run stuck at "started" with an open sandbox on disk. Close the cage,
        # record the failure, then re-raise so the caller still sees the real error.
        sb = holder.get("sandbox")
        if sb is not None:
            try:
                sb.close()
            except Exception:
                # Best-effort cleanup: never let a close failure mask the real error.
                pass
        run_rec.set_status("failed", error=repr(exc)[:500])
        tracer.flush()
        raise

    sandbox = final.get("sandbox")
    # Read the workdir before closing: the attribute may not survive close().
    workdir = getattr(sandbox, "workdir", None) if sandbox else None
    if sandbox is not None:
        sandbox.close()
    tracer.flush()

    # L4 isolation guard: re-snapshot the source tree now that the run is over. If it
    # changed (outside .sembl/), the executor escaped the sandbox and edited the SOURCE repo
    # — a containment breach the gate can't see. Fail LOUD: force the final verdict to BLOCK
    # so the breach is never mistaken for a clean PASS/WARN.
    verdict = final["verdict"]
    breach = _isolation_breach(tree_before, _source_tree_status(task.repo))
    if breach is not None:
        verdict = Verdict(
            status="BLOCK",
            reasons=[breach, *getattr(verdict, "reasons", [])],
            raw={**(getattr(verdict, "raw", {}) or {}), "isolation_breach": True})

    # Persist the final accepted change under a stable name, then the final verdict, a
    # trace, and the run status. Per-attempt artifacts remain as change-1/verdict-1...
    run_rec.put(final["result"], name="change")
    run_rec.put(verdict)
    run_rec.put(Trace(steps=[{"attempt": a, "status": s} for a, s in final["history"]]))
    log = run_rec.manifest().get("attempts_log", [])  # C1.3 per-attempt metrics
    total_latency_s = round(sum(e.get("latency_s", 0) for e in log), 3)
    run_rec.set_status(verdict.status,
                       attempts=final["attempt"], engine=engine,
                       total_latency_s=total_latency_s)

    return LoopResult(
        verdict=verdict, attempts=final["attempt"],
        history=final["history"], workdir=workdir, engine=engine,
        run_id=run_rec.id,
    )
356
+
357
+
358
def _run_langgraph(plan, execute, verify, route, init):
    """Drive the loop with a LangGraph `StateGraph` and return `(final_state, "langgraph")`.

    Topology: plan -> execute -> verify, then `route` chooses between looping back to
    execute ("retry") and ending the graph ("done"). Raises ImportError when langgraph
    is not installed.
    """
    from langgraph.graph import StateGraph, END  # raises ImportError if absent

    graph = StateGraph(LoopState)
    for node_name, node_fn in (("plan", plan), ("execute", execute), ("verify", verify)):
        graph.add_node(node_name, node_fn)
    graph.set_entry_point("plan")
    for source, target in (("plan", "execute"), ("execute", "verify")):
        graph.add_edge(source, target)
    branches = {"retry": "execute", "done": END}
    graph.add_conditional_edges("verify", route, branches)

    compiled = graph.compile()
    final_state = compiled.invoke(init, {"recursion_limit": 50})
    return final_state, "langgraph"
371
+
372
+
373
+ def _run_fallback(plan, execute, verify, route, init):
374
+ state = dict(init)
375
+ state.update(plan(state))
376
+ while True:
377
+ state.update(execute(state))
378
+ state.update(verify(state))
379
+ if route(state) == "done":
380
+ return state
@@ -0,0 +1,272 @@
1
+ """Phase-1 first-run onboarding TUI for the BYO profile.
2
+
3
+ This module is a thin Textual guide over `profile.py`: welcome, choose how model
4
+ calls are paid for, capture a few preferences, persist `~/.sembl/profile.json`,
5
+ then return control to the Phase-0 stage rail. It is deliberately tolerant of bad
6
+ state and keeps all credential decisions in the deterministic profile core.
7
+ Textual is optional; callers use `available()` before launching.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import re
12
+
13
+ from . import presets, profile
14
+ from .session import resume_or_new, save as save_session
15
+
16
+ try:
17
+ from textual.app import App, ComposeResult
18
+ from textual.containers import Vertical
19
+ from textual.widgets import (
20
+ Button,
21
+ Checkbox,
22
+ Collapsible,
23
+ Footer,
24
+ Header,
25
+ Input,
26
+ Select,
27
+ Static,
28
+ )
29
+ _HAVE_TEXTUAL = True
30
+ except ImportError: # textual not installed - degrade gracefully
31
+ _HAVE_TEXTUAL = False
32
+
33
+
34
# Runner id -> (button label shown on the runner-choice screen, default executor name).
RUNNER_CHOICES = {
    "claude-login": ("Use my Claude Code login", "claude"),
    "api-key": ("Use my API key", "claude"),
    "local": ("Use a local model", "opencode"),
    "mock": ("Preview the mechanics (no AI)", "mock"),
}

# Shape of an environment-variable NAME: a letter or underscore, then letters, digits
# or underscores. Used to reject input that is not a plain variable name.
_ENV_NAME = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
42
+
43
+
44
def available() -> bool:
    """Return True when the optional `textual` dependency was importable."""
    return _HAVE_TEXTUAL
46
+
47
+
48
def launch(repo: str = ".") -> "profile.Profile | None":
    """Run the first-run onboarding app for `repo` and return what the app exited with.

    The caller is expected to launch `wizard.launch(repo)` afterwards. Raises
    RuntimeError when textual is not installed.
    """
    if _HAVE_TEXTUAL:
        app = OnboardingApp(repo=repo)
        return app.run()
    raise RuntimeError("textual not installed - `pip install \"sembl-stack[tui]\"`")
53
+
54
+
55
def env_var_options() -> list[str]:
    """Return a fresh list of the environment-variable names listed in `profile._KEY_ENV_VARS`."""
    return [*profile._KEY_ENV_VARS]
57
+
58
+
59
def normalize_env_var_name(name: str) -> str:
    """Strip surrounding whitespace from `name` and check it is a variable NAME.

    Raises:
        ValueError: if the stripped text does not match `_ENV_NAME` (for example when a
            key value was entered instead of a variable name).
    """
    candidate = name.strip()
    if _ENV_NAME.match(candidate) is None:
        raise ValueError("enter an environment variable name, not an API key value")
    return candidate
64
+
65
+
66
def api_key_source(selected: str | None, custom: str | None) -> str:
    """Build the `env:<NAME>` key-source string from the two key-env inputs.

    A non-blank `custom` name takes precedence over `selected`. The chosen name is
    validated by `normalize_env_var_name`, which raises ValueError for anything that is
    not a plain variable name (including the case where both inputs are empty).
    """
    typed = (custom or "").strip()
    chosen = typed if typed else (selected or "")
    return "env:" + normalize_env_var_name(chosen)
70
+
71
+
72
def profile_for_runner(
    runner: str,
    *,
    key_env: str | None = None,
    custom_key_env: str | None = None,
    executor: str | None = None,
    model: str | None = None,
    strict: bool = True,
    preset: str | None = None,
) -> profile.Profile:
    """Assemble a Profile from widget state; only variable NAMES are handled, never key values.

    Args:
        runner: one of the `RUNNER_CHOICES` ids.
        key_env: variable name picked from the drop-down (used for "api-key" only).
        custom_key_env: free-text variable name; wins over `key_env` when non-blank.
        executor: explicit executor; falls back to the runner's default when falsy.
        model: optional model name; blank or whitespace-only becomes None.
        strict: strict-gate preference.
        preset: preset name; an empty value becomes None.

    Raises:
        ValueError: for an unknown runner, or an invalid key-env name with "api-key".
    """
    if runner not in RUNNER_CHOICES:
        raise ValueError(f"unknown runner: {runner}")
    _label, fallback_executor = RUNNER_CHOICES[runner]

    # Only the API-key runner needs a key source; the others carry none.
    key_source = api_key_source(key_env, custom_key_env) if runner == "api-key" else None

    cleaned_model = (model or "").strip()
    return profile.Profile(
        runner=runner,
        executor=executor if executor else fallback_executor,
        model=cleaned_model if cleaned_model else None,
        key_source=key_source,
        strict=strict,
        preset=preset if preset else None,
    )
99
+
100
+
101
def first_fix_hint(candidate: profile.Profile) -> tuple[bool, str]:
    """Run the profile preflight for `candidate` and return `profile.ready`'s verdict on it.

    Callers unpack the result as `(ok, hint)` and display `hint` when `ok` is false.
    """
    return profile.ready(profile.preflight(candidate))
104
+
105
+
106
if _HAVE_TEXTUAL:

    class OnboardingApp(App):
        """First-run BYO setup: welcome -> runner choice -> preferences -> profile.

        The three steps are sibling containers in one widget tree. The CSS hides every
        `.screen` and shows only the one that also carries `.active`; `_show` moves that
        class around. The app exits with the saved Profile (see `_finish`).
        """

        TITLE = "sembl-stack"
        SUB_TITLE = "first-run setup"
        BINDINGS = [("q", "quit", "Quit")]
        CSS = """
        #onboarding { padding: 1 2; }
        .screen { display: none; }
        .active { display: block; }
        .choice { width: 100%; margin: 0 0 1 0; }
        #mock-choice { opacity: 70%; }
        #byo-hint { color: $error; margin: 1 0 0 0; }
        #prefs-error { color: $error; margin: 1 0 0 0; }
        Select, Input { margin: 0 0 1 0; }
        Button { margin: 0 1 1 0; }
        """

        def __init__(self, repo: str = "."):
            """Remember `repo` and seed the runner / key-env defaults from `profile.detect()`."""
            super().__init__()
            self.repo = repo
            detected = profile.detect()
            self._runner = detected.runner
            # Pre-select the detected key variable when the detected key source has the
            # "env:NAME" form; otherwise fall back to the first known variable name.
            self._key_env = (
                detected.key_source.removeprefix("env:")
                if detected.key_source and detected.key_source.startswith("env:")
                else profile._KEY_ENV_VARS[0]
            )

        def compose(self) -> "ComposeResult":
            """Yield the widget tree: header, the three screens, footer."""
            yield Header()
            with Vertical(id="onboarding"):
                # Screen 1: welcome copy (the only screen active at start).
                with Vertical(id="welcome-screen", classes="screen active"):
                    yield Static(
                        "sembl-stack puts an accountability gate around an AI coding loop.",
                        id="welcome-copy",
                    )
                    yield Static(
                        "It runs on your own keys, local model, or Claude Code login.",
                        id="keys-copy",
                    )
                    yield Button("Continue", id="welcome-next", variant="primary")

                # Screen 2: runner choice, with the key-env inputs next to the API-key button.
                with Vertical(id="byo-screen", classes="screen"):
                    yield Static("Choose how sembl-stack should run AI work.", id="byo-title")
                    yield Button(RUNNER_CHOICES["claude-login"][0], id="claude-choice", classes="choice")
                    yield Button(RUNNER_CHOICES["api-key"][0], id="api-choice", classes="choice")
                    yield Select(
                        [(name, name) for name in env_var_options()],
                        # Fall back to the first option when the detected name is not offered.
                        value=self._key_env if self._key_env in env_var_options() else env_var_options()[0],
                        id="api-env",
                    )
                    yield Input(placeholder="OTHER_API_KEY_ENV", id="api-env-custom")
                    yield Button(RUNNER_CHOICES["local"][0], id="local-choice", classes="choice")
                    yield Button(RUNNER_CHOICES["mock"][0], id="mock-choice", classes="choice")
                    yield Static("", id="byo-hint")

                # Screen 3: preferences, with executor/model tucked under "Advanced".
                with Vertical(id="prefs-screen", classes="screen"):
                    yield Static("Preferences", id="prefs-title")
                    yield Select([("Existing repo", "existing"), ("New repo", "new")],
                                 value="existing", id="repo-mode")
                    yield Checkbox("Strict gate", value=True, id="strict")
                    yield Select([("No preset", "")] + [(name, name) for name in presets.names()],
                                 value="", id="preset")
                    with Collapsible(title="Advanced", collapsed=True, id="advanced"):
                        # NOTE(review): this lookup raises KeyError if `profile.detect()`
                        # returned a runner that is not a RUNNER_CHOICES key — confirm that
                        # detect() only ever yields those ids.
                        yield Select([(name, name) for name in ("claude", "opencode", "aider", "mock")],
                                     value=RUNNER_CHOICES[self._runner][1], id="executor")
                        yield Input(placeholder="Model (optional)", id="model")
                    yield Button("Go", id="finish", variant="primary")
                    yield Static("", id="prefs-error")
            yield Footer()

        def on_mount(self) -> None:
            """Highlight the button of the initially selected runner."""
            self._mark_selected()

        def on_button_pressed(self, event) -> None:
            """Dispatch a button press by widget id: continue, choose a runner, or finish."""
            button_id = event.button.id
            if button_id == "welcome-next":
                self._show("byo")
            elif button_id in {"claude-choice", "api-choice", "local-choice", "mock-choice"}:
                # Translate the button id into its runner id.
                runner = {
                    "claude-choice": "claude-login",
                    "api-choice": "api-key",
                    "local-choice": "local",
                    "mock-choice": "mock",
                }[button_id]
                self._choose_runner(runner)
            elif button_id == "finish":
                self._finish()

        def _show(self, name: str) -> None:
            """Make `#<name>-screen` the only screen carrying the `active` class."""
            for screen_id in ("welcome-screen", "byo-screen", "prefs-screen"):
                screen = self.query_one(f"#{screen_id}")
                screen.remove_class("active")
            self.query_one(f"#{name}-screen").add_class("active")

        def _choose_runner(self, runner: str) -> None:
            """Select `runner`; move on to preferences only if its preflight passes.

            On invalid input or a failed preflight the message is shown in `#byo-hint`
            and the runner-choice screen stays up.
            """
            self._runner = runner
            self._mark_selected()
            try:
                candidate = self._candidate_from_inputs()
            except ValueError as exc:
                self.query_one("#byo-hint", Static).update(str(exc))
                return
            ok, hint = first_fix_hint(candidate)
            if not ok:
                self.query_one("#byo-hint", Static).update(hint)
                return
            self.query_one("#byo-hint", Static).update("")  # clear any earlier hint
            # Preset the Advanced executor drop-down to the chosen runner's executor.
            self.query_one("#executor", Select).value = candidate.executor
            self._show("prefs")

        def _finish(self) -> None:
            """Validate, save the profile, store the repo mode on the session, and exit.

            Any ValueError or failed preflight is shown in `#prefs-error` and the app
            stays open.
            """
            try:
                candidate = self._candidate_from_inputs(include_preferences=True)
            except ValueError as exc:
                self.query_one("#prefs-error", Static).update(str(exc))
                return
            ok, hint = first_fix_hint(candidate)
            if not ok:
                self.query_one("#prefs-error", Static).update(hint)
                return
            try:
                profile.save(candidate)
            except ValueError as exc:  # e.g. an API key pasted into the Model field
                self.query_one("#prefs-error", Static).update(str(exc))
                return
            session = resume_or_new(self.repo)
            session.mode = str(self.query_one("#repo-mode", Select).value or "existing")
            save_session(session)
            self.exit(candidate)  # hand the saved profile to the app's exit

        def _candidate_from_inputs(self, *, include_preferences: bool = False) -> profile.Profile:
            """Build a Profile from the current widget values.

            With `include_preferences` false only the runner and key-env inputs are read
            and executor/model/strict/preset take their defaults; with it true the
            preferences-screen widgets are read as well. ValueError from
            `profile_for_runner` propagates to the caller.
            """
            key_env = str(self.query_one("#api-env", Select).value or "")
            custom_key_env = self.query_one("#api-env-custom", Input).value
            executor = RUNNER_CHOICES[self._runner][1]  # the runner's default executor
            model = None
            strict = True
            preset = None
            if include_preferences:
                executor = str(self.query_one("#executor", Select).value or executor)
                model = self.query_one("#model", Input).value
                strict = bool(self.query_one("#strict", Checkbox).value)
                preset_value = str(self.query_one("#preset", Select).value or "")
                preset = preset_value or None
            return profile_for_runner(
                self._runner,
                key_env=key_env,
                custom_key_env=custom_key_env,
                executor=executor,
                model=model,
                strict=strict,
                preset=preset,
            )

        def _mark_selected(self) -> None:
            """Give the current runner's button the "primary" variant and reset the others."""
            ids = {
                "claude-login": "claude-choice",
                "api-key": "api-choice",
                "local": "local-choice",
                "mock": "mock-choice",
            }
            for runner, button_id in ids.items():
                button = self.query_one(f"#{button_id}", Button)
                button.variant = "primary" if runner == self._runner else "default"