sembl-stack 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sembl_stack/__init__.py +3 -0
- sembl_stack/adapters/__init__.py +0 -0
- sembl_stack/adapters/_redact.py +19 -0
- sembl_stack/adapters/base.py +179 -0
- sembl_stack/adapters/codegraph_cbm.py +95 -0
- sembl_stack/adapters/deploy_vercel.py +215 -0
- sembl_stack/adapters/execute_aider.py +115 -0
- sembl_stack/adapters/execute_claude.py +114 -0
- sembl_stack/adapters/execute_mock.py +53 -0
- sembl_stack/adapters/execute_opencode.py +114 -0
- sembl_stack/adapters/merge_git.py +107 -0
- sembl_stack/adapters/postdeploy_http.py +82 -0
- sembl_stack/adapters/review_coderabbit.py +215 -0
- sembl_stack/adapters/review_llm.py +142 -0
- sembl_stack/adapters/review_mock.py +42 -0
- sembl_stack/adapters/sandbox_worktree.py +79 -0
- sembl_stack/adapters/spec_sembl.py +91 -0
- sembl_stack/adapters/verify_sembl.py +77 -0
- sembl_stack/artifacts.py +207 -0
- sembl_stack/cli.py +759 -0
- sembl_stack/config.py +87 -0
- sembl_stack/contextgraph.py +154 -0
- sembl_stack/doctor.py +111 -0
- sembl_stack/loop.py +380 -0
- sembl_stack/onboarding.py +272 -0
- sembl_stack/presets.py +114 -0
- sembl_stack/profile.py +193 -0
- sembl_stack/reconciliation.py +138 -0
- sembl_stack/registry.py +91 -0
- sembl_stack/rsi.py +188 -0
- sembl_stack/runner.py +134 -0
- sembl_stack/session.py +86 -0
- sembl_stack/specgraph.py +146 -0
- sembl_stack/store.py +112 -0
- sembl_stack/tracing.py +51 -0
- sembl_stack/transport/__init__.py +0 -0
- sembl_stack/transport/mcp_client.py +58 -0
- sembl_stack/tui.py +86 -0
- sembl_stack/views.py +74 -0
- sembl_stack/wizard.py +233 -0
- sembl_stack-0.1.0.dist-info/METADATA +165 -0
- sembl_stack-0.1.0.dist-info/RECORD +45 -0
- sembl_stack-0.1.0.dist-info/WHEEL +4 -0
- sembl_stack-0.1.0.dist-info/entry_points.txt +2 -0
- sembl_stack-0.1.0.dist-info/licenses/LICENSE +201 -0
sembl_stack/tracing.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""L6 observability: Langfuse tracing, with a no-op fallback.
|
|
2
|
+
|
|
3
|
+
`span(name)` is a context manager around each loop node. If Langfuse isn't enabled or
|
|
4
|
+
installed, it's a no-op — the loop runs identically, just untraced.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from contextlib import contextmanager
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class _NoopTracer:
|
|
12
|
+
enabled = False
|
|
13
|
+
|
|
14
|
+
@contextmanager
|
|
15
|
+
def span(self, name: str, **meta):
|
|
16
|
+
yield None
|
|
17
|
+
|
|
18
|
+
def flush(self):
|
|
19
|
+
pass
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class _LangfuseTracer:
|
|
23
|
+
enabled = True
|
|
24
|
+
|
|
25
|
+
def __init__(self):
|
|
26
|
+
from langfuse import Langfuse
|
|
27
|
+
self._lf = Langfuse()
|
|
28
|
+
self._trace = self._lf.trace(name="sembl-stack-loop")
|
|
29
|
+
|
|
30
|
+
@contextmanager
|
|
31
|
+
def span(self, name: str, **meta):
|
|
32
|
+
span = self._trace.span(name=name, metadata=meta or None)
|
|
33
|
+
try:
|
|
34
|
+
yield span
|
|
35
|
+
finally:
|
|
36
|
+
span.end()
|
|
37
|
+
|
|
38
|
+
def flush(self):
|
|
39
|
+
try:
|
|
40
|
+
self._lf.flush()
|
|
41
|
+
except Exception:
|
|
42
|
+
pass
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def get_tracer(langfuse: bool):
    """Return the tracer to use for a run.

    Args:
        langfuse: whether Langfuse tracing was requested.

    Returns:
        A `_LangfuseTracer` when tracing was requested and could be constructed,
        otherwise a `_NoopTracer`. This function never raises because of tracing
        setup problems.
    """
    if not langfuse:
        return _NoopTracer()
    try:
        return _LangfuseTracer()
    except Exception:
        # Deliberate best-effort: a missing SDK or failing client setup must not stop
        # the loop. Leave a debug-level breadcrumb so "why is my run untraced?" is
        # answerable; the default logging configuration does not show it.
        import logging

        logging.getLogger(__name__).debug(
            "Langfuse tracing was requested but could not be initialised; "
            "continuing untraced",
            exc_info=True,
        )
        return _NoopTracer()
|
|
File without changes
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""L0 protocol backbone: a tiny synchronous MCP stdio client.
|
|
2
|
+
|
|
3
|
+
The platform's north star is "every layer speaks MCP." This helper spawns an MCP
|
|
4
|
+
server over stdio, calls one tool, and returns the parsed JSON result — synchronously,
|
|
5
|
+
so adapters don't have to be async.
|
|
6
|
+
|
|
7
|
+
If the `mcp` SDK isn't installed, `available()` returns False and adapters fall back
|
|
8
|
+
to their CLI path. That keeps the loop bootable with zero extra installs while MCP
|
|
9
|
+
remains the default, dogfooded transport.
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def available() -> bool:
    """Report whether the `mcp` SDK can be imported in this environment.

    Any exception raised by the import (not only `ImportError`) counts as
    "not available".
    """
    try:
        import mcp  # noqa: F401
    except Exception:
        return False
    return True
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def call_tool(server_cmd: list[str], tool: str, arguments: dict) -> Any:
    """Run one MCP tool call against a freshly spawned stdio server, synchronously.

    Args:
        server_cmd: argv of the server; the first item is the executable and the
            remaining items are its arguments.
        tool: name of the tool to call.
        arguments: arguments passed to the tool.

    Returns:
        Whatever `_parse` extracts from the tool result.

    Raises:
        ImportError: if `anyio` or the `mcp` SDK is missing — gate on
            `available()` first.
    """
    import anyio
    from mcp import ClientSession, StdioServerParameters
    from mcp.client.stdio import stdio_client

    async def _invoke() -> Any:
        executable = server_cmd[0]
        extra_args = server_cmd[1:]
        launch = StdioServerParameters(command=executable, args=extra_args)
        async with stdio_client(launch) as streams:
            reader, writer = streams
            async with ClientSession(reader, writer) as session:
                # The session is initialised before the tool call is issued.
                await session.initialize()
                reply = await session.call_tool(tool, arguments)
                return _parse(reply)

    # anyio.run drives the coroutine to completion so callers can stay synchronous.
    return anyio.run(_invoke)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _parse(result: Any) -> Any:
|
|
46
|
+
"""Pull the JSON payload out of an MCP CallToolResult."""
|
|
47
|
+
content = getattr(result, "content", None) or []
|
|
48
|
+
for block in content:
|
|
49
|
+
text = getattr(block, "text", None)
|
|
50
|
+
if text:
|
|
51
|
+
try:
|
|
52
|
+
return json.loads(text)
|
|
53
|
+
except json.JSONDecodeError:
|
|
54
|
+
return {"text": text}
|
|
55
|
+
sc = getattr(result, "structuredContent", None)
|
|
56
|
+
if sc:
|
|
57
|
+
return sc
|
|
58
|
+
return {}
|
sembl_stack/tui.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""O6 — the in-terminal run dashboard (CI-run-page UX), built on Textual.
|
|
2
|
+
|
|
3
|
+
Optional by design: Textual is an extra (`pip install "sembl-stack[tui]"`). If it isn't
|
|
4
|
+
installed, `available()` is False and the CLI prints an actionable hint instead of crashing —
|
|
5
|
+
the same degrade-don't-fail stance as the LangGraph fallback. The data comes from the shared
|
|
6
|
+
`views` layer, so the dashboard shows exactly what `sembl-stack runs` shows, live-refreshed.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from . import views
|
|
11
|
+
|
|
12
|
+
try:
|
|
13
|
+
from textual.app import App, ComposeResult
|
|
14
|
+
from textual.containers import Horizontal
|
|
15
|
+
from textual.widgets import DataTable, Footer, Header, Static
|
|
16
|
+
_HAVE_TEXTUAL = True
|
|
17
|
+
except ImportError: # textual not installed — degrade gracefully
|
|
18
|
+
_HAVE_TEXTUAL = False
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def available() -> bool:
    """Report whether the Textual imports at the top of this module succeeded."""
    return _HAVE_TEXTUAL
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def run_dashboard(store, refresh_s: float = 3.0) -> None:
    """Start the runs dashboard app for `store`.

    Args:
        store: run store handed to `RunsDashboard`.
        refresh_s: auto-reload interval handed to `RunsDashboard`.

    Raises:
        RuntimeError: if Textual is not installed; check `available()` first.
    """
    if _HAVE_TEXTUAL:
        RunsDashboard(store, refresh_s).run()
        return
    raise RuntimeError("textual not installed — `pip install \"sembl-stack[tui]\"`")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
if _HAVE_TEXTUAL:

    class RunsDashboard(App):
        """A two-pane dashboard: a table of runs + the highlighted run's detail.

        The table is rebuilt from `views.list_rows` on mount, on `r`, and every
        `refresh_s` seconds when that value is truthy. The detail pane shows
        `views.detail_lines` for the highlighted run.
        """

        TITLE = "sembl-stack — runs"
        BINDINGS = [("q", "quit", "Quit"), ("r", "reload", "Reload")]
        CSS = """
        DataTable { width: 60%; }
        #detail { width: 40%; padding: 0 1; border-left: solid $accent; }
        """

        def __init__(self, store, refresh_s: float = 3.0):
            """Keep the store and refresh interval; a falsy `refresh_s` disables auto-reload."""
            super().__init__()
            self._store = store
            self._refresh_s = refresh_s
            # Id of the run currently shown in the detail pane. A reload uses it to
            # keep the same run highlighted even when rows are inserted above it.
            self._selected_run_id = None

        def compose(self) -> "ComposeResult":
            """Lay out header, the runs table next to the detail pane, and footer."""
            yield Header()
            with Horizontal():
                yield DataTable(id="runs", cursor_type="row")
                yield Static("select a run", id="detail")
            yield Footer()

        def on_mount(self) -> None:
            """Create the columns, load the first snapshot, and start the refresh timer."""
            table = self.query_one("#runs", DataTable)
            table.add_columns("run", "status", "att", "latency", "task")
            self._reload()
            if self._refresh_s:
                self.set_interval(self._refresh_s, self._reload)

        def action_reload(self) -> None:
            """Handler for the `r` binding: reload immediately."""
            self._reload()

        def _reload(self) -> None:
            """Rebuild the table from the store, keeping the same run highlighted."""
            table = self.query_one("#runs", DataTable)
            fallback_row = table.cursor_row
            # Read the store before clearing, so a failing read leaves the previous
            # snapshot on screen instead of an empty table.
            rows = views.list_rows(self._store)
            table.clear()
            for r in rows:
                text = r["task"] or ""
                task = (text[:48] + "…") if len(text) > 49 else text
                table.add_row(r["id"], r["status"], str(r["attempts"]),
                              r["latency"], task, key=r["id"])
            if not rows:
                return
            # Rows were just added in list order, so list index == table row index.
            ids = [r["id"] for r in rows]
            if self._selected_run_id in ids:
                target = ids.index(self._selected_run_id)
            else:
                # The previously highlighted run is gone (or nothing was selected
                # yet): stay at the old cursor position, clamped to the new length.
                target = min(fallback_row, len(ids) - 1)
            table.move_cursor(row=target)
            self._show(ids[target])

        def on_data_table_row_highlighted(self, event) -> None:
            """Show the detail of whichever row the cursor lands on."""
            self._show(event.row_key.value)

        def _show(self, run_id) -> None:
            """Render `run_id` in the detail pane and remember it as the selection."""
            if not run_id:
                return
            self._selected_run_id = run_id
            lines = views.detail_lines(self._store, run_id)
            self.query_one("#detail", Static).update(
                "\n".join(lines) if lines else "no detail")
|
sembl_stack/views.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""Run-store presentation layer — pure functions shared by the CLI and the TUI (O6).
|
|
2
|
+
|
|
3
|
+
Keeping the "what to show" logic here (no click, no textual) means the run list and the
|
|
4
|
+
single-run detail render identically whether you type `sembl-stack runs` or watch the live
|
|
5
|
+
dashboard, and the formatting is unit-testable without spinning up either UI.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def list_rows(store) -> list[dict]:
    """One summary row per recorded run, in the order `store.list_runs()` yields them.

    Each row is a dict with the keys `id`, `status`, `attempts`, `latency` and
    `task`. Fields missing from the manifest fall back to placeholders (`"?"`,
    `"-"`, `"-"` and `""` respectively), and `task` is always a string.
    """
    rows = []
    for rid in store.list_runs():
        # `detail_lines` treats a falsy manifest as "nothing to show"; accept the
        # same here so one run without a manifest cannot break the whole listing.
        m = store.open(rid).manifest() or {}
        lat = m.get("total_latency_s")
        task = m.get("task") or {}
        rows.append({
            "id": rid,
            "status": m.get("status", "?"),
            "attempts": m.get("attempts", "-"),
            "latency": f"{lat:.2f}s" if isinstance(lat, (int, float)) else "-",
            # Callers take len() of this value, so never hand back None.
            "task": task.get("text") or "",
        })
    return rows
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def detail_lines(store, run_id: str) -> list[str] | None:
    """Plain-text detail for one run, or None when the run has no manifest.

    Sections, in order: header (status / attempts / engine / latency), task, bounds,
    one entry per attempt with its verdict and reasons, the final verdict, the patch,
    an `apply` hint when the final verdict is PASS or WARN, and the artifacts directory.
    Sections whose data is absent are skipped.
    """
    run = store.open(run_id)
    m = run.manifest()
    if not m:
        return None
    lat = m.get("total_latency_s")
    lat_s = f"{lat:.2f}s" if isinstance(lat, (int, float)) else "-"
    out = [
        f"run {run_id}",
        f" status: {m.get('status','?')} attempts={m.get('attempts','-')} "
        f"engine={m.get('engine','-')} latency={lat_s}",
    ]
    task = m.get("task", {}) or {}
    if task:
        out.append(f" task: {task.get('text','')}")
        out.append(f" repo: {task.get('repo','')}")
    bounds = run.get("bounds")
    if bounds is not None:
        out.append(f" bounds: editable={bounds.editable_paths} "
                   f"forbidden={bounds.forbidden_areas} churn={bounds.churn_budget}")

    # `attempts_log` may be present but null; treat that like an empty log.
    log = {e.get("attempt"): e for e in (m.get("attempts_log") or [])}
    n = m.get("attempts") or 0
    if n:
        out.append(" attempts:")
        for i in range(1, n + 1):
            v = run.get(f"verdict-{i}")
            meta = log.get(i, {})
            status = v.status if v else "?"
            extra = f" model={meta['model']}" if meta.get("model") else ""
            # Only append the unit to a real value, so a missing latency reads "-"
            # (as in the header) rather than "-s".
            att_lat = meta.get("latency_s")
            att_lat_s = f"{att_lat}s" if att_lat is not None else "-"
            out.append(f" {i}: [{status}] latency={att_lat_s}{extra}")
            # A verdict without reasons (or with reasons=None) contributes no lines.
            reasons = (getattr(v, "reasons", None) or []) if v else []
            out += [f" - {r}" for r in reasons]

    fv = run.get("verdict")
    if fv is not None:
        out.append(f" final: {fv.status}")
    change = run.get("change")
    if change is None and n:
        # No final change artifact: fall back to the last attempt's change.
        change = run.get(f"change-{n}")
    if change is not None:
        files = (getattr(change, "report", {}) or {}).get("files_modified") or []
        suffix = f" files={files}" if files else ""
        out.append(f" patch: change.json{suffix}")
        if fv is not None and fv.status in ("PASS", "WARN"):
            warn = " --allow-warn" if fv.status == "WARN" else ""
            out.append(f" apply: sembl-stack apply {run_id} --repo {task.get('repo','.')}{warn}")
    out.append(f" artifacts: {run.dir}")
    return out
|
sembl_stack/wizard.py
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
"""Guided surface — the bare-`sembl-stack` Textual wizard (O6, elevates C4).
|
|
2
|
+
|
|
3
|
+
Phase 0: a New/Existing choice, a stage rail (CI-run-page UX), and leave/continue-anywhere
|
|
4
|
+
resume via the `session.json` pointer (see `session.py`).
|
|
5
|
+
|
|
6
|
+
Phase 2: the stage rail actually RUNS the loop under the configured profile — press `r`
|
|
7
|
+
and the real `loop.run` (plan -> execute -> verify, retry-on-BLOCK) executes in a worker
|
|
8
|
+
thread against the repo's `task.yaml`, streaming per-stage status (pending/running/pass/
|
|
9
|
+
fail) into the rail and showing the final verdict panel. The orchestration glue is
|
|
10
|
+
`runner.py` (pure, headless); the wizard only renders its events — it adds NO core/gate
|
|
11
|
+
logic, so a TUI run and a headless `sembl-stack loop` run are byte-identical.
|
|
12
|
+
|
|
13
|
+
Deliberately NOT in Phase 2 (see docs/PROCESS-ACTION-PLAN.md §9 Track 2 item 5):
|
|
14
|
+
TODO(plan §9.5): CBM `index_repository` trigger on the Existing-repo path.
|
|
15
|
+
TODO(plan §9.5): reconcile (S9) advisory panel.
|
|
16
|
+
TODO(plan §9.5): live deploy/postdeploy panels + MurphyScan readiness screen.
|
|
17
|
+
|
|
18
|
+
Textual is an extra (`pip install "sembl-stack[tui]"`); if it isn't installed,
|
|
19
|
+
`available()` is False and the caller prints an actionable hint instead of crashing.
|
|
20
|
+
"""
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
from . import runner
|
|
24
|
+
from .session import STAGES, Session, resume_or_new, save
|
|
25
|
+
|
|
26
|
+
try:
|
|
27
|
+
from textual.app import App, ComposeResult
|
|
28
|
+
from textual.containers import Horizontal, Vertical
|
|
29
|
+
from textual.widgets import Button, Footer, Header, Static
|
|
30
|
+
_HAVE_TEXTUAL = True
|
|
31
|
+
except ImportError: # textual not installed — degrade gracefully
|
|
32
|
+
_HAVE_TEXTUAL = False
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def available() -> bool:
    """Report whether the Textual imports at the top of this module succeeded."""
    return _HAVE_TEXTUAL
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def launch(repo: str = ".") -> None:
    """Start the guided wizard for `repo`.

    Args:
        repo: repository path handed to `StackWizard`.

    Raises:
        RuntimeError: if Textual is not installed; check `available()` first.
    """
    if _HAVE_TEXTUAL:
        StackWizard(repo=repo).run()
        return
    raise RuntimeError("textual not installed — `pip install \"sembl-stack[tui]\"`")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# Live-run stage marks (Phase 2), layered over the session marks (Phase 0).
|
|
47
|
+
_LIVE_MARK = {"running": "~", "done": "x", "fail": "!"}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _rail_text(s: Session, live: dict | None = None) -> str:
    """Render the stage rail as plain text.

    Starts with the session's repo and mode, then one line per entry of `STAGES`.
    A stage present in `live` is drawn from its live state via `_LIVE_MARK`
    (`?` for an unknown state) plus its detail in parentheses, if any. Otherwise the
    session decides: `x` for completed, `>` for the current stage, blank for pending.
    A closing line is added once the session reports `done`.
    """
    overrides = live if live else {}
    rendered = [f"repo: {s.repo}", f"mode: {s.mode}", ""]
    for stage in STAGES:
        if stage in overrides:
            entry = overrides[stage]
            mark = _LIVE_MARK.get(entry["state"], "?")
            note = entry.get("detail", "")
            tail = f" ({note})" if note else ""
        elif stage in s.completed:
            mark, tail = "x", ""
        elif stage == s.current_stage:
            mark, tail = ">", ""
        else:
            mark, tail = " ", ""
        rendered.append(f" [{mark}] {stage}{tail}")
    if s.done:
        rendered.append("\n all stages complete.")
    return "\n".join(rendered)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _verdict_text(result) -> str:
|
|
70
|
+
"""The final-verdict panel line(s) for a finished live run."""
|
|
71
|
+
v = result.verdict
|
|
72
|
+
lines = [f"FINAL: {v.status} (after {result.attempts} attempt(s))"]
|
|
73
|
+
for r in getattr(v, "reasons", []) or []:
|
|
74
|
+
lines.append(f" - {r}")
|
|
75
|
+
if result.run_id:
|
|
76
|
+
lines.append(f"run: {result.run_id} (.sembl/runs/{result.run_id}/)")
|
|
77
|
+
return "\n".join(lines)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
if _HAVE_TEXTUAL:

    class StackWizard(App):
        """Bare-`sembl-stack` guided wizard: New/Existing + stage rail + session resume
        + Phase-2 live run (`r` runs task.yaml through the real loop).

        Instance state:
          * `_session` — the resumable session; saved after every change made here.
          * `_live` — stage -> {"state", "detail"} for the current or most recent live
            run; `_rail_text` lets these entries override the session marks.
          * `_loop_running` — True while a live run is in flight; blocks a second run.
        """

        TITLE = "sembl-stack"
        SUB_TITLE = "guided run"
        BINDINGS = [
            ("q", "quit", "Quit"),
            ("n", "mode_new", "New repo"),
            ("e", "mode_existing", "Existing repo"),
            ("space", "advance", "Advance stage"),
            ("r", "run_loop", "Run task.yaml"),
        ]
        CSS = """
        #mode { width: 30%; padding: 1; border-right: solid $accent; }
        #right { width: 70%; }
        #rail { padding: 1; height: auto; }
        #verdict { padding: 1; height: auto; color: $text-muted; }
        Button { width: 100%; margin: 0 0 1 0; }
        """

        def __init__(self, repo: str = ".", session: "Session | None" = None):
            """Use `session` when given, otherwise resume or create one for `repo`."""
            super().__init__()
            self._session = session or resume_or_new(repo)
            self._live: dict = {}  # stage -> {"state", "detail"} during a live run
            self._loop_running = False

        def compose(self) -> "ComposeResult":
            """Lay out the mode column (left) and the rail + verdict panels (right)."""
            yield Header()
            with Horizontal():
                with Vertical(id="mode"):
                    yield Static("New or existing?", id="mode-label")
                    yield Button("New repo", id="mode-new", variant="primary")
                    yield Button("Existing repo", id="mode-existing")
                with Vertical(id="right"):
                    yield Static(_rail_text(self._session), id="rail")
                    yield Static("", id="verdict")
            yield Footer()

        # -- actions ------------------------------------------------------------
        def _set_mode(self, mode: str) -> None:
            """Record the chosen mode on the session, persist it, and redraw the rail."""
            self._session.mode = mode
            save(self._session)
            self._refresh()

        def action_mode_new(self) -> None:
            """Handler for the `n` binding."""
            self._set_mode("new")

        def action_mode_existing(self) -> None:
            """Handler for the `e` binding."""
            self._set_mode("existing")

        def action_advance(self) -> None:
            """Handler for `space`: advance the session one stage and persist it."""
            self._session.advance()
            save(self._session)
            self._refresh()

        def on_button_pressed(self, event) -> None:
            """Route the two mode buttons to `_set_mode`; other buttons are ignored."""
            if event.button.id == "mode-new":
                self._set_mode("new")
            elif event.button.id == "mode-existing":
                self._set_mode("existing")

        # -- Phase 2: run the real loop under the profile ------------------------
        def action_run_loop(self) -> None:
            """Handler for `r`: start a live run of the repo's task.yaml.

            Does nothing but post a note when a run is already in progress, when the
            task or config cannot be loaded, or when there is no task.yaml.
            """
            if self._loop_running:
                self._note("a run is already in progress…")
                return
            repo = self._session.repo
            try:
                task = runner.load_task(repo)
                cfg = None if task is None else runner.resolve_config(repo)
            except Exception as exc:
                # Report a load failure in the panel, the same way a crashed run is
                # reported, instead of letting it escape the key-binding handler.
                self._note(f"could not start run: {exc!r}")
                return
            if task is None:
                self._note(f"no task.yaml in {repo} — "
                           "`sembl-stack init` scaffolds one.")
                return
            self._loop_running = True
            self._live = {}
            self._note("running… (plan -> execute -> verify)")
            self._refresh()
            self.run_worker(self._run_loop_async(cfg, task), exclusive=True)

        async def _run_loop_async(self, cfg, task) -> None:
            """Run the blocking loop in an executor; drain stage events on the app's
            own event loop via a thread-safe queue.

            The loop's stage functions call `emit` from the executor thread, so `emit`
            only enqueues (thread-safe, non-blocking) — every UI mutation happens here,
            on the app thread. This deliberately avoids `call_from_thread`, whose
            blocking round-trip deadlocks a threaded worker under Textual's `run_test`
            harness (the pilot drives the loop, so the worker's blocked wait never
            resolves).

            The executor thread always posts exactly one terminal message (`done` or
            `crash`), which is what ends the polling loop below."""
            import asyncio
            import queue as _queue

            events: "_queue.Queue" = _queue.Queue()

            def emit(ev) -> None:
                events.put(("event", ev))

            def blocking() -> None:
                try:
                    events.put(("done", runner.run_stages(cfg, task, emit)))
                except BaseException as exc:  # loop crash (plan/verify raised)
                    # BaseException on purpose: if e.g. SystemExit escaped without a
                    # terminal message, the poller would spin forever and
                    # `_loop_running` would stay True.
                    events.put(("crash", exc))

            loop = asyncio.get_running_loop()
            fut = loop.run_in_executor(None, blocking)
            terminal = None
            while terminal is None:
                try:
                    kind, payload = events.get_nowait()
                except _queue.Empty:
                    # Poll rather than block, so the app's event loop keeps running.
                    await asyncio.sleep(0.02)
                    continue
                if kind == "event":
                    self._on_stage_event(payload)
                else:
                    terminal = (kind, payload)
            await fut  # surface any executor teardown error
            if terminal[0] == "done":
                self._on_run_done(terminal[1])
            else:
                self._on_run_crashed(terminal[1])

        def _on_stage_event(self, ev) -> None:
            """Record one stage event in `_live` and redraw the rail."""
            self._live[ev.stage] = {"state": ev.state, "detail": ev.detail}
            self._refresh()

        def _on_run_done(self, result) -> None:
            """Finish a run that returned a result: update the session and show the verdict."""
            self._loop_running = False
            if result.verdict.status in ("PASS", "WARN"):
                # The loop-backed stages are genuinely complete — record the resume
                # pointer just past them (leave/continue-anywhere, Phase 0 semantics).
                for stage in ("bounds", "loop", "verify"):
                    if stage not in self._session.completed:
                        self._session.completed.append(stage)
                self._session.current_stage = "merge"
            # The run id is recorded and the session saved for every verdict, not
            # only for PASS/WARN.
            self._session.run_id = result.run_id
            save(self._session)
            self._note(_verdict_text(result))
            self._refresh()

        def _on_run_crashed(self, exc: BaseException) -> None:
            """Finish a run that raised: release the run guard and show the error."""
            self._loop_running = False
            self._note(f"run crashed: {exc!r}")
            self._refresh()

        # -- rendering ------------------------------------------------------------
        def _note(self, text: str) -> None:
            """Replace the content of the verdict panel with `text`."""
            self.query_one("#verdict", Static).update(text)

        def _refresh(self) -> None:
            """Redraw the stage rail from the session plus any live-run overrides."""
            self.query_one("#rail", Static).update(
                _rail_text(self._session, self._live))
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sembl-stack
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A swappable, spec-driven software factory: task to bounds, an agent writes in a sandbox, the Sembl gate judges the diff, PASS merges, deploys, and a post-deploy gate confirms or rolls back. Every stage an adapter behind one typed contract.
|
|
5
|
+
License-Expression: Apache-2.0
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Requires-Dist: click>=8.1
|
|
9
|
+
Requires-Dist: langgraph>=0.2
|
|
10
|
+
Requires-Dist: mcp>=1.0
|
|
11
|
+
Requires-Dist: pyyaml>=6.0
|
|
12
|
+
Requires-Dist: sembl>=0.1.20
|
|
13
|
+
Provides-Extra: all
|
|
14
|
+
Requires-Dist: langfuse>=2.0; extra == 'all'
|
|
15
|
+
Requires-Dist: langgraph>=0.2; extra == 'all'
|
|
16
|
+
Requires-Dist: mcp>=1.0; extra == 'all'
|
|
17
|
+
Requires-Dist: sembl>=0.1.20; extra == 'all'
|
|
18
|
+
Requires-Dist: textual>=0.50; extra == 'all'
|
|
19
|
+
Provides-Extra: gate
|
|
20
|
+
Requires-Dist: sembl>=0.1.20; extra == 'gate'
|
|
21
|
+
Provides-Extra: mcp
|
|
22
|
+
Requires-Dist: mcp>=1.0; extra == 'mcp'
|
|
23
|
+
Provides-Extra: orchestrate
|
|
24
|
+
Requires-Dist: langgraph>=0.2; extra == 'orchestrate'
|
|
25
|
+
Provides-Extra: trace
|
|
26
|
+
Requires-Dist: langfuse>=2.0; extra == 'trace'
|
|
27
|
+
Provides-Extra: tui
|
|
28
|
+
Requires-Dist: textual>=0.50; extra == 'tui'
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
|
|
31
|
+
# sembl-stack
|
|
32
|
+
|
|
33
|
+
[License: Apache-2.0](LICENSE)
|
|
34
|
+
[Sembl on GitHub](https://github.com/speedvibecode/sembl)
|
|
35
|
+
|
|
36
|
+
**A swappable, spec-driven software factory.** A task becomes declared bounds, an
|
|
37
|
+
agent writes the change inside a disposable sandbox, the **[Sembl](https://github.com/speedvibecode/sembl)
|
|
38
|
+
gate** judges the real diff against those bounds, a PASS merges and deploys, and a
|
|
39
|
+
post-deploy gate confirms it's healthy — or rolls it back. **Every stage is an
|
|
40
|
+
interchangeable adapter behind one typed artifact contract, and every run is
|
|
41
|
+
recorded.**
|
|
42
|
+
|
|
43
|
+
We sell **process correctness** — the change did what the spec declared, stayed in
|
|
44
|
+
bounds, is honestly evidenced, and reached production accountably — **never "the
|
|
45
|
+
model writes better code."** The stack takes no side in the agent wars: swap the
|
|
46
|
+
executor, the sandbox, or the deploy target with one line of config and the rest of
|
|
47
|
+
the pipeline doesn't notice.
|
|
48
|
+
|
|
49
|
+
[Website](https://sembl-stack.vercel.app) · [The gate (Sembl)](https://sembl.vercel.app) · [Architecture & plan](docs/PROCESS-ACTION-PLAN.md)
|
|
50
|
+
|
|
51
|
+
```text
|
|
52
|
+
task ─▶ bounds ─▶ execute ─▶ sandbox ─▶ SEMBL GATE ─▶ merge ─▶ deploy ─▶ verify-in-prod
|
|
53
|
+
        (L2)      (L3)       (L4)       (L5)          (L6.5)   (L7)      (L8)
|
|
54
|
+
every arrow is a typed artifact on disk
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Quickstart
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
pip install sembl-stack sembl # the stack + the gate it runs at its core
|
|
61
|
+
sembl-stack init # scaffold sembl.stack.yaml + task.yaml from a preset
|
|
62
|
+
sembl-stack doctor # config-aware preflight
|
|
63
|
+
sembl-stack loop task.yaml # plan → execute → gate → retry-on-BLOCK
|
|
64
|
+
sembl-stack runs [<id>] # list / inspect runs
|
|
65
|
+
sembl-stack apply <id> # apply the accepted patch (a BLOCK is never applied)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
**Presets** (`sembl-stack init --preset …`):
|
|
69
|
+
|
|
70
|
+
| Preset | What runs | Needs |
|
|
71
|
+
|--------|-----------|-------|
|
|
72
|
+
| `just-gate` | gate any diff, nothing else | only `sembl` |
|
|
73
|
+
| `gate+sandbox` | the whole loop with a mock executor | no API keys |
|
|
74
|
+
| `full-loop` | real agent + sandbox + gate | an executor key |
|
|
75
|
+
|
|
76
|
+
Swap any layer in `sembl.stack.yaml` — e.g. `execute: opencode`, `execute: aider` —
|
|
77
|
+
with no code change.
|
|
78
|
+
|
|
79
|
+
## The stage map (L0–L8)
|
|
80
|
+
|
|
81
|
+
Each stage consumes and produces typed artifacts; that hand-off *is* the whole
|
|
82
|
+
interface, which is what makes every stage swappable.
|
|
83
|
+
|
|
84
|
+
| Stage | Does | Artifact flow | Who owns it |
|
|
85
|
+
|-------|------|---------------|-------------|
|
|
86
|
+
| **L0** Protocol & hub | one wire between stages | — | **we own** (the contract) |
|
|
87
|
+
| **L1** Repo intel | code-graph context | `Task → Context` | adapter |
|
|
88
|
+
| **L2** Spec → bounds | scope the change | `Task → Bounds` | **we own** (`sembl`) |
|
|
89
|
+
| **L3** Execute | write the change | `Task + Bounds → Change` | adapter (claude / aider / opencode) |
|
|
90
|
+
| **L4** Sandbox | contain a bad diff | `Change → Change` | adapter (disposable clone) |
|
|
91
|
+
| **L5** Verify | gate the diff | `Change + Bounds → Verdict` | **the gate** (`sembl`) |
|
|
92
|
+
| **L5.5** Review (advisory) | code-quality signal | `diff → findings` | adapter (`llm` — BYO agent-CLI reviewer; CodeRabbit optional) |
|
|
93
|
+
| **L6** Orchestrate | loop, retry, trace | wiring + `* → Trace` | **we own** (LangGraph) |
|
|
94
|
+
| **L6.5** Merge | gated merge | `Verdict(PASS) → MergeRecord` | **we own** |
|
|
95
|
+
| **L7** Deploy | ship | `Verdict(PASS) → Delivery` | adapter |
|
|
96
|
+
| **L8** Verify-in-prod | gate production | `Delivery → Verdict` | **the gate** (health + rollback) |
|
|
97
|
+
|
|
98
|
+
We own exactly three things: the **artifact contract + stage Protocol**, the **gate
|
|
99
|
+
(L5 + the post-deploy L8)**, and the **glue + layer-replacement protocol**.
|
|
100
|
+
Everything else is deliberately a best-in-class tool behind an interface.
|
|
101
|
+
|
|
102
|
+
## The accountable spine
|
|
103
|
+
|
|
104
|
+
A verdict is bound to the change it judged — most agent pipelines stop at "the check
|
|
105
|
+
passed"; this one guarantees a verdict can only ship the exact change it was issued
|
|
106
|
+
for:
|
|
107
|
+
|
|
108
|
+
- **Verdicts carry their subject.** Every verdict is stamped with the SHA-256 and
|
|
109
|
+
file set of the diff it judged. `apply` recomputes the patch hash and refuses a
|
|
110
|
+
verdict issued for a different patch; `merge` refuses if the merge would ship files
|
|
111
|
+
the verdict never saw.
|
|
112
|
+
- **BLOCK means blocked.** A BLOCK verdict is never applied and never merged — the
|
|
113
|
+
loop retries the executor instead. Overrides (`--skip-binding-check`) exist but are
|
|
114
|
+
recorded permanently in the `MergeRecord`.
|
|
115
|
+
- **Production is gated too.** After deploy, the L8 gate checks the live delivery
|
|
116
|
+
(health + payload, deterministically) and triggers a rollback when it fails.
|
|
117
|
+
|
|
118
|
+
Every run leaves a complete paper trail in `.sembl/runs/<id>/`:
|
|
119
|
+
|
|
120
|
+
```text
|
|
121
|
+
.sembl/runs/2ca41f/
|
|
122
|
+
├─ task.json # what was asked
|
|
123
|
+
├─ bounds.json # the declared contract
|
|
124
|
+
├─ change.json # the actual diff
|
|
125
|
+
├─ verdict.json # the gate's judgement + subject binding
|
|
126
|
+
├─ merge-record.json # what shipped, and under whose PASS
|
|
127
|
+
└─ trace.json # the timeline
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## The guided TUI (optional)
|
|
131
|
+
|
|
132
|
+
`pip install "sembl-stack[tui]"` adds a Textual wizard. Run bare `sembl-stack` and
|
|
133
|
+
press `r`: the stage rail runs the real loop under your configured profile,
|
|
134
|
+
streaming per-stage status (pending/running/pass/fail) live and showing the final
|
|
135
|
+
verdict — byte-identical to a headless `sembl-stack loop`, because it drives the
|
|
136
|
+
same adapters.
|
|
137
|
+
|
|
138
|
+
## The full picture
|
|
139
|
+
|
|
140
|
+
**→ [`docs/PROCESS-ACTION-PLAN.md`](docs/PROCESS-ACTION-PLAN.md)** is the single
|
|
141
|
+
source of truth: architecture, the L0–L8 stage map with build status, the eval
|
|
142
|
+
metric, locked decisions, the guided-TUI vision, and the remaining-work plan.
|
|
143
|
+
|
|
144
|
+
Reference: [`process-self-improvement.md`](docs/process-self-improvement.md)
|
|
145
|
+
(north-star theory) · [`eval-metric-O3.md`](docs/eval-metric-O3.md) (the metric) ·
|
|
146
|
+
[`memory-plane-hypothesis.md`](docs/memory-plane-hypothesis.md).
|
|
147
|
+
|
|
148
|
+
## Local development
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
uv sync --extra all
|
|
152
|
+
uv pip install -e ../sembl # or: pip install sembl
|
|
153
|
+
.venv/Scripts/python -m pytest -q # Windows path; on macOS/Linux use .venv/bin/python. Run from the repo root (corpus paths are cwd-relative)
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
## Releasing
|
|
157
|
+
|
|
158
|
+
Publishing uses GitHub Actions + PyPI Trusted Publishing (OIDC); no tokens are
|
|
159
|
+
stored. `.github/workflows/release.yml` builds and publishes when you publish a
|
|
160
|
+
GitHub Release whose tag (`vX.Y.Z`) matches `pyproject.toml` and
|
|
161
|
+
`sembl_stack/__init__.py`.
|
|
162
|
+
|
|
163
|
+
---
|
|
164
|
+
|
|
165
|
+
Agents write the code. **sembl-stack makes the whole pipeline accountable.**
|