sembl-stack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. sembl_stack/__init__.py +3 -0
  2. sembl_stack/adapters/__init__.py +0 -0
  3. sembl_stack/adapters/_redact.py +19 -0
  4. sembl_stack/adapters/base.py +179 -0
  5. sembl_stack/adapters/codegraph_cbm.py +95 -0
  6. sembl_stack/adapters/deploy_vercel.py +215 -0
  7. sembl_stack/adapters/execute_aider.py +115 -0
  8. sembl_stack/adapters/execute_claude.py +114 -0
  9. sembl_stack/adapters/execute_mock.py +53 -0
  10. sembl_stack/adapters/execute_opencode.py +114 -0
  11. sembl_stack/adapters/merge_git.py +107 -0
  12. sembl_stack/adapters/postdeploy_http.py +82 -0
  13. sembl_stack/adapters/review_coderabbit.py +215 -0
  14. sembl_stack/adapters/review_llm.py +142 -0
  15. sembl_stack/adapters/review_mock.py +42 -0
  16. sembl_stack/adapters/sandbox_worktree.py +79 -0
  17. sembl_stack/adapters/spec_sembl.py +91 -0
  18. sembl_stack/adapters/verify_sembl.py +77 -0
  19. sembl_stack/artifacts.py +207 -0
  20. sembl_stack/cli.py +759 -0
  21. sembl_stack/config.py +87 -0
  22. sembl_stack/contextgraph.py +154 -0
  23. sembl_stack/doctor.py +111 -0
  24. sembl_stack/loop.py +380 -0
  25. sembl_stack/onboarding.py +272 -0
  26. sembl_stack/presets.py +114 -0
  27. sembl_stack/profile.py +193 -0
  28. sembl_stack/reconciliation.py +138 -0
  29. sembl_stack/registry.py +91 -0
  30. sembl_stack/rsi.py +188 -0
  31. sembl_stack/runner.py +134 -0
  32. sembl_stack/session.py +86 -0
  33. sembl_stack/specgraph.py +146 -0
  34. sembl_stack/store.py +112 -0
  35. sembl_stack/tracing.py +51 -0
  36. sembl_stack/transport/__init__.py +0 -0
  37. sembl_stack/transport/mcp_client.py +58 -0
  38. sembl_stack/tui.py +86 -0
  39. sembl_stack/views.py +74 -0
  40. sembl_stack/wizard.py +233 -0
  41. sembl_stack-0.1.0.dist-info/METADATA +165 -0
  42. sembl_stack-0.1.0.dist-info/RECORD +45 -0
  43. sembl_stack-0.1.0.dist-info/WHEEL +4 -0
  44. sembl_stack-0.1.0.dist-info/entry_points.txt +2 -0
  45. sembl_stack-0.1.0.dist-info/licenses/LICENSE +201 -0
sembl_stack/tracing.py ADDED
@@ -0,0 +1,51 @@
1
+ """L6 observability: Langfuse tracing, with a no-op fallback.
2
+
3
+ `span(name)` is a context manager around each loop node. If Langfuse isn't enabled or
4
+ installed, it's a no-op — the loop runs identically, just untraced.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ from contextlib import contextmanager
9
+
10
+
11
+ class _NoopTracer:
12
+ enabled = False
13
+
14
+ @contextmanager
15
+ def span(self, name: str, **meta):
16
+ yield None
17
+
18
+ def flush(self):
19
+ pass
20
+
21
+
22
+ class _LangfuseTracer:
23
+ enabled = True
24
+
25
+ def __init__(self):
26
+ from langfuse import Langfuse
27
+ self._lf = Langfuse()
28
+ self._trace = self._lf.trace(name="sembl-stack-loop")
29
+
30
+ @contextmanager
31
+ def span(self, name: str, **meta):
32
+ span = self._trace.span(name=name, metadata=meta or None)
33
+ try:
34
+ yield span
35
+ finally:
36
+ span.end()
37
+
38
+ def flush(self):
39
+ try:
40
+ self._lf.flush()
41
+ except Exception:
42
+ pass
43
+
44
+
45
def get_tracer(langfuse: bool):
    """Return the tracer to use for a run.

    When `langfuse` is truthy a Langfuse-backed tracer is attempted; if building
    it raises for any reason, or `langfuse` is falsy, the no-op tracer is
    returned instead.
    """
    if langfuse:
        try:
            return _LangfuseTracer()
        except Exception:
            # Fall through to the no-op tracer below.
            pass
    return _NoopTracer()
File without changes
@@ -0,0 +1,58 @@
1
+ """L0 protocol backbone: a tiny synchronous MCP stdio client.
2
+
3
+ The platform's north star is "every layer speaks MCP." This helper spawns an MCP
4
+ server over stdio, calls one tool, and returns the parsed JSON result — synchronously,
5
+ so adapters don't have to be async.
6
+
7
+ If the `mcp` SDK isn't installed, `available()` returns False and adapters fall back
8
+ to their CLI path. That keeps the loop bootable with zero extra installs while MCP
9
+ remains the default, dogfooded transport.
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ from typing import Any
15
+
16
+
17
def available() -> bool:
    """Report whether the `mcp` SDK can be imported in this environment."""
    try:
        import mcp  # noqa: F401
    except Exception:
        return False
    return True
23
+
24
+
25
def call_tool(server_cmd: list[str], tool: str, arguments: dict) -> Any:
    """Run one MCP tool call against a freshly spawned stdio server.

    `server_cmd[0]` is the executable and the remaining items are its arguments.
    The session is initialised, `tool` is called with `arguments`, and the
    result is reduced to a JSON-like value by `_parse`.

    Raises if the SDK is missing — callers should gate on `available()` first.
    """
    import anyio
    from mcp import ClientSession, StdioServerParameters
    from mcp.client.stdio import stdio_client

    async def _invoke() -> Any:
        server = StdioServerParameters(command=server_cmd[0], args=server_cmd[1:])
        async with stdio_client(server) as (reader, writer):
            async with ClientSession(reader, writer) as session:
                await session.initialize()
                return _parse(await session.call_tool(tool, arguments))

    # anyio.run drives the coroutine to completion so callers stay synchronous.
    return anyio.run(_invoke)
43
+
44
+
45
+ def _parse(result: Any) -> Any:
46
+ """Pull the JSON payload out of an MCP CallToolResult."""
47
+ content = getattr(result, "content", None) or []
48
+ for block in content:
49
+ text = getattr(block, "text", None)
50
+ if text:
51
+ try:
52
+ return json.loads(text)
53
+ except json.JSONDecodeError:
54
+ return {"text": text}
55
+ sc = getattr(result, "structuredContent", None)
56
+ if sc:
57
+ return sc
58
+ return {}
sembl_stack/tui.py ADDED
@@ -0,0 +1,86 @@
1
+ """O6 — the in-terminal run dashboard (CI-run-page UX), built on Textual.
2
+
3
+ Optional by design: Textual is an extra (`pip install "sembl-stack[tui]"`). If it isn't
4
+ installed, `available()` is False and the CLI prints an actionable hint instead of crashing —
5
+ the same degrade-don't-fail stance as the LangGraph fallback. The data comes from the shared
6
+ `views` layer, so the dashboard shows exactly what `sembl-stack runs` shows, live-refreshed.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ from . import views
11
+
12
+ try:
13
+ from textual.app import App, ComposeResult
14
+ from textual.containers import Horizontal
15
+ from textual.widgets import DataTable, Footer, Header, Static
16
+ _HAVE_TEXTUAL = True
17
+ except ImportError: # textual not installed — degrade gracefully
18
+ _HAVE_TEXTUAL = False
19
+
20
+
21
def available() -> bool:
    """Tell the caller whether the Textual imports at module load succeeded."""
    return bool(_HAVE_TEXTUAL)
23
+
24
+
25
def run_dashboard(store, refresh_s: float = 3.0) -> None:
    """Build the runs dashboard for `store` and run it.

    `refresh_s` is handed to the dashboard as its reload interval. Raises
    RuntimeError when Textual is not installed, so check `available()` first.
    """
    if _HAVE_TEXTUAL:
        RunsDashboard(store, refresh_s).run()
        return
    raise RuntimeError("textual not installed — `pip install \"sembl-stack[tui]\"`")
30
+
31
+
32
if _HAVE_TEXTUAL:

    class RunsDashboard(App):
        """Two panes side by side: the run table and the highlighted run's detail."""

        TITLE = "sembl-stack — runs"
        BINDINGS = [("q", "quit", "Quit"), ("r", "reload", "Reload")]
        CSS = """
        DataTable { width: 60%; }
        #detail { width: 40%; padding: 0 1; border-left: solid $accent; }
        """

        def __init__(self, store, refresh_s: float = 3.0):
            super().__init__()
            self._store = store
            self._refresh_s = refresh_s

        def compose(self) -> "ComposeResult":
            """Lay out header, the table/detail row, and footer."""
            yield Header()
            with Horizontal():
                yield DataTable(id="runs", cursor_type="row")
                yield Static("select a run", id="detail")
            yield Footer()

        def on_mount(self) -> None:
            """Create the columns, load once, then schedule periodic reloads."""
            runs_table = self.query_one("#runs", DataTable)
            runs_table.add_columns("run", "status", "att", "latency", "task")
            self._reload()
            # A falsy interval (0 / None) turns auto-refresh off.
            if self._refresh_s:
                self.set_interval(self._refresh_s, self._reload)

        def action_reload(self) -> None:
            """Handler for the `r` binding."""
            self._reload()

        def _reload(self) -> None:
            """Rebuild the table from the store, keeping the cursor near its old row."""
            runs_table = self.query_one("#runs", DataTable)
            previous_row = runs_table.cursor_row
            runs_table.clear()
            for row in views.list_rows(self._store):
                label = row["task"]
                if len(label) > 49:
                    # Cap long task text at 48 characters plus an ellipsis.
                    label = label[:48] + "…"
                runs_table.add_row(
                    row["id"],
                    row["status"],
                    str(row["attempts"]),
                    row["latency"],
                    label,
                    key=row["id"],
                )
            if not runs_table.row_count:
                return
            # Clamp: the list may be shorter than it was before the reload.
            runs_table.move_cursor(row=min(previous_row, runs_table.row_count - 1))
            cell_key = runs_table.coordinate_to_cell_key((runs_table.cursor_row, 0))
            self._show(cell_key.row_key.value)

        def on_data_table_row_highlighted(self, event) -> None:
            """Show detail for whichever row just became highlighted."""
            self._show(event.row_key.value)

        def _show(self, run_id) -> None:
            """Render the detail pane for `run_id`; a falsy id is ignored."""
            if not run_id:
                return
            detail = views.detail_lines(self._store, run_id)
            text = "\n".join(detail) if detail else "no detail"
            self.query_one("#detail", Static).update(text)
sembl_stack/views.py ADDED
@@ -0,0 +1,74 @@
1
+ """Run-store presentation layer — pure functions shared by the CLI and the TUI (O6).
2
+
3
+ Keeping the "what to show" logic here (no click, no textual) means the run list and the
4
+ single-run detail render identically whether you type `sembl-stack runs` or watch the live
5
+ dashboard, and the formatting is unit-testable without spinning up either UI.
6
+ """
7
+ from __future__ import annotations
8
+
9
+
10
def list_rows(store) -> list[dict]:
    """Build one summary row per run, in the order `store.list_runs()` yields them.

    Each row has the keys `id`, `status`, `attempts`, `latency` and `task`.
    A run whose manifest is missing or empty still gets a row, filled with
    placeholders ("?", "-", ""), so one unreadable run cannot break the listing.
    `task` is always a string, which lets callers measure and slice it safely.
    """
    rows = []
    for rid in store.list_runs():
        # Same tolerance as detail_lines: a manifest may be None or empty.
        m = store.open(rid).manifest() or {}
        lat = m.get("total_latency_s")
        task = m.get("task") or {}
        rows.append({
            "id": rid,
            "status": m.get("status", "?"),
            "attempts": m.get("attempts", "-"),
            "latency": f"{lat:.2f}s" if isinstance(lat, (int, float)) else "-",
            # An explicit `text: null` must not leak through as None.
            "task": task.get("text") or "",
        })
    return rows
24
+
25
+
26
def detail_lines(store, run_id: str) -> list[str] | None:
    """Plain-text detail for one run, or None when the run has no manifest.

    The lines cover, in order: the header (status, attempts, engine, latency),
    the task, the bounds if recorded, one entry per attempt with its verdict
    reasons, the final verdict, the patch, an `apply` hint for PASS/WARN, and
    the artifact directory.
    """
    run = store.open(run_id)
    m = run.manifest()
    if not m:
        return None
    lat = m.get("total_latency_s")
    lat_s = f"{lat:.2f}s" if isinstance(lat, (int, float)) else "-"
    out = [
        f"run {run_id}",
        f" status: {m.get('status','?')} attempts={m.get('attempts','-')} "
        f"engine={m.get('engine','-')} latency={lat_s}",
    ]
    task = m.get("task", {}) or {}
    if task:
        out.append(f" task: {task.get('text','')}")
        out.append(f" repo: {task.get('repo','')}")
    bounds = run.get("bounds")
    if bounds is not None:
        out.append(f" bounds: editable={bounds.editable_paths} "
                   f"forbidden={bounds.forbidden_areas} churn={bounds.churn_budget}")

    # Per-attempt metadata keyed by attempt number; tolerate `attempts_log: null`.
    log = {e.get("attempt"): e for e in (m.get("attempts_log") or [])}
    n = m.get("attempts") or 0
    if n:
        out.append(" attempts:")
        for i in range(1, n + 1):
            v = run.get(f"verdict-{i}")
            meta = log.get(i, {})
            status = v.status if v else "?"
            extra = f" model={meta['model']}" if meta.get("model") else ""
            # Only add the unit when a latency exists, so a missing value reads
            # "-" (as in the header) rather than "-s".
            attempt_lat = meta.get("latency_s")
            attempt_lat_s = f"{attempt_lat}s" if attempt_lat is not None else "-"
            out.append(f" {i}: [{status}] latency={attempt_lat_s}{extra}")
            out += [f" - {r}" for r in (v.reasons if v else [])]

    fv = run.get("verdict")
    if fv is not None:
        out.append(f" final: {fv.status}")
    change = run.get("change")
    if change is None and n:
        # No final change artifact: fall back to the last attempt's change.
        change = run.get(f"change-{n}")
    if change is not None:
        files = (getattr(change, "report", {}) or {}).get("files_modified") or []
        suffix = f" files={files}" if files else ""
        out.append(f" patch: change.json{suffix}")
    if fv is not None and fv.status in ("PASS", "WARN"):
        warn = " --allow-warn" if fv.status == "WARN" else ""
        # An empty or missing repo falls back to "." so the hint stays runnable.
        repo = task.get("repo") or "."
        out.append(f" apply: sembl-stack apply {run_id} --repo {repo}{warn}")
    out.append(f" artifacts: {run.dir}")
    return out
sembl_stack/wizard.py ADDED
@@ -0,0 +1,233 @@
1
+ """Guided surface — the bare-`sembl-stack` Textual wizard (O6, elevates C4).
2
+
3
+ Phase 0: a New/Existing choice, a stage rail (CI-run-page UX), and leave/continue-anywhere
4
+ resume via the `session.json` pointer (see `session.py`).
5
+
6
+ Phase 2: the stage rail actually RUNS the loop under the configured profile — press `r`
7
+ and the real `loop.run` (plan -> execute -> verify, retry-on-BLOCK) executes in a worker
8
+ thread against the repo's `task.yaml`, streaming per-stage status (pending/running/pass/
9
+ fail) into the rail and showing the final verdict panel. The orchestration glue is
10
+ `runner.py` (pure, headless); the wizard only renders its events — it adds NO core/gate
11
+ logic, so a TUI run and a headless `sembl-stack loop` run are byte-identical.
12
+
13
+ Deliberately NOT in Phase 2 (see docs/PROCESS-ACTION-PLAN.md §9 Track 2 item 5):
14
+ TODO(plan §9.5): CBM `index_repository` trigger on the Existing-repo path.
15
+ TODO(plan §9.5): reconcile (S9) advisory panel.
16
+ TODO(plan §9.5): live deploy/postdeploy panels + MurphyScan readiness screen.
17
+
18
+ Textual is an extra (`pip install "sembl-stack[tui]"`); if it isn't installed,
19
+ `available()` is False and the caller prints an actionable hint instead of crashing.
20
+ """
21
+ from __future__ import annotations
22
+
23
+ from . import runner
24
+ from .session import STAGES, Session, resume_or_new, save
25
+
26
+ try:
27
+ from textual.app import App, ComposeResult
28
+ from textual.containers import Horizontal, Vertical
29
+ from textual.widgets import Button, Footer, Header, Static
30
+ _HAVE_TEXTUAL = True
31
+ except ImportError: # textual not installed — degrade gracefully
32
+ _HAVE_TEXTUAL = False
33
+
34
+
35
def available() -> bool:
    """Tell the caller whether the Textual imports at module load succeeded."""
    return bool(_HAVE_TEXTUAL)
37
+
38
+
39
def launch(repo: str = ".") -> None:
    """Build the guided wizard for `repo` and run it.

    Raises RuntimeError when Textual is not installed, so check `available()`
    first.
    """
    if _HAVE_TEXTUAL:
        StackWizard(repo=repo).run()
        return
    raise RuntimeError("textual not installed — `pip install \"sembl-stack[tui]\"`")
44
+
45
+
46
# Live-run stage marks (Phase 2), layered over the session marks (Phase 0).
_LIVE_MARK = {"running": "~", "done": "x", "fail": "!"}


def _rail_text(s: Session, live: dict | None = None) -> str:
    """Render the stage rail as plain text, one line per entry in STAGES.

    A stage present in `live` uses its live mark ([~] running, [x] done,
    [!] fail, [?] for any other state) plus its detail in parentheses when set.
    Otherwise the session decides: [x] completed, [>] current, [ ] pending.
    """
    overlay = live or {}
    rows = [f"repo: {s.repo}", f"mode: {s.mode}", ""]
    for stage in STAGES:
        if stage not in overlay:
            # No live state for this stage: fall back to the session's marks.
            if stage in s.completed:
                mark = "x"
            elif stage == s.current_stage:
                mark = ">"
            else:
                mark = " "
            suffix = ""
        else:
            info = overlay[stage]
            mark = _LIVE_MARK.get(info["state"], "?")
            detail = info.get("detail", "")
            suffix = f" ({detail})" if detail else ""
        rows.append(f" [{mark}] {stage}{suffix}")
    if s.done:
        rows.append("\n all stages complete.")
    return "\n".join(rows)
67
+
68
+
69
+ def _verdict_text(result) -> str:
70
+ """The final-verdict panel line(s) for a finished live run."""
71
+ v = result.verdict
72
+ lines = [f"FINAL: {v.status} (after {result.attempts} attempt(s))"]
73
+ for r in getattr(v, "reasons", []) or []:
74
+ lines.append(f" - {r}")
75
+ if result.run_id:
76
+ lines.append(f"run: {result.run_id} (.sembl/runs/{result.run_id}/)")
77
+ return "\n".join(lines)
78
+
79
+
80
if _HAVE_TEXTUAL:

    class StackWizard(App):
        """Bare-`sembl-stack` guided wizard: New/Existing + stage rail + session resume
        + Phase-2 live run (`r` runs task.yaml through the real loop)."""

        TITLE = "sembl-stack"
        SUB_TITLE = "guided run"
        BINDINGS = [
            ("q", "quit", "Quit"),
            ("n", "mode_new", "New repo"),
            ("e", "mode_existing", "Existing repo"),
            ("space", "advance", "Advance stage"),
            ("r", "run_loop", "Run task.yaml"),
        ]
        CSS = """
        #mode { width: 30%; padding: 1; border-right: solid $accent; }
        #right { width: 70%; }
        #rail { padding: 1; height: auto; }
        #verdict { padding: 1; height: auto; color: $text-muted; }
        Button { width: 100%; margin: 0 0 1 0; }
        """

        def __init__(self, repo: str = ".", session: "Session | None" = None):
            """Use `session` if given, otherwise resume or create one for `repo`."""
            super().__init__()
            self._session = session or resume_or_new(repo)
            self._live: dict = {}  # stage -> {"state", "detail"} during a live run
            self._loop_running = False

        def compose(self) -> "ComposeResult":
            """Mode buttons on the left; stage rail and verdict panel on the right."""
            yield Header()
            with Horizontal():
                with Vertical(id="mode"):
                    yield Static("New or existing?", id="mode-label")
                    yield Button("New repo", id="mode-new", variant="primary")
                    yield Button("Existing repo", id="mode-existing")
                with Vertical(id="right"):
                    yield Static(_rail_text(self._session), id="rail")
                    yield Static("", id="verdict")
            yield Footer()

        # -- actions ------------------------------------------------------------
        def _set_mode(self, mode: str) -> None:
            """Record the chosen mode, persist the session, and redraw the rail."""
            self._session.mode = mode
            save(self._session)
            self._refresh()

        def action_mode_new(self) -> None:
            self._set_mode("new")

        def action_mode_existing(self) -> None:
            self._set_mode("existing")

        def action_advance(self) -> None:
            """Advance the session one stage, persist it, and redraw the rail."""
            self._session.advance()
            save(self._session)
            self._refresh()

        def on_button_pressed(self, event) -> None:
            """Route the two mode buttons to the same handler as the key bindings."""
            if event.button.id == "mode-new":
                self._set_mode("new")
            elif event.button.id == "mode-existing":
                self._set_mode("existing")

        # -- Phase 2: run the real loop under the profile ------------------------
        def action_run_loop(self) -> None:
            """Start a live run of the repo's task unless one is already running."""
            if self._loop_running:
                self._note("a run is already in progress…")
                return
            task = runner.load_task(self._session.repo)
            if task is None:
                self._note(f"no task.yaml in {self._session.repo} — "
                           "`sembl-stack init` scaffolds one.")
                return
            cfg = runner.resolve_config(self._session.repo)
            self._loop_running = True
            self._live = {}
            self._note("running… (plan -> execute -> verify)")
            self._refresh()
            self.run_worker(self._run_loop_async(cfg, task), exclusive=True)

        async def _run_loop_async(self, cfg, task) -> None:
            """Run the blocking loop in an executor; drain stage events on the app's
            own event loop via a thread-safe queue.

            The loop's stage functions call `emit` from the executor thread, so `emit`
            only enqueues (thread-safe, non-blocking) — every UI mutation happens here,
            on the app thread. This deliberately avoids `call_from_thread`, whose
            blocking round-trip deadlocks a threaded worker under Textual's `run_test`
            harness (the pilot drives the loop, so the worker's blocked wait never
            resolves).

            The executor thread always enqueues exactly one terminal item ("done" or
            "crash"); the drain loop below relies on that to finish."""
            import asyncio
            import queue as _queue

            events: "_queue.Queue" = _queue.Queue()

            def emit(ev) -> None:
                events.put(("event", ev))

            def blocking() -> None:
                try:
                    events.put(("done", runner.run_stages(cfg, task, emit)))
                except BaseException as exc:  # loop crash (plan/verify raised)
                    # BaseException on purpose: a SystemExit raised inside the loop
                    # would slip past `except Exception`, no terminal item would be
                    # queued, and the drain loop would poll forever with
                    # `_loop_running` stuck True.
                    events.put(("crash", exc))

            loop = asyncio.get_running_loop()
            fut = loop.run_in_executor(None, blocking)
            terminal = None
            while terminal is None:
                try:
                    kind, payload = events.get_nowait()
                except _queue.Empty:
                    # Nothing queued yet: yield to the event loop briefly, then poll again.
                    await asyncio.sleep(0.02)
                    continue
                if kind == "event":
                    self._on_stage_event(payload)
                else:
                    terminal = (kind, payload)
            await fut  # surface any executor teardown error
            if terminal[0] == "done":
                self._on_run_done(terminal[1])
            else:
                self._on_run_crashed(terminal[1])

        def _on_stage_event(self, ev) -> None:
            """Store the latest state/detail for the event's stage and redraw."""
            self._live[ev.stage] = {"state": ev.state, "detail": ev.detail}
            self._refresh()

        def _on_run_done(self, result) -> None:
            """Show the final verdict; on PASS/WARN also move the session forward."""
            self._loop_running = False
            if result.verdict.status in ("PASS", "WARN"):
                # The loop-backed stages are genuinely complete — record the resume
                # pointer just past them (leave/continue-anywhere, Phase 0 semantics).
                for stage in ("bounds", "loop", "verify"):
                    if stage not in self._session.completed:
                        self._session.completed.append(stage)
                self._session.current_stage = "merge"
                self._session.run_id = result.run_id
                save(self._session)
            self._note(_verdict_text(result))
            self._refresh()

        def _on_run_crashed(self, exc: BaseException) -> None:
            """Clear the running flag and show the exception in the verdict panel."""
            self._loop_running = False
            self._note(f"run crashed: {exc!r}")
            self._refresh()

        # -- rendering ------------------------------------------------------------
        def _note(self, text: str) -> None:
            """Replace the verdict panel's text."""
            self.query_one("#verdict", Static).update(text)

        def _refresh(self) -> None:
            """Redraw the stage rail from the session plus any live-run states."""
            self.query_one("#rail", Static).update(
                _rail_text(self._session, self._live))
@@ -0,0 +1,165 @@
1
+ Metadata-Version: 2.4
2
+ Name: sembl-stack
3
+ Version: 0.1.0
4
+ Summary: A swappable, spec-driven software factory: task to bounds, an agent writes in a sandbox, the Sembl gate judges the diff, PASS merges, deploys, and a post-deploy gate confirms or rolls back. Every stage an adapter behind one typed contract.
5
+ License-Expression: Apache-2.0
6
+ License-File: LICENSE
7
+ Requires-Python: >=3.10
8
+ Requires-Dist: click>=8.1
9
+ Requires-Dist: langgraph>=0.2
10
+ Requires-Dist: mcp>=1.0
11
+ Requires-Dist: pyyaml>=6.0
12
+ Requires-Dist: sembl>=0.1.20
13
+ Provides-Extra: all
14
+ Requires-Dist: langfuse>=2.0; extra == 'all'
15
+ Requires-Dist: langgraph>=0.2; extra == 'all'
16
+ Requires-Dist: mcp>=1.0; extra == 'all'
17
+ Requires-Dist: sembl>=0.1.20; extra == 'all'
18
+ Requires-Dist: textual>=0.50; extra == 'all'
19
+ Provides-Extra: gate
20
+ Requires-Dist: sembl>=0.1.20; extra == 'gate'
21
+ Provides-Extra: mcp
22
+ Requires-Dist: mcp>=1.0; extra == 'mcp'
23
+ Provides-Extra: orchestrate
24
+ Requires-Dist: langgraph>=0.2; extra == 'orchestrate'
25
+ Provides-Extra: trace
26
+ Requires-Dist: langfuse>=2.0; extra == 'trace'
27
+ Provides-Extra: tui
28
+ Requires-Dist: textual>=0.50; extra == 'tui'
29
+ Description-Content-Type: text/markdown
30
+
31
+ # sembl-stack
32
+
33
+ [![License: Apache 2.0](https://img.shields.io/badge/license-Apache--2.0-blue.svg)](LICENSE)
34
+ [![Built around Sembl](https://img.shields.io/badge/gate-Sembl-ccff00.svg)](https://github.com/speedvibecode/sembl)
35
+
36
+ **A swappable, spec-driven software factory.** A task becomes declared bounds, an
37
+ agent writes the change inside a disposable sandbox, the **[Sembl](https://github.com/speedvibecode/sembl)
38
+ gate** judges the real diff against those bounds, a PASS merges and deploys, and a
39
+ post-deploy gate confirms it's healthy — or rolls it back. **Every stage is an
40
+ interchangeable adapter behind one typed artifact contract, and every run is
41
+ recorded.**
42
+
43
+ We sell **process correctness** — the change did what the spec declared, stayed in
44
+ bounds, is honestly evidenced, and reached production accountably — **never "the
45
+ model writes better code."** The stack takes no side in the agent wars: swap the
46
+ executor, the sandbox, or the deploy target with one line of config and the rest of
47
+ the pipeline doesn't notice.
48
+
49
+ [Website](https://sembl-stack.vercel.app) · [The gate (Sembl)](https://sembl.vercel.app) · [Architecture & plan](docs/PROCESS-ACTION-PLAN.md)
50
+
51
+ ```text
52
+ task ─▶ bounds ─▶ execute ─▶ sandbox ─▶ SEMBL GATE ─▶ merge ─▶ deploy ─▶ verify-in-prod
53
+         (L2)      (L3)       (L4)       (L5)          (L6.5)   (L7)      (L8)
54
+ every arrow is a typed artifact on disk
55
+ ```
56
+
57
+ ## Quickstart
58
+
59
+ ```bash
60
+ pip install sembl-stack sembl # the stack + the gate it runs at its core
61
+ sembl-stack init # scaffold sembl.stack.yaml + task.yaml from a preset
62
+ sembl-stack doctor # config-aware preflight
63
+ sembl-stack loop task.yaml # plan → execute → gate → retry-on-BLOCK
64
+ sembl-stack runs [<id>] # list / inspect runs
65
+ sembl-stack apply <id> # apply the accepted patch (a BLOCK is never applied)
66
+ ```
67
+
68
+ **Presets** (`sembl-stack init --preset …`):
69
+
70
+ | Preset | What runs | Needs |
71
+ |--------|-----------|-------|
72
+ | `just-gate` | gate any diff, nothing else | only `sembl` |
73
+ | `gate+sandbox` | the whole loop with a mock executor | no API keys |
74
+ | `full-loop` | real agent + sandbox + gate | an executor key |
75
+
76
+ Swap any layer in `sembl.stack.yaml` — e.g. `execute: opencode`, `execute: aider` —
77
+ with no code change.
78
+
79
+ ## The stage map (L0–L8)
80
+
81
+ Each stage consumes and produces typed artifacts; that hand-off *is* the whole
82
+ interface, which is what makes every stage swappable.
83
+
84
+ | Stage | Does | Artifact flow | Who owns it |
85
+ |-------|------|---------------|-------------|
86
+ | **L0** Protocol & hub | one wire between stages | — | **we own** (the contract) |
87
+ | **L1** Repo intel | code-graph context | `Task → Context` | adapter |
88
+ | **L2** Spec → bounds | scope the change | `Task → Bounds` | **we own** (`sembl`) |
89
+ | **L3** Execute | write the change | `Task + Bounds → Change` | adapter (claude / aider / opencode) |
90
+ | **L4** Sandbox | contain a bad diff | `Change → Change` | adapter (disposable clone) |
91
+ | **L5** Verify | gate the diff | `Change + Bounds → Verdict` | **the gate** (`sembl`) |
92
+ | **L5.5** Review (advisory) | code-quality signal | `diff → findings` | adapter (`llm` — BYO agent-CLI reviewer; CodeRabbit optional) |
93
+ | **L6** Orchestrate | loop, retry, trace | wiring + `* → Trace` | **we own** (LangGraph) |
94
+ | **L6.5** Merge | gated merge | `Verdict(PASS) → MergeRecord` | **we own** |
95
+ | **L7** Deploy | ship | `Verdict(PASS) → Delivery` | adapter |
96
+ | **L8** Verify-in-prod | gate production | `Delivery → Verdict` | **the gate** (health + rollback) |
97
+
98
+ We own exactly three things: the **artifact contract + stage Protocol**, the **gate
99
+ (L5 + the post-deploy L8)**, and the **glue + layer-replacement protocol**.
100
+ Everything else is deliberately a best-in-class tool behind an interface.
101
+
102
+ ## The accountable spine
103
+
104
+ A verdict is bound to the change it judged — most agent pipelines stop at "the check
105
+ passed"; this one guarantees a verdict can only ship the exact change it was issued
106
+ for:
107
+
108
+ - **Verdicts carry their subject.** Every verdict is stamped with the SHA-256 and
109
+ file set of the diff it judged. `apply` recomputes the patch hash and refuses a
110
+ verdict issued for a different patch; `merge` refuses if the merge would ship files
111
+ the verdict never saw.
112
+ - **BLOCK means blocked.** A BLOCK verdict is never applied and never merged — the
113
+ loop retries the executor instead. Overrides (`--skip-binding-check`) exist but are
114
+ recorded permanently in the `MergeRecord`.
115
+ - **Production is gated too.** After deploy, the L8 gate checks the live delivery
116
+ (health + payload, deterministically) and triggers a rollback when it fails.
117
+
118
+ Every run leaves a complete paper trail in `.sembl/runs/<id>/`:
119
+
120
+ ```text
121
+ .sembl/runs/2ca41f/
122
+ ├─ task.json # what was asked
123
+ ├─ bounds.json # the declared contract
124
+ ├─ change.json # the actual diff
125
+ ├─ verdict.json # the gate's judgement + subject binding
126
+ ├─ merge-record.json # what shipped, and under whose PASS
127
+ └─ trace.json # the timeline
128
+ ```
129
+
130
+ ## The guided TUI (optional)
131
+
132
+ `pip install "sembl-stack[tui]"` adds a Textual wizard. Run bare `sembl-stack` and
133
+ press `r`: the stage rail runs the real loop under your configured profile,
134
+ streaming per-stage status (pending/running/pass/fail) live and showing the final
135
+ verdict — byte-identical to a headless `sembl-stack loop`, because it drives the
136
+ same adapters.
137
+
138
+ ## The full picture
139
+
140
+ **→ [`docs/PROCESS-ACTION-PLAN.md`](docs/PROCESS-ACTION-PLAN.md)** is the single
141
+ source of truth: architecture, the L0–L8 stage map with build status, the eval
142
+ metric, locked decisions, the guided-TUI vision, and the remaining-work plan.
143
+
144
+ Reference: [`process-self-improvement.md`](docs/process-self-improvement.md)
145
+ (north-star theory) · [`eval-metric-O3.md`](docs/eval-metric-O3.md) (the metric) ·
146
+ [`memory-plane-hypothesis.md`](docs/memory-plane-hypothesis.md).
147
+
148
+ ## Local development
149
+
150
+ ```bash
151
+ uv sync --extra all
152
+ uv pip install -e ../sembl # or: pip install sembl
153
+ .venv/Scripts/python -m pytest -q # run from the repo root (corpus paths are cwd-relative)
154
+ ```
155
+
156
+ ## Releasing
157
+
158
+ Publishing uses GitHub Actions + PyPI Trusted Publishing (OIDC); no tokens are
159
+ stored. `.github/workflows/release.yml` builds and publishes when you publish a
160
+ GitHub Release whose tag (`vX.Y.Z`) matches `pyproject.toml` and
161
+ `sembl_stack/__init__.py`.
162
+
163
+ ---
164
+
165
+ Agents write the code. **sembl-stack makes the whole pipeline accountable.**