sembl-stack 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sembl_stack/__init__.py +3 -0
- sembl_stack/adapters/__init__.py +0 -0
- sembl_stack/adapters/_redact.py +19 -0
- sembl_stack/adapters/base.py +179 -0
- sembl_stack/adapters/codegraph_cbm.py +95 -0
- sembl_stack/adapters/deploy_vercel.py +215 -0
- sembl_stack/adapters/execute_aider.py +115 -0
- sembl_stack/adapters/execute_claude.py +114 -0
- sembl_stack/adapters/execute_mock.py +53 -0
- sembl_stack/adapters/execute_opencode.py +114 -0
- sembl_stack/adapters/merge_git.py +107 -0
- sembl_stack/adapters/postdeploy_http.py +82 -0
- sembl_stack/adapters/review_coderabbit.py +215 -0
- sembl_stack/adapters/review_llm.py +142 -0
- sembl_stack/adapters/review_mock.py +42 -0
- sembl_stack/adapters/sandbox_worktree.py +79 -0
- sembl_stack/adapters/spec_sembl.py +91 -0
- sembl_stack/adapters/verify_sembl.py +77 -0
- sembl_stack/artifacts.py +207 -0
- sembl_stack/cli.py +759 -0
- sembl_stack/config.py +87 -0
- sembl_stack/contextgraph.py +154 -0
- sembl_stack/doctor.py +111 -0
- sembl_stack/loop.py +380 -0
- sembl_stack/onboarding.py +272 -0
- sembl_stack/presets.py +114 -0
- sembl_stack/profile.py +193 -0
- sembl_stack/reconciliation.py +138 -0
- sembl_stack/registry.py +91 -0
- sembl_stack/rsi.py +188 -0
- sembl_stack/runner.py +134 -0
- sembl_stack/session.py +86 -0
- sembl_stack/specgraph.py +146 -0
- sembl_stack/store.py +112 -0
- sembl_stack/tracing.py +51 -0
- sembl_stack/transport/__init__.py +0 -0
- sembl_stack/transport/mcp_client.py +58 -0
- sembl_stack/tui.py +86 -0
- sembl_stack/views.py +74 -0
- sembl_stack/wizard.py +233 -0
- sembl_stack-0.1.0.dist-info/METADATA +165 -0
- sembl_stack-0.1.0.dist-info/RECORD +45 -0
- sembl_stack-0.1.0.dist-info/WHEEL +4 -0
- sembl_stack-0.1.0.dist-info/entry_points.txt +2 -0
- sembl_stack-0.1.0.dist-info/licenses/LICENSE +201 -0
sembl_stack/rsi.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"""RSI-L1 readout — measured selection over the run store (north-star first rung).
|
|
2
|
+
|
|
3
|
+
The north star (docs/process-self-improvement.md) climbs L0 (manual swap) -> L1 (measured
|
|
4
|
+
selection): pick executors on RECORDED signal, not vibes. This module is that signal,
|
|
5
|
+
aggregated: per executor, over every run recorded in `.sembl/runs/`, how often the loop
|
|
6
|
+
went green, in how many iterations, and at what cost.
|
|
7
|
+
|
|
8
|
+
Honesty rules [LOCKED]:
|
|
9
|
+
* everything here is read back from run-store artifacts the loop already persisted —
|
|
10
|
+
nothing is estimated, sampled, or modeled;
|
|
11
|
+
* cost/tokens appear ONLY when the executor adapter reported usage (the C1.3
|
|
12
|
+
`attempts_log` hook in `loop.py`); runs recorded before an adapter reported usage
|
|
13
|
+
show "not yet recorded" — never an invented number;
|
|
14
|
+
* "green" = the loop accepted the run (final PASS or WARN; WARN is counted separately
|
|
15
|
+
so a WARN-heavy executor can't hide). iterations-to-green = the first attempt whose
|
|
16
|
+
verdict the loop accepted.
|
|
17
|
+
|
|
18
|
+
Pure and headless: no Textual, no click — `cli.py rsi` renders it.
|
|
19
|
+
"""
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
from .store import RunStore
|
|
23
|
+
|
|
24
|
+
# Final run statuses that count as "green": the loop accepted the run.
# WARN is an accept-with-caveat and is tallied separately downstream.
GREEN = (
    "PASS",
    "WARN",
)

# Executor label used for runs whose attempts_log never named an agent.
UNKNOWN_EXECUTOR = "(unrecorded)"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# --- per-run extraction ---------------------------------------------------------
|
|
29
|
+
|
|
30
|
+
def run_record(run) -> dict | None:
    """Distil one run's manifest into a flat RSI row.

    Reads `run.manifest()` and returns a dict with the keys `id`, `agent`,
    `models`, `status`, `attempts`, `iters_to_green`, `latency_s`, `tokens`
    and `cost`. Returns None when the manifest is empty or missing.

    Tokens, cost and latency are only summed from `attempts_log` entries that
    actually carry a numeric value; when no entry does, the field is None
    rather than zero.
    """
    manifest = run.manifest()
    if not manifest:
        return None

    entries = manifest.get("attempts_log", []) or []

    def numeric(key: str) -> list:
        # Values of `key` across the attempt entries, numeric ones only.
        return [entry[key] for entry in entries
                if isinstance(entry.get(key), (int, float))]

    # The first attempt that names an agent decides the executor for the run.
    agent = UNKNOWN_EXECUTOR
    for entry in entries:
        if entry.get("agent"):
            agent = entry["agent"]
            break

    models = sorted({entry["model"] for entry in entries if entry.get("model")})
    status = manifest.get("status", "?")

    # Trust the manifest's attempt count only when it is a positive int;
    # otherwise fall back to the number of logged attempts (None if none).
    attempts = manifest.get("attempts")
    if not (isinstance(attempts, int) and attempts >= 1):
        attempts = len(entries) or None

    token_values = numeric("tokens")
    cost_values = numeric("cost")

    # Prefer the manifest's total latency; else sum the per-attempt latencies.
    latency = manifest.get("total_latency_s")
    if not isinstance(latency, (int, float)):
        per_attempt = numeric("latency_s")
        latency = round(sum(per_attempt), 3) if per_attempt else None

    row = {
        "id": manifest.get("id", run.id),
        "agent": agent,
        "models": models,
        "status": status,
        "attempts": attempts,
        "iters_to_green": _iters_to_green(run, status, attempts),
        "latency_s": latency,
        "tokens": sum(token_values) if token_values else None,
        "cost": round(sum(cost_values), 6) if cost_values else None,
    }
    return row
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _iters_to_green(run, status: str, attempts: int | None) -> int | None:
    """Return the 1-based attempt at which the run first went green.

    Looks up the `verdict-<i>` artifacts for i in 1..attempts and returns the
    first i whose verdict status is in GREEN. If no such artifact is found but
    the run's final `status` is green, the attempt count itself is returned.
    Returns None when there is no attempt count or the run never went green.
    """
    if not attempts:
        return None

    for index in range(1, attempts + 1):
        verdict = run.get(f"verdict-{index}")
        if verdict is None:
            continue
        if getattr(verdict, "status", None) in GREEN:
            return index

    # No per-attempt verdict was green: fall back to the manifest's final status.
    return attempts if status in GREEN else None
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# --- aggregation -----------------------------------------------------------------
|
|
79
|
+
|
|
80
|
+
def aggregate(store: RunStore) -> dict:
    """Group every readable recorded run by executor and compute the RSI-L1 measures.

    Returns a dict with:
      * `store`     - the store root as a string,
      * `n_runs`    - number of runs that produced a row,
      * `executors` - one summary dict per executor, sorted by executor name,
      * `runs`      - the per-run rows from `run_record`,
      * `skipped`   - ids of runs that could not be read.

    A run is skipped (and listed under `skipped`) when opening it or reading
    its manifest raises OSError or ValueError - for example a directory whose
    name is not a valid run id, or a truncated `run.json`. One unreadable run
    must not take down the whole readout; it is reported rather than silently
    dropped so the totals stay honest.
    """
    rows = []
    skipped = []
    for rid in store.list_runs():
        try:
            run = store.open(rid)
            rec = run_record(run)
        except (OSError, ValueError):
            # ValueError covers both an invalid run id and a JSON decode error.
            skipped.append(rid)
            continue
        if rec is not None:
            rows.append(rec)

    groups: dict[str, list[dict]] = {}
    for r in rows:
        groups.setdefault(r["agent"], []).append(r)

    executors = []
    for agent in sorted(groups):
        rs = groups[agent]
        n = len(rs)
        n_pass = sum(1 for r in rs if r["status"] == "PASS")
        n_warn = sum(1 for r in rs if r["status"] == "WARN")
        n_block = sum(1 for r in rs if r["status"] == "BLOCK")
        n_other = n - n_pass - n_warn - n_block  # failed / started / unknown
        iters = [r["iters_to_green"] for r in rs if r["iters_to_green"] is not None]
        lats = [r["latency_s"] for r in rs if r["latency_s"] is not None]
        costed = [r for r in rs if r["cost"] is not None]
        tokened = [r for r in rs if r["tokens"] is not None]
        models = sorted({m for r in rs for m in r["models"]})
        executors.append({
            "executor": agent,
            "models": models,
            "runs": n,
            "pass": n_pass, "warn": n_warn, "block": n_block, "other": n_other,
            "green_rate": _rate(n_pass + n_warn, n),
            "block_rate": _rate(n_block, n),
            "iters_to_green": {
                "n": len(iters),
                "mean": round(sum(iters) / len(iters), 2) if iters else None,
                "min": min(iters) if iters else None,
                "max": max(iters) if iters else None,
            },
            "latency_s": {
                "n": len(lats),
                "total": round(sum(lats), 3) if lats else None,
                "mean": round(sum(lats) / len(lats), 3) if lats else None,
            },
            # Honest cost: sums cover ONLY the runs whose adapter reported usage.
            "cost_usd": {
                "runs_recorded": len(costed),
                "total": round(sum(r["cost"] for r in costed), 6) if costed else None,
            },
            "tokens": {
                "runs_recorded": len(tokened),
                "total": sum(r["tokens"] for r in tokened) if tokened else None,
            },
        })

    return {
        "store": str(store.root),
        "n_runs": len(rows),
        "executors": executors,
        "runs": rows,
        "skipped": skipped,
    }
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _rate(n: int, d: int) -> float | None:
    """Return n/d rounded to three decimals, or None when d is zero."""
    if not d:
        return None
    return round(n / d, 3)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
# --- rendering ---------------------------------------------------------------------
|
|
147
|
+
|
|
148
|
+
def render(summary: dict) -> str:
    """Format an `aggregate()` summary as a plain-text table.

    The output starts with a title, the store path and run count, and a short
    legend. With no executors it ends with a hint on how to record a first
    run; otherwise it lists one row per executor (plus a `models:` line when
    any model was recorded).
    """
    out = [
        "RSI-L1 — measured selection (per-executor signal from the run store)",
        f"store: {summary['store']} runs: {summary['n_runs']}",
        "",
        " green = the loop accepted the run (final PASS or WARN)",
        " iters = attempts until the first accepted verdict (mean over green runs)",
        " cost = USD summed over runs whose executor reported usage;",
        " 'not yet recorded' means the adapter surfaced no usage — no number",
        " is ever invented for those runs.",
        "",
    ]

    executors = summary["executors"]
    if not executors:
        out.append("no runs recorded yet — `sembl-stack loop task.yaml` starts the feed.")
        return "\n".join(out)

    header = (f" {'executor':16} {'runs':>4} {'green':>6} {'block':>6} "
              f"{'iters':>6} {'latency':>9} {'cost (USD)':>18}")
    out += [header, " " + "-" * (len(header) - 2)]

    for entry in executors:
        mean_iters = entry["iters_to_green"]["mean"]
        mean_latency = entry["latency_s"]["mean"]
        cost = entry["cost_usd"]

        # A cost figure is only printed when at least one run reported usage.
        if cost["total"] is None:
            cost_cell = "not yet recorded"
        else:
            cost_cell = f"{cost['total']:.4f} ({cost['runs_recorded']}/{entry['runs']} runs)"

        iters_cell = mean_iters if mean_iters is not None else '-'
        latency_cell = f'{mean_latency:.1f}s' if mean_latency is not None else '-'

        out.append(
            f" {entry['executor']:16} {entry['runs']:>4} "
            f"{_pct(entry['green_rate']):>6} {_pct(entry['block_rate']):>6} "
            f"{iters_cell:>6} "
            f"{latency_cell:>9} "
            f"{cost_cell:>18}")

        if entry["models"]:
            out.append(f" {'':16} models: {', '.join(entry['models'])}")

    return "\n".join(out)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _pct(x: float | None) -> str:
    """Render a 0..1 rate as a whole-number percentage, or "-" for None."""
    if x is None:
        return "-"
    return f"{x * 100:.0f}%"
|
sembl_stack/runner.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""TUI Phase 2 orchestration glue — run the REAL loop and stream stage transitions.
|
|
2
|
+
|
|
3
|
+
Pure, headless, no Textual: the wizard (and any future surface) drives `run_stages` in a
|
|
4
|
+
worker thread and receives `StageEvent`s as the loop's stage functions actually execute —
|
|
5
|
+
plan (L2 -> rail "bounds"), execute (L3+L4 -> rail "loop"), verify (L5 -> rail "verify").
|
|
6
|
+
No new core/gate logic: the events come from thin proxies wrapped around the SAME adapter
|
|
7
|
+
objects `loop.run` already calls, so a TUI run and a headless `sembl-stack loop` run are
|
|
8
|
+
byte-identical in behavior and artifacts.
|
|
9
|
+
|
|
10
|
+
Config resolution mirrors the CLI `loop` command exactly: an explicit repo
|
|
11
|
+
`sembl.stack.yaml` always wins; otherwise the onboarded profile is the default.
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import copy
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Callable
|
|
19
|
+
|
|
20
|
+
import yaml
|
|
21
|
+
|
|
22
|
+
from .artifacts import Task
|
|
23
|
+
from .config import StackConfig, load as load_config
|
|
24
|
+
from .loop import LoopResult, run as run_loop
|
|
25
|
+
|
|
26
|
+
# loop stage -> Phase-0 stage-rail name (session.STAGES)
|
|
27
|
+
RAIL = {
    "plan": "bounds",
    "execute": "loop",
    "verify": "verify",
}

# Signature of the callback that receives each StageEvent.
Emit = Callable[["StageEvent"], None]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class StageEvent:
    """One stage transition reported to the `emit` callback."""

    # Rail stage name: "bounds" | "loop" | "verify".
    stage: str
    # Transition state: "running" | "done" | "fail".
    state: str
    # Optional free text, e.g. "attempt 2" or the verdict status.
    detail: str = ""
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class _SpecProxy:
    """Wrap a spec adapter so `plan` reports "bounds" stage events.

    Emits `running` before delegating, then `done` on success or `fail` if the
    inner call raises (the exception is re-raised unchanged). Every other
    attribute is forwarded to the wrapped adapter so the proxy is a drop-in
    replacement for it.
    """

    def __init__(self, inner, emit: Emit):
        self._inner, self._emit = inner, emit

    def __getattr__(self, name):
        # Only reached when normal lookup fails. Refusing the proxy's own
        # slots avoids infinite recursion if they are not set yet.
        if name in ("_inner", "_emit"):
            raise AttributeError(name)
        return getattr(self._inner, name)

    def plan(self, task):
        """Delegate to the inner adapter's `plan`, bracketing it with events."""
        self._emit(StageEvent("bounds", "running"))
        try:
            bounds = self._inner.plan(task)
        except Exception:
            self._emit(StageEvent("bounds", "fail"))
            raise
        self._emit(StageEvent("bounds", "done"))
        return bounds
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class _ExecuteProxy:
    """Wrap an execute adapter so `run` reports "loop" stage events.

    Counts calls to `run` and tags every event with `attempt <n>`. Every other
    attribute is forwarded to the wrapped adapter so the proxy is a drop-in
    replacement for it.
    """

    def __init__(self, inner, emit: Emit):
        self._inner, self._emit = inner, emit
        self._attempt = 0

    def __getattr__(self, name):
        # Only reached when normal lookup fails. Refusing the proxy's own
        # slots avoids infinite recursion if they are not set yet.
        if name in ("_inner", "_emit", "_attempt"):
            raise AttributeError(name)
        return getattr(self._inner, name)

    def run(self, task, bounds, sandbox, feedback):
        """Delegate to the inner adapter's `run`, bracketing it with events."""
        self._attempt += 1
        self._emit(StageEvent("loop", "running", f"attempt {self._attempt}"))
        try:
            result = self._inner.run(task, bounds, sandbox, feedback)
        except Exception:
            # loop.execute converts the crash into a BLOCKed Change; mark the rail
            # anyway so the user sees which attempt died.
            self._emit(StageEvent("loop", "fail", f"attempt {self._attempt} crashed"))
            raise
        self._emit(StageEvent("loop", "done", f"attempt {self._attempt}"))
        return result
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class _VerifyProxy:
    """Wrap a verify adapter so `verify` reports "verify" stage events.

    Emits `running` before delegating. Afterwards it emits `done` when the
    verdict status is PASS or WARN and `fail` otherwise, with the status as
    the event detail; an exception from the inner call emits `fail` and is
    re-raised. Every other attribute is forwarded to the wrapped adapter so
    the proxy is a drop-in replacement for it.
    """

    def __init__(self, inner, emit: Emit):
        self._inner, self._emit = inner, emit

    def __getattr__(self, name):
        # Only reached when normal lookup fails. Refusing the proxy's own
        # slots avoids infinite recursion if they are not set yet.
        if name in ("_inner", "_emit"):
            raise AttributeError(name)
        return getattr(self._inner, name)

    def verify(self, bounds, change, strict):
        """Delegate to the inner adapter's `verify`, bracketing it with events."""
        self._emit(StageEvent("verify", "running"))
        try:
            verdict = self._inner.verify(bounds, change, strict)
        except Exception:
            self._emit(StageEvent("verify", "fail"))
            raise
        # A verdict object without a `status` attribute is reported as "?".
        status = getattr(verdict, "status", "?")
        self._emit(StageEvent(
            "verify", "done" if status in ("PASS", "WARN") else "fail", status))
        return verdict
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def load_task(repo: str, name: str = "task.yaml") -> Task | None:
    """Load `<repo>/<name>` as a Task.

    Returns None when the file does not exist or its YAML content is not a
    mapping. `repo` and `spec_path` values in the file are made absolute:
    relative values are resolved against the directory holding the task file.
    """
    task_file = Path(repo).resolve() / name
    if not task_file.is_file():
        return None

    data = yaml.safe_load(task_file.read_text(encoding="utf-8")) or {}
    if not isinstance(data, dict):
        return None

    base = task_file.parent

    def absolutize(value):
        # Empty/None values pass through untouched.
        if not value:
            return value
        candidate = Path(value)
        if candidate.is_absolute():
            return str(candidate)
        return str((base / candidate).resolve())

    text = data.get("text", "")
    repo_dir = absolutize(data.get("repo", "."))
    spec_path = absolutize(data.get("spec_path"))
    return Task(text=text, repo=repo_dir, spec_path=spec_path)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def resolve_config(repo: str, config_name: str = "sembl.stack.yaml") -> StackConfig:
    """Pick the stack config for `repo`.

    If `<repo>/<config_name>` exists it is loaded and returned. Otherwise the
    saved profile (if any) is converted to overrides and applied on top of the
    default config; with no profile the defaults are loaded as-is.
    """
    repo_cfg = Path(repo).resolve() / config_name
    if repo_cfg.is_file():
        return load_config(str(repo_cfg))

    # Imported lazily, only on the profile fallback path.
    from . import profile as profile_mod

    prof = profile_mod.load()
    if prof is None:
        overrides = None
    else:
        overrides = profile_mod.to_stack_overrides(prof)
    return load_config(None, overrides)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def run_stages(cfg, task: Task, emit: Emit) -> LoopResult:
    """Run the loop on a copy of `cfg` whose adapters report stage events.

    The config is shallow-copied and its `spec`, `execute` and `verify`
    adapters are replaced by event-emitting proxies around the originals.
    After the loop returns, one final "verify" event carrying the result's
    verdict status is emitted. This call blocks until the loop finishes and
    `emit` is invoked on the calling thread, so run it from a worker thread
    and marshal events to the UI thread yourself (e.g. Textual's
    `call_from_thread`).
    """
    wrapped = copy.copy(cfg)  # shallow: untouched adapters stay shared with cfg
    for attr, proxy_cls in (
        ("spec", _SpecProxy),
        ("execute", _ExecuteProxy),
        ("verify", _VerifyProxy),
    ):
        setattr(wrapped, attr, proxy_cls(getattr(cfg, attr), emit))

    result = run_loop(wrapped, task)

    verdict = result.verdict
    final_state = "done" if verdict.status in ("PASS", "WARN") else "fail"
    emit(StageEvent("verify", final_state, verdict.status))
    return result
|
sembl_stack/session.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Phase-0 guided-session pointer over the run store.
|
|
2
|
+
|
|
3
|
+
A tiny `.sembl/session.json` `{repo, mode, run_id, current_stage, completed}` is what makes the
|
|
4
|
+
guided `sembl-stack` TUI leave/continue-anywhere: it points at a run in the existing store and
|
|
5
|
+
records which stage the user reached. Pure and headless — unit-testable without Textual; the
|
|
6
|
+
wizard only renders and advances it.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
from dataclasses import asdict, dataclass, field
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
# The Phase-0 stage rail (CI-run-page order). Only stages that are already headless.
|
|
15
|
+
STAGES = ["bounds", "loop", "verify", "merge", "deploy", "postdeploy"]


@dataclass
class Session:
    """Pointer into a guided run: which repo/run, and how far along the rail."""

    repo: str = "."
    mode: str = "existing"  # "new" | "existing"
    run_id: str | None = None
    current_stage: str = STAGES[0]
    completed: list[str] = field(default_factory=list)

    def advance(self) -> str:
        """Record the current stage as completed and step to the next one.

        On the last stage the pointer stays where it is. Returns the stage
        that is current after the call. Raises ValueError if the current
        stage is not one of STAGES.
        """
        stage = self.current_stage
        if stage not in self.completed:
            self.completed.append(stage)
        following = STAGES.index(stage) + 1
        if following < len(STAGES):
            self.current_stage = STAGES[following]
        return self.current_stage

    @property
    def done(self) -> bool:
        """True once every stage in STAGES appears in `completed`."""
        remaining = [stage for stage in STAGES if stage not in self.completed]
        return not remaining
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _path(repo: str) -> Path:
    """Return the absolute location of `<repo>/.sembl/session.json`."""
    repo_root = Path(repo).resolve()
    return repo_root.joinpath(".sembl", "session.json")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def save(session: Session) -> Path:
    """Write the session as indented JSON under its repo and return the file path.

    The `.sembl` directory is created if it does not exist yet.
    """
    target = _path(session.repo)
    target.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(asdict(session), indent=2)
    target.write_text(payload, encoding="utf-8")
    return target
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def load(repo: str) -> "Session | None":
    """Read the saved session, or None if it's missing OR unusable.

    A truncated/corrupt `session.json`, one pointing at an unknown stage, or
    one whose fields have the wrong JSON type must NOT brick the guided
    entrypoint - an unusable pointer is treated exactly like no pointer
    (start fresh).

    Unknown keys in the file are ignored. Entries of `completed` that are not
    known stages are dropped.
    """
    p = _path(repo)
    if not p.is_file():
        return None
    try:
        data = json.loads(p.read_text(encoding="utf-8"))
        if not isinstance(data, dict):
            return None
        s = Session(**{k: v for k, v in data.items() if k in Session.__dataclass_fields__})
    except (OSError, ValueError, TypeError):
        return None
    # The dataclass does not enforce its annotations, so check the scalar
    # fields here: a non-string repo would crash save()'s Path(...) and a
    # non-string run_id is not a usable run id.
    if not isinstance(s.repo, str) or not isinstance(s.mode, str):
        return None
    if s.run_id is not None and not isinstance(s.run_id, str):
        return None
    # Validate the stage pointer so advance()'s STAGES.index never raises on bad state.
    if s.current_stage not in STAGES:
        return None
    if not isinstance(s.completed, list):
        return None
    s.completed = [stage for stage in s.completed if stage in STAGES]
    return s
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def resume_or_new(repo: str) -> Session:
    """Return the saved session when one exists and is unfinished.

    Otherwise (nothing saved, nothing loadable, or every stage already
    completed) return a fresh Session bound to the resolved repo path and
    positioned at the first stage.
    """
    saved = load(repo)
    if saved is None or saved.done:
        return Session(repo=str(Path(repo).resolve()))
    return saved
|
sembl_stack/specgraph.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"""Build a deterministic SpecGraph artifact from task/spec inputs.
|
|
2
|
+
|
|
3
|
+
The graph is intentionally structural and local: no model call, no repo scan, no
|
|
4
|
+
network. It gives the reconciliation stage a stable spec-side artifact before a
|
|
5
|
+
heavier spec parser exists.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import re
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from .artifacts import Bounds, SpecGraph, Task
|
|
13
|
+
|
|
14
|
+
# Stored in the graph's `data` payload as "schema_version".
SCHEMA_VERSION = 1

# "route|endpoint|path" + ":", "=" or "-" + optional HTTP method + a /path.
_ROUTE_RE = re.compile(
    r"(?:route|endpoint|path)\s*[:=-]\s*(?:(GET|POST|PUT|PATCH|DELETE)\s+)?"
    r"(/[A-Za-z0-9_./{}:-]+)",
    re.IGNORECASE,
)

# "entity|model|table" + ":", "=" or "-" + a label of 2..81 characters.
_ENTITY_RE = re.compile(
    r"(?:entity|model|table)\s*[:=-]\s*([A-Za-z][A-Za-z0-9_ -]{1,80})",
    re.IGNORECASE,
)

# Keywords that mark a line as a declared rule.
_RULE_RE = re.compile(r"\b(must|only|never|required|requires|should)\b", re.IGNORECASE)

# Files read first (in this order) when the spec path is a directory.
_SPEC_FILENAMES = (
    "tasks.md",
    "spec.md",
    "plan.md",
    "requirements.md",
    "README.md",
)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def build_spec_graph(task: Task, bounds: Bounds | None = None) -> SpecGraph:
    """Assemble a SpecGraph from the task text, its spec files and optional bounds.

    The graph has a single `task` root node. Each text source (the task text
    first, then whatever `_read_sources` yields for `task.spec_path`) becomes
    a `source:<index>` node with the concepts found in it hanging off it.
    When `bounds` is given, its editable paths and forbidden areas are added
    as scope nodes linked from the task.
    """
    nodes: list[dict] = []
    edges: list[dict] = []
    known_ids: set[str] = set()

    def add_node(node_id: str, node_type: str, name: str, **attrs) -> None:
        # The first registration of an id wins; repeats are ignored.
        if node_id in known_ids:
            return
        known_ids.add(node_id)
        # Empty attribute values are left out of the node.
        extras = {k: v for k, v in attrs.items() if v not in (None, "", [], {})}
        nodes.append({"id": node_id, "type": node_type, "name": name, **extras})

    def add_edge(src: str, dst: str, rel: str) -> None:
        edges.append({"from": src, "to": dst, "type": rel})

    add_node("task", "task", "task", text=task.text, repo=task.repo)

    sources = _read_sources(getattr(task, "spec_path", None))
    if task.text:
        sources.insert(0, ("task.text", task.text))

    for position, (label, body) in enumerate(sources):
        source_id = f"source:{position}"
        source_path = None if label == "task.text" else label
        add_node(source_id, "source", label, path=source_path)
        add_edge("task", source_id, "declares")
        _extract_concepts(body, source_id, add_node, add_edge)

    if bounds is not None:
        for path in bounds.editable_paths:
            scope_id = f"scope:editable:{path}"
            add_node(scope_id, "editable_path", path)
            add_edge("task", scope_id, "allows")
        for path in bounds.forbidden_areas:
            scope_id = f"scope:forbidden:{path}"
            add_node(scope_id, "forbidden_area", path)
            add_edge("task", scope_id, "forbids")

    source_labels = [label for label, _ in sources]
    return SpecGraph(
        nodes=nodes,
        edges=edges,
        sources=source_labels,
        data={"schema_version": SCHEMA_VERSION},
    )
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _read_sources(spec_path: str | None) -> list[tuple[str, str]]:
    """Return `(path, text)` pairs for the spec at `spec_path`.

    A file yields one pair. A directory yields the well-known spec filenames
    that exist (in `_SPEC_FILENAMES` order) followed by any remaining `*.md`
    files in sorted order. An empty or non-existent path yields an empty list.
    """
    if not spec_path:
        return []

    root = Path(spec_path)
    if root.is_file():
        return [(str(root), _read_text(root))]
    if not root.is_dir():
        return []

    preferred = [
        candidate
        for candidate in (root / name for name in _SPEC_FILENAMES)
        if candidate.is_file()
    ]
    others = [md for md in sorted(root.glob("*.md")) if md not in preferred]
    return [(str(found), _read_text(found)) for found in preferred + others]
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _read_text(path: Path) -> str:
    """Read `path` as UTF-8, replacing undecodable bytes instead of failing."""
    try:
        with path.open(encoding="utf-8") as handle:
            return handle.read()
    except UnicodeDecodeError:
        pass
    # Strict decoding failed: read again, substituting U+FFFD for bad bytes.
    with path.open(encoding="utf-8", errors="replace") as handle:
        return handle.read()
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _extract_concepts(text: str, source_id: str, add_node, add_edge) -> None:
    """Register the routes, entities and rule lines found in `text`.

    Routes and entities are linked from `source_id` with a "mentions" edge.
    Each line containing a rule keyword becomes a `data_rule` node linked with
    a "declares" edge and tagged with its 1-based line number.
    """
    for found in _ROUTE_RE.finditer(text):
        verb, raw_route = found.groups()
        route = _clean_route(raw_route)
        verb = (verb or "ANY").upper()  # no explicit method means "ANY"
        route_id = f"route:{verb}:{route}"
        add_node(route_id, "route", route, method=verb)
        add_edge(source_id, route_id, "mentions")

    for found in _ENTITY_RE.finditer(text):
        entity = _clean_label(found.group(1))
        if entity:
            entity_id = f"entity:{_slug(entity)}"
            add_node(entity_id, "entity", entity)
            add_edge(source_id, entity_id, "mentions")

    for line_no, raw_line in enumerate(text.splitlines(), start=1):
        # Drop list bullets and indentation before looking for rule keywords.
        stripped = raw_line.strip(" -*\t")
        if stripped and _RULE_RE.search(stripped):
            rule_id = f"rule:{source_id}:{line_no}"
            add_node(rule_id, "data_rule", stripped, line=line_no)
            add_edge(source_id, rule_id, "declares")
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _clean_label(value: str) -> str:
    """Collapse whitespace runs to one space and trim spaces, dots, colons and dashes."""
    collapsed = re.sub(r"\s+", " ", value)
    return collapsed.strip(" .:-")
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _clean_route(value: str) -> str:
    """Strip trailing sentence punctuation (`.`, `,`, `;`, `:`, `)`) from a route."""
    trailing = ".,;:)"
    return value.rstrip(trailing)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _slug(value: str) -> str:
    """Lower-case `value` and join its alphanumeric runs with dashes.

    Returns "unnamed" when nothing alphanumeric is left.
    """
    dashed = re.sub(r"[^a-z0-9]+", "-", value.lower())
    trimmed = dashed.strip("-")
    if trimmed:
        return trimmed
    return "unnamed"
|
sembl_stack/store.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""Run store — artifacts persisted at `.sembl/runs/<run-id>/` in the target repo.
|
|
2
|
+
|
|
3
|
+
Local-first, no server required to read a past run. Each run is a directory of JSON
|
|
4
|
+
artifacts plus a `run.json` manifest. This is what makes runs inspectable (TUI/web),
|
|
5
|
+
resumable, and enterable at an arbitrary stage (supply the upstream artifact).
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import re
|
|
11
|
+
import time
|
|
12
|
+
import uuid
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
from . import artifacts
|
|
16
|
+
from .artifacts import _Serializable
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Run:
    """One run directory: JSON artifacts plus a `run.json` manifest.

    Constructing a Run creates `<root>/<run_id>/` if it does not exist. All
    files are written atomically (temp file + rename), so a reader or a crash
    never observes a half-written manifest or artifact.
    """

    def __init__(self, root: Path, run_id: str):
        self.id = run_id
        self.dir = root / run_id
        self.dir.mkdir(parents=True, exist_ok=True)
        self._manifest_path = self.dir / "run.json"

    # -- artifacts --------------------------------------------------------------
    def put(self, artifact: _Serializable, name: str | None = None) -> str:
        """Persist an artifact; `name` defaults to its KIND. Returns the file name."""
        name = name or artifact.KIND
        fname = f"{name}.json"
        self._write_atomic(self.dir / fname, artifact.to_json())
        self._touch_manifest(name, artifact.KIND, fname)
        return fname

    def get(self, name: str):
        """Load an artifact by name (reconstructs the right type via its `_kind` tag).

        Returns None when no `<name>.json` file exists in the run directory.
        """
        path = self.dir / f"{name}.json"
        if not path.is_file():
            return None
        return artifacts.from_dict(json.loads(path.read_text(encoding="utf-8")))

    def has(self, name: str) -> bool:
        """True when `<name>.json` exists in the run directory."""
        return (self.dir / f"{name}.json").is_file()

    # -- manifest ---------------------------------------------------------------
    def manifest(self) -> dict:
        """Return the parsed `run.json`, or an empty dict when it does not exist."""
        if self._manifest_path.is_file():
            return json.loads(self._manifest_path.read_text(encoding="utf-8"))
        return {}

    def set_status(self, status: str, **extra) -> None:
        """Set the manifest's `status`, stamp `updated`, and merge `extra` keys."""
        m = self.manifest()
        m["status"] = status
        m["updated"] = time.time()
        m.update(extra)
        self._write_manifest(m)

    def record_attempt(self, attempt: int, **metric) -> None:
        """Append a per-attempt cost/latency record to the manifest (C1.3).

        One entry per execute call: `{attempt, latency_s, agent, model, exit_code,
        tokens, cost}` (tokens/cost only where the executor reported usage). This is the
        signal `sembl-stack runs` shows and that the process-RSI / eval (B) layer consumes.
        Keys with a None value are dropped so the manifest stays clean.
        """
        m = self.manifest()
        entry = {"attempt": attempt}
        entry.update({k: v for k, v in metric.items() if v is not None})
        m.setdefault("attempts_log", []).append(entry)
        self._write_manifest(m)

    def _touch_manifest(self, name: str, kind: str, fname: str) -> None:
        """Register (or refresh) an artifact entry in the manifest."""
        m = self.manifest()
        m.setdefault("artifacts", {})[name] = {
            "kind": kind, "file": fname, "ts": time.time()}
        self._write_manifest(m)

    def _write_manifest(self, m: dict) -> None:
        """Serialize `m` as indented JSON and replace `run.json` with it."""
        self._write_atomic(
            self._manifest_path,
            json.dumps(m, indent=2, ensure_ascii=False))

    @staticmethod
    def _write_atomic(path: Path, text: str) -> None:
        """Write `text` to a sibling temp file, then rename it over `path`.

        Writing in place truncates the target first, so an interrupted write
        would leave empty or partial JSON behind; the rename swaps the
        complete file in as a single step.
        """
        tmp = path.with_name(path.name + ".tmp")
        tmp.write_text(text, encoding="utf-8")
        tmp.replace(path)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
_RUN_ID = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$")


class RunStore:
    """The collection of runs under `<repo>/.sembl/runs`."""

    def __init__(self, repo: str):
        self.root = Path(repo).resolve() / ".sembl" / "runs"

    def new_run(self, task=None) -> Run:
        """Create a run with a fresh timestamped id and an initial manifest.

        The manifest starts with status "started" and no artifacts; when a
        task is given, its `text` and `repo` are recorded as well.
        """
        run_id = time.strftime("%Y%m%d-%H%M%S-") + uuid.uuid4().hex[:6]
        run = Run(self.root, run_id)
        m = {"id": run_id, "created": time.time(), "status": "started",
             "artifacts": {}}
        if task is not None:
            m["task"] = {"text": getattr(task, "text", ""),
                         "repo": getattr(task, "repo", "")}
        run._write_manifest(m)
        return run

    def open(self, run_id: str) -> Run:
        """Open the run named `run_id`.

        Raises ValueError when `run_id` is not a plain directory name.
        """
        # A run id is a single directory name under .sembl/runs — never a path. Rejecting
        # separators/leading dots here keeps `runs <id>` / `apply <id>` from resolving
        # (and mkdir-ing) outside the store via a crafted id like `..\\..\\evil`.
        # fullmatch (not match) is used because `$` also matches just before a
        # trailing newline, which would let an id like "abc\n" through.
        if not isinstance(run_id, str) or not _RUN_ID.fullmatch(run_id):
            raise ValueError(f"invalid run id: {run_id!r}")
        return Run(self.root, run_id)

    def list_runs(self) -> list[str]:
        """Return the ids of all runs in the store, newest id first.

        Only directories whose name is a valid run id are listed, so every
        returned id can be passed to `open`. Stray directories (hidden or
        temp folders) are ignored.
        """
        if not self.root.is_dir():
            return []
        return sorted(
            (p.name for p in self.root.iterdir()
             if p.is_dir() and _RUN_ID.fullmatch(p.name)),
            reverse=True)
|