sembl-stack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. sembl_stack/__init__.py +3 -0
  2. sembl_stack/adapters/__init__.py +0 -0
  3. sembl_stack/adapters/_redact.py +19 -0
  4. sembl_stack/adapters/base.py +179 -0
  5. sembl_stack/adapters/codegraph_cbm.py +95 -0
  6. sembl_stack/adapters/deploy_vercel.py +215 -0
  7. sembl_stack/adapters/execute_aider.py +115 -0
  8. sembl_stack/adapters/execute_claude.py +114 -0
  9. sembl_stack/adapters/execute_mock.py +53 -0
  10. sembl_stack/adapters/execute_opencode.py +114 -0
  11. sembl_stack/adapters/merge_git.py +107 -0
  12. sembl_stack/adapters/postdeploy_http.py +82 -0
  13. sembl_stack/adapters/review_coderabbit.py +215 -0
  14. sembl_stack/adapters/review_llm.py +142 -0
  15. sembl_stack/adapters/review_mock.py +42 -0
  16. sembl_stack/adapters/sandbox_worktree.py +79 -0
  17. sembl_stack/adapters/spec_sembl.py +91 -0
  18. sembl_stack/adapters/verify_sembl.py +77 -0
  19. sembl_stack/artifacts.py +207 -0
  20. sembl_stack/cli.py +759 -0
  21. sembl_stack/config.py +87 -0
  22. sembl_stack/contextgraph.py +154 -0
  23. sembl_stack/doctor.py +111 -0
  24. sembl_stack/loop.py +380 -0
  25. sembl_stack/onboarding.py +272 -0
  26. sembl_stack/presets.py +114 -0
  27. sembl_stack/profile.py +193 -0
  28. sembl_stack/reconciliation.py +138 -0
  29. sembl_stack/registry.py +91 -0
  30. sembl_stack/rsi.py +188 -0
  31. sembl_stack/runner.py +134 -0
  32. sembl_stack/session.py +86 -0
  33. sembl_stack/specgraph.py +146 -0
  34. sembl_stack/store.py +112 -0
  35. sembl_stack/tracing.py +51 -0
  36. sembl_stack/transport/__init__.py +0 -0
  37. sembl_stack/transport/mcp_client.py +58 -0
  38. sembl_stack/tui.py +86 -0
  39. sembl_stack/views.py +74 -0
  40. sembl_stack/wizard.py +233 -0
  41. sembl_stack-0.1.0.dist-info/METADATA +165 -0
  42. sembl_stack-0.1.0.dist-info/RECORD +45 -0
  43. sembl_stack-0.1.0.dist-info/WHEEL +4 -0
  44. sembl_stack-0.1.0.dist-info/entry_points.txt +2 -0
  45. sembl_stack-0.1.0.dist-info/licenses/LICENSE +201 -0
sembl_stack/rsi.py ADDED
@@ -0,0 +1,188 @@
1
+ """RSI-L1 readout — measured selection over the run store (north-star first rung).
2
+
3
+ The north star (docs/process-self-improvement.md) climbs L0 (manual swap) -> L1 (measured
4
+ selection): pick executors on RECORDED signal, not vibes. This module is that signal,
5
+ aggregated: per executor, over every run recorded in `.sembl/runs/`, how often the loop
6
+ went green, in how many iterations, and at what cost.
7
+
8
+ Honesty rules [LOCKED]:
9
+ * everything here is read back from run-store artifacts the loop already persisted —
10
+ nothing is estimated, sampled, or modeled;
11
+ * cost/tokens appear ONLY when the executor adapter reported usage (the C1.3
12
+ `attempts_log` hook in `loop.py`); runs recorded before an adapter reported usage
13
+ show "not yet recorded" — never an invented number;
14
+ * "green" = the loop accepted the run (final PASS or WARN; WARN is counted separately
15
+ so a WARN-heavy executor can't hide). iterations-to-green = the first attempt whose
16
+ verdict the loop accepted.
17
+
18
+ Pure and headless: no Textual, no click — `cli.py rsi` renders it.
19
+ """
20
+ from __future__ import annotations
21
+
22
+ from .store import RunStore
23
+
24
+ GREEN = ("PASS", "WARN") # statuses the loop accepts (WARN = accepted-with-caveat)
25
+ UNKNOWN_EXECUTOR = "(unrecorded)" # runs whose attempts_log never named an agent
26
+
27
+
28
+ # --- per-run extraction ---------------------------------------------------------
29
+
30
def run_record(run) -> dict | None:
    """Distill one run's manifest (plus per-attempt verdicts) into a single RSI row.

    Returns None when `run.manifest()` is empty/falsy. Otherwise returns a dict with
    the keys `id`, `agent`, `models`, `status`, `attempts`, `iters_to_green`,
    `latency_s`, `tokens` and `cost`. `tokens`/`cost`/`latency_s` are None when the
    manifest holds no numeric reading for them — nothing is filled in.
    """
    manifest = run.manifest()
    if not manifest:
        return None

    entries = manifest.get("attempts_log", []) or []

    def _numeric(key):
        # Keep only entries that actually carry a number under `key`.
        return [e[key] for e in entries if isinstance(e.get(key), (int, float))]

    # Executor = the first attempt entry that names an agent.
    named = [e.get("agent") for e in entries if e.get("agent")]
    agent = named[0] if named else UNKNOWN_EXECUTOR
    models = sorted({e["model"] for e in entries if e.get("model")})
    status = manifest.get("status", "?")

    # Prefer the manifest's attempt count; fall back to the length of the log.
    attempts = manifest.get("attempts")
    if not (isinstance(attempts, int) and attempts >= 1):
        attempts = len(entries) or None

    tokens = _numeric("tokens")
    costs = _numeric("cost")

    # Prefer the recorded total latency; otherwise sum the per-attempt latencies.
    latency = manifest.get("total_latency_s")
    if not isinstance(latency, (int, float)):
        per_attempt = _numeric("latency_s")
        latency = round(sum(per_attempt), 3) if per_attempt else None

    row = {
        "id": manifest.get("id", run.id),
        "agent": agent,
        "models": models,
        "status": status,
        "attempts": attempts,
        "iters_to_green": _iters_to_green(run, status, attempts),
        "latency_s": latency,
        "tokens": sum(tokens) if tokens else None,
        "cost": round(sum(costs), 6) if costs else None,
    }
    return row
61
+
62
+
63
def _iters_to_green(run, status: str, attempts: int | None) -> int | None:
    """Return the 1-based attempt at which the run first went green, or None.

    Looks up the `verdict-<i>` artifacts for i = 1..attempts and returns the first
    i whose verdict status is in GREEN. If none is found but the run's final
    `status` is in GREEN, the attempt count itself is returned. With no attempt
    count the result is always None.
    """
    if not attempts:
        return None
    attempt_no = 1
    while attempt_no <= attempts:
        verdict = run.get(f"verdict-{attempt_no}")
        if verdict is not None and getattr(verdict, "status", None) in GREEN:
            return attempt_no
        attempt_no += 1
    # No green per-attempt verdict on disk: trust the final status if it is green.
    return attempts if status in GREEN else None
76
+
77
+
78
+ # --- aggregation -----------------------------------------------------------------
79
+
80
def aggregate(store: RunStore) -> dict:
    """Group every readable recorded run by executor and compute the RSI-L1 measures.

    Returns a dict with:
      * `store`     — the store root as a string;
      * `n_runs`    — number of runs that produced a row;
      * `executors` — one summary dict per executor, sorted by executor name;
      * `runs`      — the per-run rows from `run_record`;
      * `skipped`   — ids of run directories that could not be read (invalid run
                      id, unreadable or malformed JSON). They are listed rather
                      than silently dropped, and never contribute to any number.
    """
    rows = []
    skipped = []
    for rid in store.list_runs():
        try:
            # `open` raises ValueError for a directory name that is not a valid run
            # id; reading the manifest/verdicts can raise OSError or (for malformed
            # JSON) a ValueError subclass. One bad run must not abort the readout.
            run = store.open(rid)
            rec = run_record(run)
        except (OSError, ValueError):
            skipped.append(rid)
            continue
        if rec is not None:
            rows.append(rec)

    groups: dict[str, list[dict]] = {}
    for r in rows:
        groups.setdefault(r["agent"], []).append(r)

    executors = []
    for agent in sorted(groups):
        rs = groups[agent]
        n = len(rs)
        n_pass = sum(1 for r in rs if r["status"] == "PASS")
        n_warn = sum(1 for r in rs if r["status"] == "WARN")
        n_block = sum(1 for r in rs if r["status"] == "BLOCK")
        n_other = n - n_pass - n_warn - n_block  # failed / started / unknown
        iters = [r["iters_to_green"] for r in rs if r["iters_to_green"] is not None]
        lats = [r["latency_s"] for r in rs if r["latency_s"] is not None]
        costed = [r for r in rs if r["cost"] is not None]
        tokened = [r for r in rs if r["tokens"] is not None]
        models = sorted({m for r in rs for m in r["models"]})
        executors.append({
            "executor": agent,
            "models": models,
            "runs": n,
            "pass": n_pass, "warn": n_warn, "block": n_block, "other": n_other,
            "green_rate": _rate(n_pass + n_warn, n),
            "block_rate": _rate(n_block, n),
            "iters_to_green": {
                "n": len(iters),
                "mean": round(sum(iters) / len(iters), 2) if iters else None,
                "min": min(iters) if iters else None,
                "max": max(iters) if iters else None,
            },
            "latency_s": {
                "n": len(lats),
                "total": round(sum(lats), 3) if lats else None,
                "mean": round(sum(lats) / len(lats), 3) if lats else None,
            },
            # Honest cost: sums cover ONLY the runs whose adapter reported usage.
            "cost_usd": {
                "runs_recorded": len(costed),
                "total": round(sum(r["cost"] for r in costed), 6) if costed else None,
            },
            "tokens": {
                "runs_recorded": len(tokened),
                "total": sum(r["tokens"] for r in tokened) if tokened else None,
            },
        })

    return {
        "store": str(store.root),
        "n_runs": len(rows),
        "executors": executors,
        "runs": rows,
        "skipped": skipped,
    }
140
+
141
+
142
+ def _rate(n: int, d: int) -> float | None:
143
+ return round(n / d, 3) if d else None
144
+
145
+
146
+ # --- rendering ---------------------------------------------------------------------
147
+
148
def render(summary: dict) -> str:
    """Format an `aggregate` summary as a plain-text table with a legend.

    The legend is always emitted. With no executors a single hint line follows it;
    otherwise one row per executor, plus a `models:` line when models were recorded.
    """
    out = [
        "RSI-L1 — measured selection (per-executor signal from the run store)",
        f"store: {summary['store']} runs: {summary['n_runs']}",
        "",
        " green = the loop accepted the run (final PASS or WARN)",
        " iters = attempts until the first accepted verdict (mean over green runs)",
        " cost = USD summed over runs whose executor reported usage;",
        " 'not yet recorded' means the adapter surfaced no usage — no number",
        " is ever invented for those runs.",
        "",
    ]
    executors = summary["executors"]
    if not executors:
        out.append("no runs recorded yet — `sembl-stack loop task.yaml` starts the feed.")
        return "\n".join(out)

    header = (f" {'executor':16} {'runs':>4} {'green':>6} {'block':>6} "
              f"{'iters':>6} {'latency':>9} {'cost (USD)':>18}")
    out.append(header)
    out.append(" " + "-" * (len(header) - 2))

    for row in executors:
        mean_iters = row["iters_to_green"]["mean"]
        mean_lat = row["latency_s"]["mean"]
        usage = row["cost_usd"]
        # A cost figure is shown only when at least one run reported usage.
        if usage["total"] is None:
            cost_s = "not yet recorded"
        else:
            cost_s = f"{usage['total']:.4f} ({usage['runs_recorded']}/{row['runs']} runs)"
        iters_cell = "-" if mean_iters is None else mean_iters
        lat_cell = "-" if mean_lat is None else f"{mean_lat:.1f}s"
        out.append(
            f" {row['executor']:16} {row['runs']:>4} "
            f"{_pct(row['green_rate']):>6} {_pct(row['block_rate']):>6} "
            f"{iters_cell:>6} "
            f"{lat_cell:>9} "
            f"{cost_s:>18}")
        if row["models"]:
            out.append(f" {'':16} models: {', '.join(row['models'])}")
    return "\n".join(out)
185
+
186
+
187
+ def _pct(x: float | None) -> str:
188
+ return f"{x * 100:.0f}%" if x is not None else "-"
sembl_stack/runner.py ADDED
@@ -0,0 +1,134 @@
1
+ """TUI Phase 2 orchestration glue — run the REAL loop and stream stage transitions.
2
+
3
+ Pure, headless, no Textual: the wizard (and any future surface) drives `run_stages` in a
4
+ worker thread and receives `StageEvent`s as the loop's stage functions actually execute —
5
+ plan (L2 -> rail "bounds"), execute (L3+L4 -> rail "loop"), verify (L5 -> rail "verify").
6
+ No new core/gate logic: the events come from thin proxies wrapped around the SAME adapter
7
+ objects `loop.run` already calls, so a TUI run and a headless `sembl-stack loop` run are
8
+ byte-identical in behavior and artifacts.
9
+
10
+ Config resolution mirrors the CLI `loop` command exactly: an explicit repo
11
+ `sembl.stack.yaml` always wins; otherwise the onboarded profile is the default.
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import copy
16
+ from dataclasses import dataclass
17
+ from pathlib import Path
18
+ from typing import Callable
19
+
20
+ import yaml
21
+
22
+ from .artifacts import Task
23
+ from .config import StackConfig, load as load_config
24
+ from .loop import LoopResult, run as run_loop
25
+
26
+ # loop stage -> Phase-0 stage-rail name (session.STAGES)
27
+ RAIL = {"plan": "bounds", "execute": "loop", "verify": "verify"}
28
+
29
+ Emit = Callable[["StageEvent"], None]
30
+
31
+
32
@dataclass
class StageEvent:
    """A single stage transition reported to the `emit` callback."""

    # rail stage name ("bounds" | "loop" | "verify")
    stage: str
    # "running" | "done" | "fail"
    state: str
    # e.g. "attempt 2" or the verdict status
    detail: str = ""
37
+
38
+
39
+ class _SpecProxy:
40
+ def __init__(self, inner, emit: Emit):
41
+ self._inner, self._emit = inner, emit
42
+
43
+ def plan(self, task):
44
+ self._emit(StageEvent("bounds", "running"))
45
+ try:
46
+ bounds = self._inner.plan(task)
47
+ except Exception:
48
+ self._emit(StageEvent("bounds", "fail"))
49
+ raise
50
+ self._emit(StageEvent("bounds", "done"))
51
+ return bounds
52
+
53
+
54
+ class _ExecuteProxy:
55
+ def __init__(self, inner, emit: Emit):
56
+ self._inner, self._emit = inner, emit
57
+ self._attempt = 0
58
+
59
+ def run(self, task, bounds, sandbox, feedback):
60
+ self._attempt += 1
61
+ self._emit(StageEvent("loop", "running", f"attempt {self._attempt}"))
62
+ try:
63
+ result = self._inner.run(task, bounds, sandbox, feedback)
64
+ except Exception:
65
+ # loop.execute converts the crash into a BLOCKed Change; mark the rail
66
+ # anyway so the user sees which attempt died.
67
+ self._emit(StageEvent("loop", "fail", f"attempt {self._attempt} crashed"))
68
+ raise
69
+ self._emit(StageEvent("loop", "done", f"attempt {self._attempt}"))
70
+ return result
71
+
72
+
73
+ class _VerifyProxy:
74
+ def __init__(self, inner, emit: Emit):
75
+ self._inner, self._emit = inner, emit
76
+
77
+ def verify(self, bounds, change, strict):
78
+ self._emit(StageEvent("verify", "running"))
79
+ try:
80
+ verdict = self._inner.verify(bounds, change, strict)
81
+ except Exception:
82
+ self._emit(StageEvent("verify", "fail"))
83
+ raise
84
+ status = getattr(verdict, "status", "?")
85
+ self._emit(StageEvent(
86
+ "verify", "done" if status in ("PASS", "WARN") else "fail", status))
87
+ return verdict
88
+
89
+
90
def load_task(repo: str, name: str = "task.yaml") -> Task | None:
    """Read `<repo>/<name>` and build a Task from it (same resolution as `cli._load_task`).

    Returns None when the file does not exist or its YAML top level is not a
    mapping. Relative `repo` / `spec_path` values are resolved against the
    directory containing the task file; empty values pass through untouched.
    """
    task_file = Path(repo).resolve() / name
    if not task_file.is_file():
        return None

    data = yaml.safe_load(task_file.read_text(encoding="utf-8")) or {}
    if not isinstance(data, dict):
        return None

    base = task_file.parent

    def _resolve(v):
        # Falsy values (None, "") are returned as-is rather than turned into paths.
        if not v:
            return v
        candidate = Path(v)
        if not candidate.is_absolute():
            candidate = (base / candidate).resolve()
        return str(candidate)

    repo_value = _resolve(data.get("repo", "."))
    spec_value = _resolve(data.get("spec_path"))
    return Task(text=data.get("text", ""), repo=repo_value, spec_path=spec_value)
108
+
109
+
110
def resolve_config(repo: str, config_name: str = "sembl.stack.yaml") -> StackConfig:
    """Pick the stack config: repo file first, then the onboarded profile, then defaults.

    If `<repo>/<config_name>` exists it is loaded and nothing else is consulted.
    Otherwise the saved profile (if any) is converted to overrides and passed to
    `load_config` with no config path. This is exactly the CLI `loop` precedence,
    so TUI and headless runs stay identical.
    """
    explicit = Path(repo).resolve() / config_name
    if explicit.is_file():
        return load_config(str(explicit))

    # Imported lazily, only on the fallback path.
    from . import profile as profile_mod

    prof = profile_mod.load()
    if prof is None:
        overrides = None
    else:
        overrides = profile_mod.to_stack_overrides(prof)
    return load_config(None, overrides)
120
+
121
+
122
def run_stages(cfg, task: Task, emit: Emit) -> LoopResult:
    """Run the real loop with stage events streamed to `emit`.

    Blocking — call it from a worker thread; `emit` fires on that thread (marshal
    to the UI thread yourself, e.g. Textual's `call_from_thread`).

    `cfg` itself is not modified: a shallow copy has its `spec`, `execute` and
    `verify` adapters replaced by event-emitting proxies, and that copy is handed
    to the loop. After the loop returns, one final "verify" event is emitted from
    the result's verdict.
    """
    wrapped = copy.copy(cfg)  # shallow: same adapters, three wrapped in proxies
    wrapped.spec = _SpecProxy(cfg.spec, emit)
    wrapped.execute = _ExecuteProxy(cfg.execute, emit)
    wrapped.verify = _VerifyProxy(cfg.verify, emit)
    result = run_loop(wrapped, task)
    # Read the status defensively, the same way _VerifyProxy does, so a result
    # without a verdict status is reported as a failed stage instead of raising
    # after the loop has already finished. NOTE(review): whether LoopResult can
    # carry no verdict is not visible from this module — confirm.
    status = getattr(result.verdict, "status", "?")
    emit(StageEvent("verify", "done" if status in ("PASS", "WARN") else "fail",
                    status))
    return result
sembl_stack/session.py ADDED
@@ -0,0 +1,86 @@
1
+ """Phase-0 guided-session pointer over the run store.
2
+
3
+ A tiny `.sembl/session.json` `{repo, mode, run_id, current_stage, completed}` is what makes the
4
+ guided `sembl-stack` TUI leave/continue-anywhere: it points at a run in the existing store and
5
+ records which stage the user reached. Pure and headless — unit-testable without Textual; the
6
+ wizard only renders and advances it.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ from dataclasses import asdict, dataclass, field
12
+ from pathlib import Path
13
+
14
# The Phase-0 stage rail (CI-run-page order). Only stages that are already headless.
STAGES = ["bounds", "loop", "verify", "merge", "deploy", "postdeploy"]


@dataclass
class Session:
    """Pointer into a guided run: which repo/run, the current stage, and what is done."""

    repo: str = "."
    mode: str = "existing"  # "new" | "existing"
    run_id: str | None = None
    current_stage: str = STAGES[0]
    completed: list[str] = field(default_factory=list)

    def advance(self) -> str:
        """Record the current stage as completed and step to the next one.

        On the last stage the pointer stays where it is. Returns the (possibly
        unchanged) current stage.
        """
        here = self.current_stage
        if here not in self.completed:
            self.completed.append(here)
        following = STAGES.index(here) + 1
        if following < len(STAGES):
            self.current_stage = STAGES[following]
        return self.current_stage

    @property
    def done(self) -> bool:
        """True once every stage in STAGES appears in `completed`."""
        return not any(stage not in self.completed for stage in STAGES)
38
+
39
+
40
+ def _path(repo: str) -> Path:
41
+ return Path(repo).resolve() / ".sembl" / "session.json"
42
+
43
+
44
def save(session: Session) -> Path:
    """Write `session` as indented JSON to its repo's session file; return that path.

    The `.sembl` directory is created if it does not exist yet.
    """
    target = _path(session.repo)
    target.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(asdict(session), indent=2)
    target.write_text(payload, encoding="utf-8")
    return target
49
+
50
+
51
def load(repo: str) -> "Session | None":
    """Read the saved session, or None if it's missing OR unusable.

    A truncated/corrupt `session.json`, one pointing at an unknown stage, or one
    whose fields have the wrong JSON types must NOT brick the guided entrypoint —
    an unusable pointer is treated exactly like no pointer (start fresh).
    Unknown keys in the file are ignored; unknown entries in `completed` are dropped.
    """
    p = _path(repo)
    if not p.is_file():
        return None
    try:
        data = json.loads(p.read_text(encoding="utf-8"))
        if not isinstance(data, dict):
            return None
        s = Session(**{k: v for k, v in data.items() if k in Session.__dataclass_fields__})
    except (OSError, ValueError, TypeError):
        return None
    # The dataclass does not type-check its fields, so a file with e.g.
    # `"repo": null` would load and only fail later, when save() builds a Path
    # from it. Reject wrongly typed scalar fields here instead.
    if not isinstance(s.repo, str) or not isinstance(s.mode, str):
        return None
    if s.run_id is not None and not isinstance(s.run_id, str):
        return None
    # Validate the stage pointer so advance()'s STAGES.index never raises on bad state.
    if s.current_stage not in STAGES:
        return None
    if not isinstance(s.completed, list):
        return None
    s.completed = [stage for stage in s.completed if stage in STAGES]
    return s
74
+
75
+
76
def resume_or_new(repo: str) -> Session:
    """Return the saved session when one exists and is unfinished, else a new one.

    This is the "continue anywhere" entry point. A missing, unusable or finished
    saved session yields a fresh Session whose `repo` is the resolved absolute
    path, starting at the first stage.
    """
    saved = load(repo)
    if saved is None or saved.done:
        return Session(repo=str(Path(repo).resolve()))
    return saved
sembl_stack/specgraph.py ADDED
@@ -0,0 +1,146 @@
1
+ """Build a deterministic SpecGraph artifact from task/spec inputs.
2
+
3
+ The graph is intentionally structural and local: no model call, no repo scan, no
4
+ network. It gives the reconciliation stage a stable spec-side artifact before a
5
+ heavier spec parser exists.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import re
10
+ from pathlib import Path
11
+
12
+ from .artifacts import Bounds, SpecGraph, Task
13
+
14
+ SCHEMA_VERSION = 1
15
+
16
+ _ROUTE_RE = re.compile(
17
+ r"(?:route|endpoint|path)\s*[:=-]\s*(?:(GET|POST|PUT|PATCH|DELETE)\s+)?"
18
+ r"(/[A-Za-z0-9_./{}:-]+)",
19
+ re.IGNORECASE,
20
+ )
21
+ _ENTITY_RE = re.compile(
22
+ r"(?:entity|model|table)\s*[:=-]\s*([A-Za-z][A-Za-z0-9_ -]{1,80})",
23
+ re.IGNORECASE,
24
+ )
25
+ _RULE_RE = re.compile(r"\b(must|only|never|required|requires|should)\b", re.IGNORECASE)
26
+
27
+ _SPEC_FILENAMES = (
28
+ "tasks.md",
29
+ "spec.md",
30
+ "plan.md",
31
+ "requirements.md",
32
+ "README.md",
33
+ )
34
+
35
+
36
def build_spec_graph(task: Task, bounds: Bounds | None = None) -> SpecGraph:
    """Assemble a JSON-serializable graph of what the task and its spec files declare.

    The graph always contains a root `task` node. Each text source (the task text
    first, then any files found via `task.spec_path`) becomes a `source` node with
    the concepts extracted from it. When `bounds` is given, its editable paths and
    forbidden areas are attached to the task node as scope nodes.
    """
    nodes: list[dict] = []
    edges: list[dict] = []
    known_ids: set[str] = set()

    def add_node(node_id: str, node_type: str, name: str, **attrs) -> None:
        # First registration of an id wins; later duplicates are ignored.
        if node_id in known_ids:
            return
        known_ids.add(node_id)
        record = {"id": node_id, "type": node_type, "name": name}
        for key, value in attrs.items():
            # Empty attribute values are left out of the node entirely.
            if value not in (None, "", [], {}):
                record[key] = value
        nodes.append(record)

    def add_edge(src: str, dst: str, rel: str) -> None:
        edges.append({"from": src, "to": dst, "type": rel})

    add_node("task", "task", "task", text=task.text, repo=task.repo)

    sources = _read_sources(getattr(task, "spec_path", None))
    if task.text:
        sources.insert(0, ("task.text", task.text))

    for position, (label, body) in enumerate(sources):
        source_id = f"source:{position}"
        source_path = None if label == "task.text" else label
        add_node(source_id, "source", label, path=source_path)
        add_edge("task", source_id, "declares")
        _extract_concepts(body, source_id, add_node, add_edge)

    if bounds is not None:
        scope_kinds = (
            ("editable_paths", "scope:editable:", "editable_path", "allows"),
            ("forbidden_areas", "scope:forbidden:", "forbidden_area", "forbids"),
        )
        for attr, prefix, node_type, relation in scope_kinds:
            for path in getattr(bounds, attr):
                scope_id = f"{prefix}{path}"
                add_node(scope_id, node_type, path)
                add_edge("task", scope_id, relation)

    return SpecGraph(
        nodes=nodes,
        edges=edges,
        sources=[label for label, _ in sources],
        data={"schema_version": SCHEMA_VERSION},
    )
81
+
82
+
83
+ def _read_sources(spec_path: str | None) -> list[tuple[str, str]]:
84
+ if not spec_path:
85
+ return []
86
+ root = Path(spec_path)
87
+ if root.is_file():
88
+ return [(str(root), _read_text(root))]
89
+ if not root.is_dir():
90
+ return []
91
+
92
+ paths: list[Path] = []
93
+ for name in _SPEC_FILENAMES:
94
+ p = root / name
95
+ if p.is_file():
96
+ paths.append(p)
97
+ for p in sorted(root.glob("*.md")):
98
+ if p not in paths:
99
+ paths.append(p)
100
+ return [(str(p), _read_text(p)) for p in paths]
101
+
102
+
103
+ def _read_text(path: Path) -> str:
104
+ try:
105
+ return path.read_text(encoding="utf-8")
106
+ except UnicodeDecodeError:
107
+ return path.read_text(encoding="utf-8", errors="replace")
108
+
109
+
110
def _extract_concepts(text: str, source_id: str, add_node, add_edge) -> None:
    """Scan `text` for routes, entities and rule lines; register each under `source_id`.

    Three passes, in this order: route matches, entity matches, then a line-by-line
    scan for rule keywords. Routes and entities are linked with a "mentions" edge,
    rule lines with a "declares" edge.
    """
    for hit in _ROUTE_RE.finditer(text):
        # A route declared without an HTTP method is recorded as method "ANY".
        verb = (hit.group(1) or "ANY").upper()
        route = _clean_route(hit.group(2))
        route_id = f"route:{verb}:{route}"
        add_node(route_id, "route", route, method=verb)
        add_edge(source_id, route_id, "mentions")

    for hit in _ENTITY_RE.finditer(text):
        label = _clean_label(hit.group(1))
        if label:
            entity_id = f"entity:{_slug(label)}"
            add_node(entity_id, "entity", label)
            add_edge(source_id, entity_id, "mentions")

    for line_no, raw in enumerate(text.splitlines(), start=1):
        # Drop list bullets / indentation before looking for a rule keyword.
        rule_text = raw.strip(" -*\t")
        if rule_text and _RULE_RE.search(rule_text):
            rule_id = f"rule:{source_id}:{line_no}"
            add_node(rule_id, "data_rule", rule_text, line=line_no)
            add_edge(source_id, rule_id, "declares")
134
+
135
+
136
+ def _clean_label(value: str) -> str:
137
+ return re.sub(r"\s+", " ", value).strip(" .:-")
138
+
139
+
140
+ def _clean_route(value: str) -> str:
141
+ return value.rstrip(".,;:)")
142
+
143
+
144
+ def _slug(value: str) -> str:
145
+ slug = re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-")
146
+ return slug or "unnamed"
sembl_stack/store.py ADDED
@@ -0,0 +1,112 @@
1
+ """Run store — artifacts persisted at `.sembl/runs/<run-id>/` in the target repo.
2
+
3
+ Local-first, no server required to read a past run. Each run is a directory of JSON
4
+ artifacts plus a `run.json` manifest. This is what makes runs inspectable (TUI/web),
5
+ resumable, and enterable at an arbitrary stage (supply the upstream artifact).
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import re
11
+ import time
12
+ import uuid
13
+ from pathlib import Path
14
+
15
+ from . import artifacts
16
+ from .artifacts import _Serializable
17
+
18
+
19
class Run:
    """One run directory `<root>/<run_id>/`: JSON artifacts plus a `run.json` manifest."""

    def __init__(self, root: Path, run_id: str):
        self.id = run_id
        self.dir = root / run_id
        # Constructing a Run creates its directory when it does not exist yet.
        self.dir.mkdir(parents=True, exist_ok=True)
        self._manifest_path = self.dir / "run.json"

    # -- artifacts --------------------------------------------------------------
    def put(self, artifact: _Serializable, name: str | None = None) -> str:
        """Persist an artifact; `name` defaults to its KIND. Returns the file name."""
        name = name or artifact.KIND
        fname = f"{name}.json"
        (self.dir / fname).write_text(artifact.to_json(), encoding="utf-8")
        self._touch_manifest(name, artifact.KIND, fname)
        return fname

    def get(self, name: str):
        """Load an artifact by name (reconstructs the right type via its `_kind` tag).

        Returns None when `<name>.json` does not exist in the run directory.
        """
        path = self.dir / f"{name}.json"
        if not path.is_file():
            return None
        return artifacts.from_dict(json.loads(path.read_text(encoding="utf-8")))

    def has(self, name: str) -> bool:
        """True when `<name>.json` exists in the run directory."""
        return (self.dir / f"{name}.json").is_file()

    # -- manifest ---------------------------------------------------------------
    def manifest(self) -> dict:
        """Return the parsed `run.json`, or an empty dict when it does not exist."""
        if self._manifest_path.is_file():
            return json.loads(self._manifest_path.read_text(encoding="utf-8"))
        return {}

    def set_status(self, status: str, **extra) -> None:
        """Set `status` and `updated` in the manifest and merge in any `extra` keys."""
        m = self.manifest()
        m["status"] = status
        m["updated"] = time.time()
        m.update(extra)
        self._write_manifest(m)

    def record_attempt(self, attempt: int, **metric) -> None:
        """Append a per-attempt cost/latency record to the manifest (C1.3).

        One entry per execute call: `{attempt, latency_s, agent, model, exit_code,
        tokens, cost}` (tokens/cost only where the executor reported usage). This is the
        signal `sembl-stack runs` shows and that the process-RSI / eval (B) layer consumes.
        Keys with a None value are dropped so the manifest stays clean.
        """
        m = self.manifest()
        entry = {"attempt": attempt}
        entry.update({k: v for k, v in metric.items() if v is not None})
        m.setdefault("attempts_log", []).append(entry)
        self._write_manifest(m)

    def _touch_manifest(self, name: str, kind: str, fname: str) -> None:
        """Register (or refresh) an artifact entry in the manifest with a timestamp."""
        m = self.manifest()
        m.setdefault("artifacts", {})[name] = {
            "kind": kind, "file": fname, "ts": time.time()}
        self._write_manifest(m)

    def _write_manifest(self, m: dict) -> None:
        """Serialize `m` to `run.json`, replacing the previous manifest in one step.

        The JSON is written to a sibling temp file and then renamed over the
        manifest, so an interrupted write cannot leave a truncated `run.json`
        behind for later `manifest()` calls to fail on.
        """
        payload = json.dumps(m, indent=2, ensure_ascii=False)
        tmp_path = self._manifest_path.with_name(self._manifest_path.name + ".tmp")
        tmp_path.write_text(payload, encoding="utf-8")
        tmp_path.replace(self._manifest_path)
81
+
82
+
83
# A run id is a single directory name: it must start with an alphanumeric and may
# contain only alphanumerics, dots, underscores and dashes (so no path separators
# and no leading dot).
_RUN_ID = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$")


class RunStore:
    """The collection of runs under `<repo>/.sembl/runs/`."""

    def __init__(self, repo: str):
        self.root = Path(repo).resolve() / ".sembl" / "runs"

    def new_run(self, task=None) -> Run:
        """Create a run with a fresh timestamp+random id and an initial manifest.

        The manifest starts with status "started"; when `task` is given its `text`
        and `repo` attributes are recorded too.
        """
        run_id = time.strftime("%Y%m%d-%H%M%S-") + uuid.uuid4().hex[:6]
        run = Run(self.root, run_id)
        m = {"id": run_id, "created": time.time(), "status": "started",
             "artifacts": {}}
        if task is not None:
            m["task"] = {"text": getattr(task, "text", ""),
                         "repo": getattr(task, "repo", "")}
        run._write_manifest(m)
        return run

    def open(self, run_id: str) -> Run:
        """Open the run named `run_id`. Raises ValueError for an invalid id."""
        # A run id is a single directory name under .sembl/runs — never a path. Rejecting
        # separators/leading dots here keeps `runs <id>` / `apply <id>` from resolving
        # (and mkdir-ing) outside the store via a crafted id like `..\\..\\evil`.
        if not _RUN_ID.match(run_id):
            raise ValueError(f"invalid run id: {run_id!r}")
        return Run(self.root, run_id)

    def list_runs(self) -> list[str]:
        """Return the ids of all runs in the store, sorted in descending name order.

        Only directories whose name is a valid run id are listed, so every returned
        id can be passed to `open` without raising; stray directories (for example
        dot-directories) are ignored.
        """
        if not self.root.is_dir():
            return []
        names = (
            p.name for p in self.root.iterdir()
            if p.is_dir() and _RUN_ID.match(p.name)
        )
        return sorted(names, reverse=True)