PyPI - sembl-stack - Versions diffs - 0.1.0__py3-none-any.whl - Mend

sembl-stack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

sembl_stack/__init__.py +3 -0
sembl_stack/adapters/__init__.py +0 -0
sembl_stack/adapters/_redact.py +19 -0
sembl_stack/adapters/base.py +179 -0
sembl_stack/adapters/codegraph_cbm.py +95 -0
sembl_stack/adapters/deploy_vercel.py +215 -0
sembl_stack/adapters/execute_aider.py +115 -0
sembl_stack/adapters/execute_claude.py +114 -0
sembl_stack/adapters/execute_mock.py +53 -0
sembl_stack/adapters/execute_opencode.py +114 -0
sembl_stack/adapters/merge_git.py +107 -0
sembl_stack/adapters/postdeploy_http.py +82 -0
sembl_stack/adapters/review_coderabbit.py +215 -0
sembl_stack/adapters/review_llm.py +142 -0
sembl_stack/adapters/review_mock.py +42 -0
sembl_stack/adapters/sandbox_worktree.py +79 -0
sembl_stack/adapters/spec_sembl.py +91 -0
sembl_stack/adapters/verify_sembl.py +77 -0
sembl_stack/artifacts.py +207 -0
sembl_stack/cli.py +759 -0
sembl_stack/config.py +87 -0
sembl_stack/contextgraph.py +154 -0
sembl_stack/doctor.py +111 -0
sembl_stack/loop.py +380 -0
sembl_stack/onboarding.py +272 -0
sembl_stack/presets.py +114 -0
sembl_stack/profile.py +193 -0
sembl_stack/reconciliation.py +138 -0
sembl_stack/registry.py +91 -0
sembl_stack/rsi.py +188 -0
sembl_stack/runner.py +134 -0
sembl_stack/session.py +86 -0
sembl_stack/specgraph.py +146 -0
sembl_stack/store.py +112 -0
sembl_stack/tracing.py +51 -0
sembl_stack/transport/__init__.py +0 -0
sembl_stack/transport/mcp_client.py +58 -0
sembl_stack/tui.py +86 -0
sembl_stack/views.py +74 -0
sembl_stack/wizard.py +233 -0
sembl_stack-0.1.0.dist-info/METADATA +165 -0
sembl_stack-0.1.0.dist-info/RECORD +45 -0
sembl_stack-0.1.0.dist-info/WHEEL +4 -0
sembl_stack-0.1.0.dist-info/entry_points.txt +2 -0
sembl_stack-0.1.0.dist-info/licenses/LICENSE +201 -0

sembl_stack/adapters/execute_claude.py ADDED Viewed

@@ -0,0 +1,114 @@
+"""L3 executor: Claude Code (Anthropic) driven headless in the sandbox.
+Hands the task (plus the gate's feedback on retry, and the in-scope file list) to
+`claude -p` (print / non-interactive mode) inside the worktree, then reads back the diff.
+Claude Code uses the operator's own logged-in session (OAuth/keychain) — sembl-stack never
+handles a token. Requires `claude` on PATH.
+Why the flags:
+  -p / --print                    non-interactive: run the task and exit (no TUI).
+  --dangerously-skip-permissions  headless edits otherwise block on a per-write approval
+                                  prompt. Safe here: the agent runs inside a disposable
+                                  git-worktree sandbox (the cage, not the repo) — only the
+                                  diff is gated and the worktree is thrown away after.
+  --output-format json            the result envelope carries `total_cost_usd` + `usage`
+                                  — the REAL per-attempt cost signal the run store's
+                                  `attempts_log` (C1.3) and the RSI-L1 readout consume.
+                                  Parsed best-effort: a non-JSON stdout (older CLI, crash
+                                  mid-stream) degrades to the raw text, never an error —
+                                  and never an invented cost.
+We deliberately do NOT pass --bare: that would force ANTHROPIC_API_KEY auth and ignore the
+operator's OAuth login. Default auth keeps the "never see a token" property.
+"""
+from __future__ import annotations
+import json
+import shutil
+import subprocess  # noqa: F401  (kept for tests that monkeypatch cc.subprocess.run)
+from .base import (
+    Bounds,
+    ExecutionResult,
+    Sandbox,
+    Task,
+    changed_files_from_diff as _changed_files,
+    run_executor,
+    scrub_secrets,
+)
+class ClaudeCodeExecutor:
+    """L3 executor that drives `claude -p` headless inside the sandbox worktree.
+    `model` is forwarded as `--model` when set; `timeout` is handed to `run_executor`.
+    """
+    def __init__(self, model: str | None = None, timeout: int = 900):
+        self.model = model
+        self.timeout = timeout
+    def run(self, task: Task, bounds: Bounds, sandbox: Sandbox,
+            feedback: str | None) -> ExecutionResult:
+        """Run one attempt and return the sandbox diff plus a scrubbed report.
+        Raises RuntimeError when `claude` is not on PATH. A timeout is not raised: it is
+        recorded in the report (`error` / `timed_out`) so the gate can BLOCK on it.
+        """
+        binary = shutil.which("claude")
+        if not binary:
+            raise RuntimeError(
+                "L3: `claude` not found on PATH. Install Claude Code, or set execute: mock.")
+        model_flags = ["--model", self.model] if self.model else []
+        # Flags first, prompt last — the prompt is the single positional argument.
+        argv = [
+            binary, "-p", "--dangerously-skip-permissions", "--output-format", "json",
+            *model_flags,
+            self._prompt(task, bounds, feedback),
+        ]
+        exit_code, stdout, stderr, timed_out = run_executor(
+            argv, cwd=sandbox.workdir, timeout=self.timeout)
+        patch = sandbox.diff()
+        base_report = {
+            "files_modified": _changed_files(patch),
+            "agent": "claude-code",
+            "model": self.model,
+            "exit_code": exit_code,
+            # Only the tail of each stream is kept, after secret scrubbing.
+            "output": scrub_secrets(stdout)[-2000:],
+            "stderr": scrub_secrets(stderr)[-1000:],
+        }
+        # Cost/usage keys appear only when the CLI actually reported them.
+        report = {**base_report, **_usage_from_result_json(stdout)}
+        if timed_out:
+            report.update(error="timeout", timed_out=True)
+        return ExecutionResult(diff=patch, report=report, workdir=sandbox.workdir)
+    @staticmethod
+    def _prompt(task: Task, bounds: Bounds, feedback: str | None) -> str:
+        """Compose the prompt: task text, a blank line, scope rules, then retry feedback."""
+        scope_rules = (
+            ("You may ONLY edit these paths: ", bounds.editable_paths),
+            ("Never touch: ", bounds.forbidden_areas),
+        )
+        parts = [task.text, ""]
+        parts.extend(label + ", ".join(paths) for label, paths in scope_rules if paths)
+        if feedback:
+            parts.extend(("", feedback))
+        return "\n".join(parts)
+def _usage_from_result_json(out: str) -> dict:
+    """Cost/usage from the `--output-format json` result envelope — or {} (never invented).
+    The envelope is `{"type": "result", ..., "total_cost_usd": <float>, "usage": {...}}`.
+    Anything that doesn't parse as that shape (older CLI, text output, truncated stream)
+    yields {} so the run store simply records no usage — the RSI readout then reports
+    "not yet recorded" for the run instead of a fabricated number.
+    Returns a dict with an optional `cost` (number) and an optional `usage` (a copy of the
+    reported mapping, plus a derived `total_tokens` when both token counts are integers).
+    """
+    def _is_number(value, kinds) -> bool:
+        # bool is a subclass of int: JSON true/false must not pass as a cost or a count.
+        return isinstance(value, kinds) and not isinstance(value, bool)
+    try:
+        data = json.loads(out)
+    except (ValueError, TypeError):    # not JSON at all, or `out` is not a string
+        return {}
+    if not isinstance(data, dict):
+        return {}
+    extra: dict = {}
+    cost = data.get("total_cost_usd")
+    if _is_number(cost, (int, float)):
+        extra["cost"] = cost
+    usage = data.get("usage")
+    if isinstance(usage, dict):
+        usage = dict(usage)                      # copy: never mutate the parsed envelope
+        if "total_tokens" not in usage:
+            parts = [usage.get(k) for k in ("input_tokens", "output_tokens")]
+            if all(_is_number(p, int) for p in parts):
+                usage["total_tokens"] = sum(parts)   # derived from reported parts only
+        extra["usage"] = usage
+    return extra

sembl_stack/adapters/execute_mock.py ADDED Viewed

@@ -0,0 +1,53 @@
+"""L3 executor: a deterministic mock — no model, no keys.
+It exists to prove the *loop* end to end (and to make the retry-on-BLOCK behaviour
+visible): the first attempt deliberately wanders out of scope and fabricates a claim
+(→ BLOCK); once the gate's feedback arrives, it behaves (→ PASS). Swap for `opencode`
+to drive a real agent.
+"""
+from __future__ import annotations
+from pathlib import Path
+from .base import Bounds, ExecutionResult, Sandbox, Task
+class MockExecutor:
+    """Deterministic stand-in executor used to exercise the loop without a model.
+    With no feedback it deliberately misbehaves (out-of-scope edit plus a fabricated
+    claim); once feedback is supplied it edits in scope and reports honestly.
+    """
+    def run(self, task: Task, bounds: Bounds, sandbox: Sandbox,
+            feedback: str | None) -> ExecutionResult:
+        """Write one file into the sandbox and return its diff with a canned report."""
+        root = Path(sandbox.workdir)
+        if not feedback:
+            # First attempt: misbehave — edit outside scope + fabricate a claim.
+            stray = bounds.forbidden_areas[0] if bounds.forbidden_areas else ""
+            rel = self._write(root, stray, "stray.txt", "# out-of-scope edit\n") \
+                if stray else self._write(root, "", "OUTSIDE.txt", "# out-of-scope\n")
+            report = {
+                "files_modified": [rel, "src/imaginary.py"],   # imaginary = fabrication
+                "tests_passed": True,                           # no evidence attached
+            }
+            return ExecutionResult(diff=sandbox.diff(), report=report, workdir=str(root))
+        # Retry: stay in the first editable path and report honestly, with evidence.
+        target = bounds.editable_paths[0] if bounds.editable_paths else "."
+        rel = self._write(root, target, "patch.py", "# in-scope change\nVALUE = 1\n")
+        report = {
+            "files_modified": [rel],
+            "tests_passed": True,
+            "exit_code": 0,
+            "output": "pytest: 1 passed in 0.1s",
+        }
+        return ExecutionResult(diff=sandbox.diff(), report=report, workdir=str(root))
+    @staticmethod
+    def _write(root: Path, target: str, default_name: str, content: str) -> str:
+        """Append `content` to `target` (file or dir). Returns the repo-relative path.
+        `target` is treated as a concrete file only when it has a suffix, does not end in
+        "/", and is not an existing directory; otherwise `default_name` is created inside
+        it. An empty `target` means the sandbox root.
+        """
+        t = (root / target) if target else root
+        looks_like_file = bool(target) and not target.endswith("/") and bool(t.suffix)
+        # A dotted name is only a hint: `pkg.v2/`, or "." under a sandbox root whose own
+        # name contains a dot, is still a directory — opening it as a file would raise.
+        if looks_like_file and not t.is_dir():
+            path = t                      # target is a concrete file
+        else:
+            path = t / default_name        # target is a dir (or repo root)
+        path.parent.mkdir(parents=True, exist_ok=True)
+        with path.open("a", encoding="utf-8") as fh:
+            fh.write(content)
+        return str(path.relative_to(root)).replace("\\", "/")

sembl_stack/adapters/execute_opencode.py ADDED Viewed

@@ -0,0 +1,114 @@
+"""L3 executor: OpenCode (OSS, 75+ models) driven headless in the sandbox.
+Hands the task (plus the gate's feedback on retry, and the in-scope file list) to
+`opencode run` inside the worktree, then reads back the diff. OpenCode supplies its own
+model key; sembl-stack never sees a token. Requires `opencode` on PATH.
+"""
+from __future__ import annotations
+import shutil
+import subprocess  # noqa: F401  (kept for tests that monkeypatch oc.subprocess.run)
+from pathlib import Path
+from .base import (
+    Bounds,
+    ExecutionResult,
+    Sandbox,
+    Task,
+    changed_files_from_diff as _changed_files,
+    run_executor,
+    scrub_secrets,
+)
+class OpenCodeExecutor:
+    """L3 executor that drives `opencode run` headless inside the sandbox worktree.
+    `model` is forwarded as `--model` when set; `timeout` is handed to `run_executor`.
+    """
+    def __init__(self, model: str | None = None, timeout: int = 900):
+        self.model = model
+        self.timeout = timeout
+    def run(self, task: Task, bounds: Bounds, sandbox: Sandbox,
+            feedback: str | None) -> ExecutionResult:
+        """Run one attempt and return the sandbox diff plus a scrubbed report.
+        Raises RuntimeError when `opencode` cannot be resolved. A timeout is not raised: it
+        is recorded in the report (`error` / `timed_out`) so the gate can BLOCK on it.
+        """
+        argv_prefix = _resolve_opencode()
+        if not argv_prefix:
+            raise RuntimeError(
+                "L3: `opencode` not found on PATH. Install it, or set execute: mock.")
+        message = self._prompt(task, bounds, feedback)
+        # Newlines survive when the binary is launched directly. Only the legacy `cmd /c`
+        # shim cuts an argument at an embedded newline, so flatten for that launcher alone.
+        if argv_prefix[0].lower() == "cmd":
+            message = " ".join(message.splitlines())
+        model_flags = ["--model", self.model] if self.model else []
+        # --pure                          ignore the operator's personal plugins/skills/agents
+        #                                 so the agent is driven only by the task + bounds.
+        # --dangerously-skip-permissions  headless runs would otherwise wait on an approval
+        #                                 prompt per write; the worktree is disposable and
+        #                                 only its diff is gated.
+        # --dir                           opencode picks its project root itself rather than
+        #                                 from the inherited cwd, so pin it to the sandbox.
+        argv = [
+            *argv_prefix, "run", "--pure", "--dangerously-skip-permissions",
+            "--dir", sandbox.workdir, message,
+            *model_flags,
+        ]
+        exit_code, stdout, stderr, timed_out = run_executor(
+            argv, cwd=sandbox.workdir, timeout=self.timeout)
+        patch = sandbox.diff()
+        report = {
+            "files_modified": _changed_files(patch),
+            "agent": "opencode",
+            "model": self.model,
+            "exit_code": exit_code,
+            # Only the tail of each stream is kept, after secret scrubbing.
+            "output": scrub_secrets(stdout)[-2000:],
+            "stderr": scrub_secrets(stderr)[-1000:],
+        }
+        if timed_out:
+            report.update(error="timeout", timed_out=True)
+        return ExecutionResult(diff=patch, report=report, workdir=sandbox.workdir)
+    @staticmethod
+    def _prompt(task: Task, bounds: Bounds, feedback: str | None) -> str:
+        """Compose the prompt: task text, a blank line, scope rules, then retry feedback."""
+        scope_rules = (
+            ("You may ONLY edit these paths: ", bounds.editable_paths),
+            ("Never touch: ", bounds.forbidden_areas),
+        )
+        parts = [task.text, ""]
+        parts.extend(label + ", ".join(paths) for label, paths in scope_rules if paths)
+        if feedback:
+            parts.extend(("", feedback))
+        return "\n".join(parts)
+def _resolve_opencode() -> list[str]:
+    """Return the argv prefix that launches opencode, or [] when it is not on PATH.
+    Resolution order:
+      1. the native `opencode.exe` that the npm shim wraps, found next to the shim under
+         `node_modules/opencode-ai/bin/` — launched directly so argv (including a
+         multi-line prompt) is passed through untouched;
+      2. a `.cmd`/`.bat` shim via `cmd /c` (the caller flattens newlines for this case);
+      3. a `.ps1` shim via `powershell -File`;
+      4. whatever `which` returned, as-is.
+    """
+    found = shutil.which("opencode")
+    if not found:
+        return []
+    bundled = Path(found).parent.joinpath(
+        "node_modules", "opencode-ai", "bin", "opencode.exe")
+    if bundled.is_file():
+        return [str(bundled)]
+    # Shim extension -> interpreter argv that has to run it.
+    shim_launchers = (
+        ((".cmd", ".bat"), ["cmd", "/c"]),
+        ((".ps1",), ["powershell", "-NoProfile", "-ExecutionPolicy", "Bypass", "-File"]),
+    )
+    lowered = found.lower()
+    for suffixes, interpreter in shim_launchers:
+        if lowered.endswith(suffixes):
+            return [*interpreter, found]
+    return [found]

sembl_stack/adapters/merge_git.py ADDED Viewed

@@ -0,0 +1,107 @@
+"""L6.5 gated merge adapter using local git.
+The stage owns the MergeRecord, not the VCS mechanism. PASS/WARN verdicts are gated at the
+CLI; this adapter performs the merge into the target branch and records the merge commit.
+No credentials ever enter the artifact.
+"""
+from __future__ import annotations
+import shutil
+import subprocess
+import time
+from pathlib import Path
+from ._redact import summarize
+from .base import MergeRecord
+class GitMergeAdapter:
+    """Merge a source ref into a target branch with the local `git` CLI.
+    Every outcome — success or failure — is returned as a MergeRecord; git failures are
+    reported through `status="failed"` rather than raised.
+    """
+    def __init__(self, timeout: int = 300):
+        self.timeout = timeout      # seconds, applied to each individual git invocation
+    def available(self) -> bool:
+        """True when a `git` executable is on PATH."""
+        return shutil.which("git") is not None
+    def merge(self, repo: str, *, into: str = "main", source: str = "HEAD",
+              no_ff: bool = True, message: str | None = None) -> MergeRecord:
+        """Merge `source` into branch `into` inside `repo` and describe what happened.
+        `source` is resolved to a commit *before* the target is checked out, so a relative
+        ref such as the default "HEAD" means "what was checked out when merge() was
+        called" — not the target branch it would name after the switch.
+        On success the record carries the resulting HEAD commit of `into`; the repo is
+        then switched back to the starting branch on a best-effort basis.
+        """
+        repo_path = str(Path(repo).resolve())
+        def _git(args: list[str]):
+            return subprocess.run(
+                ["git", "-C", repo_path, *args], capture_output=True, text=True,
+                encoding="utf-8", errors="replace", timeout=self.timeout)
+        t0 = time.perf_counter()
+        # target branch must exist
+        check = _git(["rev-parse", "--verify", "--quiet", into])
+        if check.returncode != 0:
+            return MergeRecord(
+                target_branch=into, source_ref=source, status="failed",
+                data={"reason": "target branch not found",
+                      "latency_s": round(time.perf_counter() - t0, 3)})
+        prev = _git(["rev-parse", "--abbrev-ref", "HEAD"]).stdout.strip()
+        # Pin the source to a commit NOW. After `checkout into`, a symbolic ref like HEAD
+        # points at the target itself, so `git merge HEAD` would be an "Already up to
+        # date" no-op that still gets recorded as a merge. If the ref cannot be resolved,
+        # keep the literal so the merge step below reports the failure as before.
+        pinned = _git(["rev-parse", "--verify", "--quiet", source + "^{commit}"])
+        merge_ref = (pinned.stdout.strip() if pinned.returncode == 0 else "") or source
+        # Switching to the target MUST succeed before we merge. A dirty tree, a locked branch,
+        # or any refusal returns non-zero; if we ignored it the merge would run on whatever
+        # branch is currently checked out while the record claims `into` was merged — a false
+        # accountability record (and it could mutate the source branch). Fail loudly instead.
+        co = _git(["checkout", into])
+        if co.returncode != 0:
+            return MergeRecord(
+                target_branch=into, source_ref=source, status="failed",
+                data={"reason": "checkout of target branch failed",
+                      "returncode": co.returncode, "previous_branch": prev,
+                      "latency_s": round(time.perf_counter() - t0, 3),
+                      "stderr": summarize(co.stderr)})
+        # Defense in depth: confirm HEAD actually moved to the target before merging.
+        cur = _git(["rev-parse", "--abbrev-ref", "HEAD"]).stdout.strip()
+        if cur != into:
+            # The checkout "succeeded" but landed elsewhere (e.g. `into` named a tag and
+            # HEAD is now detached). Put the operator back where they started.
+            self._restore(_git, prev, into)
+            return MergeRecord(
+                target_branch=into, source_ref=source, status="failed",
+                data={"reason": f"expected to be on '{into}' but on '{cur}' after checkout",
+                      "previous_branch": prev,
+                      "latency_s": round(time.perf_counter() - t0, 3)})
+        msg = message or f"merge {source} into {into} (sembl-gated)"
+        merge_cmd = ["merge", *(["--no-ff"] if no_ff else []), "-m", msg, merge_ref]
+        m = _git(merge_cmd)
+        if m.returncode != 0:
+            _git(["merge", "--abort"])     # best-effort cleanup of a conflicted merge
+            self._restore(_git, prev, into)
+            return MergeRecord(
+                target_branch=into, source_ref=source, status="failed",
+                data={"reason": "merge failed", "returncode": m.returncode,
+                      "previous_branch": prev,
+                      "latency_s": round(time.perf_counter() - t0, 3),
+                      "command": _safe_command(["git", *merge_cmd]),
+                      "stdout": summarize(m.stdout), "stderr": summarize(m.stderr)})
+        sha = _git(["rev-parse", "HEAD"]).stdout.strip()
+        restored = self._restore(_git, prev, into)
+        return MergeRecord(
+            target_branch=into, source_ref=source, commit=sha or None, status="merged",
+            data={"no_ff": no_ff, "previous_branch": prev, "restored_to_branch": restored,
+                  "latency_s": round(time.perf_counter() - t0, 3),
+                  "command": _safe_command(["git", *merge_cmd]),
+                  "stdout": summarize(m.stdout), "stderr": summarize(m.stderr)})
+    @staticmethod
+    def _restore(_git, prev: str, into: str) -> str | None:
+        """Best-effort: leave the repo on the branch it started on (the merge commit stays on
+        `into` regardless). Returns the branch we ended on, or None if restore was skipped
+        (no previous branch, detached HEAD, already on `into`) or the checkout failed."""
+        if not prev or prev in ("HEAD", into):
+            return None
+        return prev if _git(["checkout", prev]).returncode == 0 else None
+def _safe_command(cmd: list[str]) -> list[str]:
+    """Return a copy of `cmd` that is safe to store in a record.
+    The value of a `--token` option is replaced with `<redacted>` in both spellings:
+    `--token VALUE` (two arguments) and `--token=VALUE` (one argument). Every other
+    argument is copied unchanged.
+    """
+    safe: list[str] = []
+    redact_next = False
+    for part in cmd:
+        if redact_next:
+            # Whatever follows a bare `--token` is the secret, even if it looks like a flag.
+            safe.append("<redacted>")
+            redact_next = False
+        elif part == "--token":
+            safe.append(part)
+            redact_next = True
+        elif isinstance(part, str) and part.startswith("--token="):
+            safe.append("--token=<redacted>")    # inline form: keep the flag, drop the value
+        else:
+            safe.append(part)
+    return safe

sembl_stack/adapters/postdeploy_http.py ADDED Viewed

@@ -0,0 +1,82 @@
+"""L8 deterministic post-deploy health gate."""
+from __future__ import annotations
+import json
+from urllib.error import URLError
+from urllib.parse import urljoin
+from urllib.request import Request, urlopen
+from ._redact import summarize
+from .base import Delivery, Verdict
+class HttpPostDeployGate:
+    """Post-deploy health gate: GET a health URL on the delivery and return a Verdict.
+    Every failure mode (not deployed, transport error, bad status, bad payload) is a
+    BLOCK verdict — the gate reports, it does not raise.
+    """
+    def __init__(self, health_path: str = "/", expect_json: dict | None = None):
+        # Defaults come from config `options.postdeploy` (threaded by the registry) so the
+        # spine can enforce a real health contract — e.g. {ok, supabaseConfigured} — by config
+        # alone. A CLI flag overrides per-call; None means "use the configured default".
+        self.health_path = health_path
+        self.expect_json = expect_json
+    def verify(self, delivery: Delivery, *, health_path: str | None = None,
+               timeout_s: float = 10.0,
+               expect_json: dict | None = None) -> Verdict:
+        """Check the deployed URL and return PASS or BLOCK.
+        `health_path` / `expect_json` override the instance defaults when not None.
+        `timeout_s` is the urlopen timeout. When `expect_json` is set, the response body
+        must be a JSON object whose listed keys equal the expected values.
+        """
+        # Function-scope imports: only this method needs these two names.
+        from http.client import HTTPException
+        from urllib.error import HTTPError
+        health_path = health_path if health_path is not None else self.health_path
+        expect_json = expect_json if expect_json is not None else self.expect_json
+        if delivery.status != "deployed" or not delivery.url:
+            return Verdict(
+                status="BLOCK",
+                reasons=["delivery is not deployed or has no URL"],
+                raw={"delivery": delivery.to_dict()},
+            )
+        url = urljoin(delivery.url.rstrip("/") + "/", health_path.lstrip("/"))
+        try:
+            req = Request(url, headers={"User-Agent": "sembl-stack-postdeploy"})
+            with urlopen(req, timeout=timeout_s) as resp:
+                code = getattr(resp, "status", None)
+                if code is None:
+                    code = resp.getcode()
+                body = resp.read(2048).decode("utf-8", "replace")
+        except HTTPError as exc:
+            # urlopen raises for 4xx/5xx instead of returning the response, so the status
+            # check further down never sees those codes. Report the real status here
+            # rather than a bare exception name. Must precede the OSError clause:
+            # HTTPError is a URLError, which is an OSError.
+            try:
+                body = exc.read(2048).decode("utf-8", "replace")
+                exc.close()
+            except (OSError, ValueError, AttributeError):
+                body = ""                  # error body is best-effort only
+            return Verdict(
+                status="BLOCK",
+                reasons=[f"post-deploy health check returned HTTP {exc.code}"],
+                raw={"url": url, "status_code": int(exc.code), "body": summarize(body)},
+            )
+        except (OSError, URLError, ValueError, HTTPException) as exc:
+            # ValueError: malformed / scheme-less URL. HTTPException: broken HTTP framing.
+            # Both are gate failures, not crashes.
+            return Verdict(
+                status="BLOCK",
+                reasons=[f"post-deploy health check failed: {type(exc).__name__}"],
+                raw={"url": url, "error": type(exc).__name__},
+            )
+        if not (200 <= int(code) < 400):
+            return Verdict(
+                status="BLOCK",
+                reasons=[f"post-deploy health check returned HTTP {code}"],
+                raw={"url": url, "status_code": int(code), "body": summarize(body)},
+            )
+        # A 2xx/3xx is necessary but not sufficient: a misconfigured app can return
+        # 200 with a useless body. When the caller declares an expected payload, assert
+        # the health JSON actually reports the app healthy (matches the app's own
+        # postdeploy-health.mjs check, instead of status-only).
+        if expect_json:
+            try:
+                payload = json.loads(body)
+            except (ValueError, TypeError):
+                return Verdict(
+                    status="BLOCK",
+                    reasons=["post-deploy health payload is not valid JSON"],
+                    raw={"url": url, "status_code": int(code), "body": summarize(body)},
+                )
+            # Valid JSON is not necessarily an object: a list, string or number has no
+            # keys to compare, so it cannot satisfy the contract.
+            if not isinstance(payload, dict):
+                return Verdict(
+                    status="BLOCK",
+                    reasons=["post-deploy health payload is not a JSON object"],
+                    raw={"url": url, "status_code": int(code), "body": summarize(body)},
+                )
+            # Only the allowlisted expected keys are surfaced (health booleans the caller
+            # declared) — never the full third-party payload, which may carry env-shaped values.
+            mismatches = [
+                f"{key}={payload.get(key)!r} (want {value!r})"
+                for key, value in expect_json.items()
+                if payload.get(key) != value
+            ]
+            if mismatches:
+                return Verdict(
+                    status="BLOCK",
+                    reasons=[f"post-deploy health payload mismatch: {', '.join(mismatches)}"],
+                    raw={"url": url, "status_code": int(code), "body": summarize(body)},
+                )
+        return Verdict(status="PASS", raw={"url": url, "status_code": int(code)})