sembl-stack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. sembl_stack/__init__.py +3 -0
  2. sembl_stack/adapters/__init__.py +0 -0
  3. sembl_stack/adapters/_redact.py +19 -0
  4. sembl_stack/adapters/base.py +179 -0
  5. sembl_stack/adapters/codegraph_cbm.py +95 -0
  6. sembl_stack/adapters/deploy_vercel.py +215 -0
  7. sembl_stack/adapters/execute_aider.py +115 -0
  8. sembl_stack/adapters/execute_claude.py +114 -0
  9. sembl_stack/adapters/execute_mock.py +53 -0
  10. sembl_stack/adapters/execute_opencode.py +114 -0
  11. sembl_stack/adapters/merge_git.py +107 -0
  12. sembl_stack/adapters/postdeploy_http.py +82 -0
  13. sembl_stack/adapters/review_coderabbit.py +215 -0
  14. sembl_stack/adapters/review_llm.py +142 -0
  15. sembl_stack/adapters/review_mock.py +42 -0
  16. sembl_stack/adapters/sandbox_worktree.py +79 -0
  17. sembl_stack/adapters/spec_sembl.py +91 -0
  18. sembl_stack/adapters/verify_sembl.py +77 -0
  19. sembl_stack/artifacts.py +207 -0
  20. sembl_stack/cli.py +759 -0
  21. sembl_stack/config.py +87 -0
  22. sembl_stack/contextgraph.py +154 -0
  23. sembl_stack/doctor.py +111 -0
  24. sembl_stack/loop.py +380 -0
  25. sembl_stack/onboarding.py +272 -0
  26. sembl_stack/presets.py +114 -0
  27. sembl_stack/profile.py +193 -0
  28. sembl_stack/reconciliation.py +138 -0
  29. sembl_stack/registry.py +91 -0
  30. sembl_stack/rsi.py +188 -0
  31. sembl_stack/runner.py +134 -0
  32. sembl_stack/session.py +86 -0
  33. sembl_stack/specgraph.py +146 -0
  34. sembl_stack/store.py +112 -0
  35. sembl_stack/tracing.py +51 -0
  36. sembl_stack/transport/__init__.py +0 -0
  37. sembl_stack/transport/mcp_client.py +58 -0
  38. sembl_stack/tui.py +86 -0
  39. sembl_stack/views.py +74 -0
  40. sembl_stack/wizard.py +233 -0
  41. sembl_stack-0.1.0.dist-info/METADATA +165 -0
  42. sembl_stack-0.1.0.dist-info/RECORD +45 -0
  43. sembl_stack-0.1.0.dist-info/WHEEL +4 -0
  44. sembl_stack-0.1.0.dist-info/entry_points.txt +2 -0
  45. sembl_stack-0.1.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,114 @@
1
+ """L3 executor: Claude Code (Anthropic) driven headless in the sandbox.
2
+
3
+ Hands the task (plus the gate's feedback on retry, and the in-scope file list) to
4
+ `claude -p` (print / non-interactive mode) inside the worktree, then reads back the diff.
5
+ Claude Code uses the operator's own logged-in session (OAuth/keychain) — sembl-stack never
6
+ handles a token. Requires `claude` on PATH.
7
+
8
+ Why the flags:
9
+ -p / --print non-interactive: run the task and exit (no TUI).
10
+ --dangerously-skip-permissions headless edits otherwise block on a per-write approval
11
+ prompt. Safe here: the agent runs inside a disposable
12
+ git-worktree sandbox (the cage, not the repo) — only the
13
+ diff is gated and the worktree is thrown away after.
14
+ --output-format json the result envelope carries `total_cost_usd` + `usage`
15
+ — the REAL per-attempt cost signal the run store's
16
+ `attempts_log` (C1.3) and the RSI-L1 readout consume.
17
+ Parsed best-effort: a non-JSON stdout (older CLI, crash
18
+ mid-stream) degrades to the raw text, never an error —
19
+ and never an invented cost.
20
+ We deliberately do NOT pass --bare: that would force ANTHROPIC_API_KEY auth and ignore the
21
+ operator's OAuth login. Default auth keeps the "never see a token" property.
22
+ """
23
+ from __future__ import annotations
24
+
25
+ import json
26
+ import shutil
27
+ import subprocess # noqa: F401 (kept for tests that monkeypatch cc.subprocess.run)
28
+
29
+ from .base import (
30
+ Bounds,
31
+ ExecutionResult,
32
+ Sandbox,
33
+ Task,
34
+ changed_files_from_diff as _changed_files,
35
+ run_executor,
36
+ scrub_secrets,
37
+ )
38
+
39
+
40
class ClaudeCodeExecutor:
    """L3 executor that drives the `claude` CLI headlessly inside the sandbox worktree.

    `model` is forwarded as `--model` when set; `timeout` is handed to `run_executor`.
    """

    def __init__(self, model: str | None = None, timeout: int = 900):
        self.timeout = timeout
        self.model = model

    def run(self, task: Task, bounds: Bounds, sandbox: Sandbox,
            feedback: str | None) -> ExecutionResult:
        """Run one attempt in `sandbox.workdir` and return its diff plus a scrubbed report.

        Raises RuntimeError when `claude` cannot be located on PATH.
        """
        binary = shutil.which("claude")
        if not binary:
            raise RuntimeError(
                "L3: `claude` not found on PATH. Install Claude Code, or set execute: mock.")

        model_args = ["--model", self.model] if self.model else []
        argv = [
            binary, "-p", "--dangerously-skip-permissions", "--output-format", "json",
            *model_args,
            self._prompt(task, bounds, feedback),  # the prompt is always the final argument
        ]
        exit_code, stdout, stderr, timed_out = run_executor(
            argv, cwd=sandbox.workdir, timeout=self.timeout)

        patch = sandbox.diff()
        report = dict(
            files_modified=_changed_files(patch),
            agent="claude-code",
            model=self.model,
            exit_code=exit_code,
            # Keep only the tail of each stream, after secrets have been scrubbed.
            output=scrub_secrets(stdout)[-2000:],
            stderr=scrub_secrets(stderr)[-1000:],
        )
        # Cost/usage keys are merged in only when the CLI actually reported them.
        report.update(_usage_from_result_json(stdout))
        if timed_out:
            # A timeout is reported through the result, not raised as an exception.
            report.update(error="timeout", timed_out=True)
        return ExecutionResult(diff=patch, report=report, workdir=sandbox.workdir)

    @staticmethod
    def _prompt(task: Task, bounds: Bounds, feedback: str | None) -> str:
        """Assemble the prompt: task text, scope rules, then (on retry) the feedback."""
        sections = [task.text, ""]
        if bounds.editable_paths:
            allowed = ", ".join(bounds.editable_paths)
            sections.append("You may ONLY edit these paths: " + allowed)
        if bounds.forbidden_areas:
            banned = ", ".join(bounds.forbidden_areas)
            sections.append("Never touch: " + banned)
        if feedback:
            sections.extend(("", feedback))
        return "\n".join(sections)
86
+
87
+
88
def _usage_from_result_json(out: str) -> dict:
    """Cost/usage from the `--output-format json` result envelope — or {} (never invented).

    The envelope is `{"type": "result", ..., "total_cost_usd": <float>, "usage": {...}}`.
    Anything that doesn't parse as a JSON object (older CLI, text output, truncated
    stream, `None`) yields {}.

    Returns a dict with at most two keys:
      * "cost"  — `total_cost_usd`, only when it is a real number;
      * "usage" — a copy of the `usage` object, with "total_tokens" added when it is
        absent and both "input_tokens" and "output_tokens" are real integers.

    JSON booleans are rejected as numbers: `bool` is a subclass of `int` in Python, so
    without the explicit check `true` would be recorded as a cost of 1 (or summed as a
    token count) — exactly the kind of invented figure this function must not produce.
    """
    try:
        data = json.loads(out)
    except (ValueError, TypeError):  # not JSON at all, or `out` is not str/bytes
        return {}
    if not isinstance(data, dict):
        return {}

    def _is_number(value, kinds) -> bool:
        return isinstance(value, kinds) and not isinstance(value, bool)

    extra: dict = {}
    cost = data.get("total_cost_usd")
    if _is_number(cost, (int, float)):
        extra["cost"] = cost
    usage = data.get("usage")
    if isinstance(usage, dict):
        usage = dict(usage)  # copy so the parsed envelope is never mutated
        if "total_tokens" not in usage:
            parts = [usage.get(k) for k in ("input_tokens", "output_tokens")]
            if all(_is_number(p, int) for p in parts):
                usage["total_tokens"] = sum(parts)  # derived from reported parts only
        extra["usage"] = usage
    return extra
@@ -0,0 +1,53 @@
1
+ """L3 executor: a deterministic mock — no model, no keys.
2
+
3
+ It exists to prove the *loop* end to end (and to make the retry-on-BLOCK behaviour
4
+ visible): the first attempt deliberately wanders out of scope and fabricates a claim
5
+ (→ BLOCK); once the gate's feedback arrives, it behaves (→ PASS). Swap for `opencode`
6
+ to drive a real agent.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ from pathlib import Path
11
+
12
+ from .base import Bounds, ExecutionResult, Sandbox, Task
13
+
14
+
15
class MockExecutor:
    """Deterministic stand-in executor: misbehaves without feedback, behaves with it."""

    def run(self, task: Task, bounds: Bounds, sandbox: Sandbox,
            feedback: str | None) -> ExecutionResult:
        """Write one file into the sandbox and return the resulting diff and report.

        Without `feedback` the edit lands in the first forbidden area (or the repo root)
        and the report lists a file that was never written; with `feedback` the edit
        lands in the first editable path and the report is accurate.
        """
        root = Path(sandbox.workdir)

        if not feedback:
            # First attempt: misbehave — edit outside scope + fabricate a claim.
            stray = bounds.forbidden_areas[0] if bounds.forbidden_areas else ""
            if stray:
                rel = self._write(root, stray, "stray.txt", "# out-of-scope edit\n")
            else:
                rel = self._write(root, "", "OUTSIDE.txt", "# out-of-scope\n")
            report = {
                "files_modified": [rel, "src/imaginary.py"],  # imaginary = fabrication
                "tests_passed": True,                         # no evidence attached
            }
            return ExecutionResult(diff=sandbox.diff(), report=report, workdir=str(root))

        # Retry: stay in the first editable path and report honestly, with evidence.
        target = bounds.editable_paths[0] if bounds.editable_paths else "."
        rel = self._write(root, target, "patch.py", "# in-scope change\nVALUE = 1\n")
        report = {
            "files_modified": [rel],
            "tests_passed": True,
            "exit_code": 0,
            "output": "pytest: 1 passed in 0.1s",
        }
        return ExecutionResult(diff=sandbox.diff(), report=report, workdir=str(root))

    @staticmethod
    def _write(root: Path, target: str, default_name: str, content: str) -> str:
        """Append `content` to `target` (file or dir). Returns the repo-relative path.

        `target` is treated as a concrete file only when it has a suffix, does not end
        with "/", and is not an existing directory; otherwise `default_name` is created
        inside it. The existing-directory check matters because a suffix alone is not
        proof of a file: target "." resolves to `root`, and a root (or any directory)
        whose name contains a dot would otherwise be opened for writing and fail.
        """
        t = (root / target) if target else root
        looks_like_file = bool(target) and not target.endswith("/") and bool(t.suffix)
        if looks_like_file and not t.is_dir():
            path = t                  # target is a concrete file
        else:
            path = t / default_name   # target is a dir (or repo root)
        path.parent.mkdir(parents=True, exist_ok=True)
        with path.open("a", encoding="utf-8") as fh:
            fh.write(content)
        # Normalise to forward slashes so the reported path is platform-independent.
        return str(path.relative_to(root)).replace("\\", "/")
@@ -0,0 +1,114 @@
1
+ """L3 executor: OpenCode (OSS, 75+ models) driven headless in the sandbox.
2
+
3
+ Hands the task (plus the gate's feedback on retry, and the in-scope file list) to
4
+ `opencode run` inside the worktree, then reads back the diff. OpenCode supplies its own
5
+ model key; sembl-stack never sees a token. Requires `opencode` on PATH.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import shutil
10
+ import subprocess # noqa: F401 (kept for tests that monkeypatch oc.subprocess.run)
11
+ from pathlib import Path
12
+
13
+ from .base import (
14
+ Bounds,
15
+ ExecutionResult,
16
+ Sandbox,
17
+ Task,
18
+ changed_files_from_diff as _changed_files,
19
+ run_executor,
20
+ scrub_secrets,
21
+ )
22
+
23
+
24
class OpenCodeExecutor:
    """L3 executor that drives `opencode run` headlessly against the sandbox clone.

    `model` is forwarded as `--model` when set; `timeout` is handed to `run_executor`.
    """

    def __init__(self, model: str | None = None, timeout: int = 900):
        self.timeout = timeout
        self.model = model

    def run(self, task: Task, bounds: Bounds, sandbox: Sandbox,
            feedback: str | None) -> ExecutionResult:
        """Run one attempt and return the sandbox diff plus a scrubbed report.

        Raises RuntimeError when no way of launching `opencode` can be resolved.
        """
        argv_prefix = _resolve_opencode()
        if not argv_prefix:
            raise RuntimeError(
                "L3: `opencode` not found on PATH. Install it, or set execute: mock.")

        message = self._prompt(task, bounds, feedback)
        # Newlines are kept whenever argv goes straight to the binary. Only the legacy
        # `cmd /c` shim fallback cuts an argument at an embedded newline, so the prompt
        # is flattened to a single line in that one case — never on the direct path.
        via_cmd_shim = argv_prefix[0].lower() == "cmd"
        if via_cmd_shim:
            message = " ".join(message.splitlines())

        # Why each flag is there:
        #   --dangerously-skip-permissions  a headless run would otherwise stall on an
        #       approval prompt for every write; the agent only ever touches a
        #       disposable sandbox whose diff is what gets gated.
        #   --pure  ignore the operator's personal plugins/skills/global agents so the
        #       run is driven only by the task and bounds handed over here.
        #   --dir   pin the project root to the sandbox explicitly; opencode resolves
        #       its root by its own logic rather than the inherited cwd, and without
        #       this it edited the source repo and left the clone's diff empty.
        argv = [
            *argv_prefix,
            "run", "--pure", "--dangerously-skip-permissions",
            "--dir", sandbox.workdir,
            message,
        ]
        if self.model:
            argv.extend(["--model", self.model])
        exit_code, stdout, stderr, timed_out = run_executor(
            argv, cwd=sandbox.workdir, timeout=self.timeout)

        patch = sandbox.diff()
        report = dict(
            files_modified=_changed_files(patch),
            agent="opencode",
            model=self.model,
            exit_code=exit_code,
            # Keep only the tail of each stream, after secrets have been scrubbed.
            output=scrub_secrets(stdout)[-2000:],
            stderr=scrub_secrets(stderr)[-1000:],
        )
        if timed_out:
            # A timeout is reported through the result, not raised as an exception.
            report.update(error="timeout", timed_out=True)
        return ExecutionResult(diff=patch, report=report, workdir=sandbox.workdir)

    @staticmethod
    def _prompt(task: Task, bounds: Bounds, feedback: str | None) -> str:
        """Assemble the prompt: task text, scope rules, then (on retry) the feedback."""
        sections = [task.text, ""]
        if bounds.editable_paths:
            allowed = ", ".join(bounds.editable_paths)
            sections.append("You may ONLY edit these paths: " + allowed)
        if bounds.forbidden_areas:
            banned = ", ".join(bounds.forbidden_areas)
            sections.append("Never touch: " + banned)
        if feedback:
            sections.extend(("", feedback))
        return "\n".join(sections)
89
+
90
+
91
def _resolve_opencode() -> list[str]:
    """Return the argv prefix that launches opencode, or [] when it is not on PATH.

    Resolution order:
      1. the native `node_modules/opencode-ai/bin/opencode.exe` sitting next to whatever
         `which` found — invoked directly so argv is passed through untouched;
      2. a `.cmd`/`.bat` shim, run through `cmd /c` (the caller flattens newlines for
         this case, since `cmd /c` would otherwise truncate a multi-line prompt);
      3. a `.ps1` shim, run through `powershell -File`;
      4. otherwise the path `which` returned, as-is.
    """
    found = shutil.which("opencode")
    if not found:
        return []

    # The binary an npm shim wraps lives in a fixed location relative to the shim.
    bundled = Path(found).parent.joinpath(
        "node_modules", "opencode-ai", "bin", "opencode.exe")
    if bundled.is_file():
        return [str(bundled)]

    lowered = found.lower()
    if lowered.endswith(".ps1"):
        return ["powershell", "-NoProfile", "-ExecutionPolicy", "Bypass", "-File", found]
    if lowered.endswith((".cmd", ".bat")):
        return ["cmd", "/c", found]
    return [found]
@@ -0,0 +1,107 @@
1
+ """L6.5 gated merge adapter using local git.
2
+
3
+ The stage owns the MergeRecord, not the VCS mechanism. PASS/WARN verdicts are gated at the
4
+ CLI; this adapter performs the merge into the target branch and records the merge commit.
5
+ No credentials ever enter the artifact.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import shutil
10
+ import subprocess
11
+ import time
12
+ from pathlib import Path
13
+
14
+ from ._redact import summarize
15
+ from .base import MergeRecord
16
+
17
+
18
class GitMergeAdapter:
    """Merge a ref into a target branch with the local `git` and describe the outcome.

    Every path through `merge` returns a MergeRecord; failures are reported with
    status="failed" and a "reason" in `data` rather than raised.
    """

    def __init__(self, timeout: int = 300):
        # Passed as `timeout=` to every individual `git` subprocess call.
        self.timeout = timeout

    def available(self) -> bool:
        """Return True when a `git` executable can be found on PATH."""
        return shutil.which("git") is not None

    def merge(self, repo: str, *, into: str = "main", source: str = "HEAD",
              no_ff: bool = True, message: str | None = None) -> MergeRecord:
        """Check out `into`, merge `source` into it, then return to the starting branch.

        Args:
            repo: path to the repository; resolved to an absolute path for `git -C`.
            into: branch that receives the merge.
            source: ref to merge.
            no_ff: pass `--no-ff` to `git merge`.
            message: merge commit message; a default is generated when omitted.

        Returns:
            A MergeRecord with status "merged" (and the resulting HEAD commit) or
            "failed" (with the reason and whatever diagnostics are available).
        """
        repo_path = str(Path(repo).resolve())

        def _git(args: list[str]):
            return subprocess.run(
                ["git", "-C", repo_path, *args], capture_output=True, text=True,
                encoding="utf-8", errors="replace", timeout=self.timeout)

        t0 = time.perf_counter()

        def _elapsed() -> float:
            return round(time.perf_counter() - t0, 3)

        # target branch must exist
        check = _git(["rev-parse", "--verify", "--quiet", into])
        if check.returncode != 0:
            return MergeRecord(
                target_branch=into, source_ref=source, status="failed",
                data={"reason": "target branch not found",
                      "latency_s": _elapsed()})

        prev = _git(["rev-parse", "--abbrev-ref", "HEAD"]).stdout.strip()

        # Switching to the target MUST succeed before we merge. A dirty tree, a locked
        # branch, or any refusal returns non-zero; if we ignored it the merge would run on
        # whatever branch is currently checked out while the record claims `into` was
        # merged — a false accountability record (and it could mutate the source branch).
        co = _git(["checkout", into])
        if co.returncode != 0:
            return MergeRecord(
                target_branch=into, source_ref=source, status="failed",
                data={"reason": "checkout of target branch failed",
                      "returncode": co.returncode, "previous_branch": prev,
                      "latency_s": _elapsed(),
                      "stderr": summarize(co.stderr)})

        # Defense in depth: confirm HEAD actually moved to the target before merging.
        cur = _git(["rev-parse", "--abbrev-ref", "HEAD"]).stdout.strip()
        if cur != into:
            # The checkout succeeded, so HEAD has moved (e.g. detached on a tag or a
            # fully-qualified ref). Put the repo back where it started before bailing
            # out, exactly as the merge-failure path does — a refused merge must not
            # leave the working copy on a different ref.
            restored = self._restore(_git, prev, into)
            return MergeRecord(
                target_branch=into, source_ref=source, status="failed",
                data={"reason": f"expected to be on '{into}' but on '{cur}' after checkout",
                      "previous_branch": prev, "restored_to_branch": restored,
                      "latency_s": _elapsed()})

        msg = message or f"merge {source} into {into} (sembl-gated)"
        merge_cmd = ["merge", *(["--no-ff"] if no_ff else []), "-m", msg, source]
        m = _git(merge_cmd)

        if m.returncode != 0:
            _git(["merge", "--abort"])  # best-effort cleanup of a conflicted merge
            self._restore(_git, prev, into)
            return MergeRecord(
                target_branch=into, source_ref=source, status="failed",
                data={"reason": "merge failed", "returncode": m.returncode,
                      "previous_branch": prev,
                      "latency_s": _elapsed(),
                      "command": _safe_command(["git", *merge_cmd]),
                      "stdout": summarize(m.stdout), "stderr": summarize(m.stderr)})

        # Read the merge commit while still on `into`, before switching back.
        sha = _git(["rev-parse", "HEAD"]).stdout.strip()
        restored = self._restore(_git, prev, into)
        return MergeRecord(
            target_branch=into, source_ref=source, commit=sha or None, status="merged",
            data={"no_ff": no_ff, "previous_branch": prev, "restored_to_branch": restored,
                  "latency_s": _elapsed(),
                  "command": _safe_command(["git", *merge_cmd]),
                  "stdout": summarize(m.stdout), "stderr": summarize(m.stderr)})

    @staticmethod
    def _restore(_git, prev: str, into: str) -> str | None:
        """Best-effort: leave the repo on the branch it started on (the merge commit stays
        on `into` regardless).

        Returns `prev` when the checkout back to it succeeded. Returns None when the
        restore was skipped (`prev` is empty, the literal "HEAD" reported for a detached
        HEAD, or already `into`) or when the checkout failed.
        """
        if not prev or prev in ("HEAD", into):
            return None
        return prev if _git(["checkout", prev]).returncode == 0 else None
97
+
98
+
99
def _safe_command(cmd: list[str]) -> list[str]:
    """Return a copy of `cmd` that is safe to store, with any `--token` value masked.

    Both spellings are covered: `--token VALUE` (the following argument becomes
    "<redacted>") and `--token=VALUE` (rewritten to "--token=<redacted>"). All other
    arguments are copied unchanged; the input list is not modified.
    """
    safe: list[str] = []
    redact_next = False
    for part in cmd:
        if redact_next:
            # This argument is the value that followed a bare `--token`.
            safe.append("<redacted>")
            redact_next = False
        elif part.startswith("--token="):
            safe.append("--token=<redacted>")
        else:
            safe.append(part)
            redact_next = part == "--token"
    return safe
@@ -0,0 +1,82 @@
1
+ """L8 deterministic post-deploy health gate."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ from urllib.error import URLError
6
+ from urllib.parse import urljoin
7
+ from urllib.request import Request, urlopen
8
+
9
+ from ._redact import summarize
10
+ from .base import Delivery, Verdict
11
+
12
+
13
class HttpPostDeployGate:
    """Post-deploy health gate: fetch a health URL and turn the response into a Verdict.

    Every failure mode — undeployed delivery, transport error, HTTP error status,
    unparseable or mismatching health payload — yields a BLOCK verdict, not an exception.
    """

    def __init__(self, health_path: str = "/", expect_json: dict | None = None):
        # Defaults come from config `options.postdeploy` (threaded by the registry) so the
        # spine can enforce a real health contract — e.g. {ok, supabaseConfigured} — by
        # config alone. A CLI flag overrides per-call; None means "use the configured
        # default".
        self.health_path = health_path
        self.expect_json = expect_json

    def verify(self, delivery: Delivery, *, health_path: str | None = None,
               timeout_s: float = 10.0,
               expect_json: dict | None = None) -> Verdict:
        """Probe `delivery.url` + health path and return PASS or BLOCK.

        Args:
            delivery: must have status "deployed" and a URL, otherwise BLOCK.
            health_path: per-call override of the configured path (None = configured).
            timeout_s: passed to `urlopen` as its timeout.
            expect_json: per-call override of the expected top-level key/value pairs
                (None = configured). When set, the body must be a JSON object whose
                listed keys equal the listed values.
        """
        # Local imports keep the block self-contained; the module only imports URLError.
        from http.client import HTTPException
        from urllib.error import HTTPError

        health_path = health_path if health_path is not None else self.health_path
        expect_json = expect_json if expect_json is not None else self.expect_json
        if delivery.status != "deployed" or not delivery.url:
            return Verdict(
                status="BLOCK",
                reasons=["delivery is not deployed or has no URL"],
                raw={"delivery": delivery.to_dict()},
            )

        url = urljoin(delivery.url.rstrip("/") + "/", health_path.lstrip("/"))
        try:
            req = Request(url, headers={"User-Agent": "sembl-stack-postdeploy"})
            with urlopen(req, timeout=timeout_s) as resp:
                code = getattr(resp, "status", None)
                if code is None:
                    code = resp.getcode()
                body = resp.read(2048).decode("utf-8", "replace")
        except HTTPError as exc:
            # urlopen raises for error statuses instead of returning a response.
            # HTTPError is a URLError subclass, so it must be handled first or the
            # status code and body are lost behind a generic "failed: HTTPError".
            # An HTTPError is always a BLOCK, whatever its code.
            return Verdict(
                status="BLOCK",
                reasons=[f"post-deploy health check returned HTTP {exc.code}"],
                raw={"url": url, "status_code": exc.code,
                     "body": summarize(self._error_body(exc))},
            )
        except (OSError, URLError, ValueError, HTTPException) as exc:
            # ValueError: malformed/unsupported URL; HTTPException: broken HTTP framing.
            # Only the exception type is surfaced, never its message.
            return Verdict(
                status="BLOCK",
                reasons=[f"post-deploy health check failed: {type(exc).__name__}"],
                raw={"url": url, "error": type(exc).__name__},
            )

        if not (200 <= int(code) < 400):
            return Verdict(
                status="BLOCK",
                reasons=[f"post-deploy health check returned HTTP {code}"],
                raw={"url": url, "status_code": int(code), "body": summarize(body)},
            )

        # A 2xx/3xx is necessary but not sufficient: a misconfigured app can return
        # 200 with a useless body. When the caller declares an expected payload, assert
        # the health JSON actually reports the app healthy.
        if expect_json:
            try:
                payload = json.loads(body)
            except (ValueError, TypeError):
                return Verdict(
                    status="BLOCK",
                    reasons=["post-deploy health payload is not valid JSON"],
                    raw={"url": url, "status_code": int(code), "body": summarize(body)},
                )
            if not isinstance(payload, dict):
                # Valid JSON that is a list/string/number has no keys to compare.
                return Verdict(
                    status="BLOCK",
                    reasons=["post-deploy health payload is not a JSON object"],
                    raw={"url": url, "status_code": int(code), "body": summarize(body)},
                )
            # Only the expected keys are named in the reason — never the full payload.
            mismatches = [
                f"{key}={payload.get(key)!r} (want {value!r})"
                for key, value in expect_json.items()
                if payload.get(key) != value
            ]
            if mismatches:
                return Verdict(
                    status="BLOCK",
                    reasons=[f"post-deploy health payload mismatch: {', '.join(mismatches)}"],
                    raw={"url": url, "status_code": int(code), "body": summarize(body)},
                )

        return Verdict(status="PASS", raw={"url": url, "status_code": int(code)})

    @staticmethod
    def _error_body(exc) -> str:
        """Best-effort read of (at most 2048 bytes of) an HTTPError's body; "" on failure."""
        try:
            return exc.read(2048).decode("utf-8", "replace")
        except (OSError, ValueError, AttributeError):
            return ""
        finally:
            try:
                exc.close()  # release the underlying connection
            except (OSError, ValueError, AttributeError):
                pass