npm - delimit-cli - Versions diffs - 4.6.0 → 4.6.2 - Mend

delimit-cli 4.6.0 → 4.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

package/CHANGELOG.md +71 -8
package/bin/delimit-cli.js +59 -9
package/bin/delimit-setup.js +7 -3
package/gateway/ai/agent_dispatch.py +5 -0
package/gateway/ai/backends/gateway_core.py +6 -0
package/gateway/ai/backends/git_health.py +175 -0
package/gateway/ai/backends/memory_bridge.py +210 -53
package/gateway/ai/backends/tools_infra.py +93 -0
package/gateway/ai/backends/tools_real.py +53 -7
package/gateway/ai/cli_contract.py +185 -0
package/gateway/ai/governance.py +181 -0
package/gateway/ai/heartbeat.py +290 -0
package/gateway/ai/ledger_manager.py +81 -4
package/gateway/ai/ledger_proof.py +127 -0
package/gateway/ai/license.py +132 -47
package/gateway/ai/license_core.cpython-310-x86_64-linux-gnu.so +0 -0
package/gateway/ai/license_core.pyi +1 -1
package/gateway/ai/outreach_loop_daemon.py +349 -0
package/gateway/ai/outreach_substantive.py +768 -7
package/gateway/ai/pro_tools.yaml +167 -0
package/gateway/ai/reddit_scanner.py +7 -1
package/gateway/ai/server.py +295 -116
package/gateway/ai/session_phoenix.py +121 -0
package/gateway/ai/social_queue.py +166 -10
package/gateway/ai/tenant_auth.py +329 -0
package/gateway/ai/tenant_data.py +339 -0
package/gateway/ai/tenant_paths.py +150 -0
package/gateway/core/diff_engine_v2.py +517 -54
package/gateway/core/semver_classifier.py +52 -6
package/package.json +4 -1
package/scripts/build-license-core.sh +0 -85
package/scripts/security-check.sh +0 -66
package/scripts/test-license-core-so.sh +0 -107

package/gateway/ai/cli_contract.py ADDED Viewed

@@ -0,0 +1,185 @@
+"""LED-1415 — CLI subprocess contract.
+The deliberation engine drives 4 model CLIs as subprocesses
+(claude / codex / gemini / cursor) and treats their stdout as model
+verdict text. Three classes of bug have surfaced in this pipeline:
+  1. Banner contamination — the Delimit governance shim leaks ASCII
+     art onto stdout instead of stderr (PR #154, fixed by LED-1428).
+  2. Empty/silent responses — CLI exits 0 but stdout is empty
+     (transient API issues, OOM, network blips). Caught by LED-1416's
+     retry state machine.
+  3. Schema drift — CLI changes its output shape between versions
+     (e.g., adds an auto-correction line at the top). Caught
+     reactively by failing deliberation panels.
+This module holds the ONE contract that every CLI response must
+satisfy + the ONE validator that enforces it. Both the per-CLI mock
+tests (tests/test_cli_contract.py) AND the weekly real-CLI smoke
+script (scripts/smoke_cli_contracts.py) call validate_cli_contract()
+so the contract definition lives in exactly one place — extending
+it never requires remembering to update two places.
+"""
+from __future__ import annotations
+import re
+from dataclasses import dataclass, field
+from typing import List, Optional
+# The 4 known CLIs the deliberation engine targets. cursor is included
+# even though it's not yet installed in the dev environment — adding
+# it to the contract surface now means the validator is ready when it
+# lands; smoke skips when the binary isn't present.
+KNOWN_CLI_NAMES = ("claude", "codex", "gemini", "cursor")
+# Minimum scrubbed-response length we'll accept as "looks like a real
+# model verdict" rather than "leftover garbage after banner strip."
+# Calibrated against historical scrub-debug.jsonl entries: every real
+# round-1/round-2 verdict from past deliberations was >= 60 chars;
+# every banner-only contamination was < 30 chars. 30 is the cutoff
+# the production scrubber already uses; keeping that here means the
+# validator + the scrubber agree.
+MIN_VERDICT_LEN = 30
+# Patterns that signal "the response is contamination, not a verdict."
+# Each gets the response REJECTED even if length and scrub passed.
+# Every pattern is anchored with ^ and compiled without re.MULTILINE, so
+# it only fires when the marker is the very first text of the string it
+# is searched against.
+_CONTAMINATION_MARKERS = (
+    re.compile(r"^\[scrub:\s*contaminated\b", re.IGNORECASE),
+    re.compile(r"^\[.+\bunavailable\b.+\bnot found in PATH\]", re.IGNORECASE),
+    re.compile(r"^\[.+\bskipped under INTERNAL_PYTEST_GUARD", re.IGNORECASE),
+    re.compile(r"^\[.+\btimed out after\b", re.IGNORECASE),
+    re.compile(r"^\[.+\breturned empty response\]", re.IGNORECASE),
+    re.compile(r"^\[.+\berror:.+\]\s*$", re.IGNORECASE),
+)
+# A response should contain at least ONE of these markers to be
+# recognizable as a panel verdict. The deliberation engine prompts all
+# models to end with `VERDICT: ...` so we expect to see it. Falling
+# back: "AGREE" / "DISAGREE" / "REMEDIATE" / "AGREE WITH MODIFICATIONS"
+# all appear in real responses even when the trailing VERDICT line is
+# omitted by a chatty model.
+_VERDICT_HINT_RE = re.compile(
+    r"\b(VERDICT:|AGREE|DISAGREE|REMEDIATE|APPROVE|REJECT)\b",
+    re.IGNORECASE,
+)
+@dataclass
+class CliContractResult:
+    """Outcome of validating one CLI's response.
+    `ok` is True iff every contract clause passed. `failures` is the
+    list of clauses that fired — the smoke script ntfys with this list
+    so the operator can see exactly what shape the regression took.
+    """
+    cli: str  # Name of the CLI whose response was validated
+    raw_len: int  # Length of the raw stdout text, before scrubbing
+    scrubbed_len: int  # Length of the scrubbed, stripped text (0 when scrubbing itself failed)
+    ok: bool  # True iff `failures` is empty
+    failures: List[str] = field(default_factory=list)  # One short reason string per failed clause
+    preview: str = ""  # First 200 chars of scrubbed text, for log readability
+def validate_cli_contract(
+    cli_name: str,
+    raw_stdout: str,
+    raw_stderr: str = "",
+    expect_verdict_hint: bool = True,
+) -> CliContractResult:
+    """Apply the per-CLI contract to one subprocess response.
+    Mirrors the EXACT production scrub path so the validator's view
+    matches what ai/deliberation.py's _call_cli sees. Failures append
+    a short reason string; an empty failures list means the response
+    is contract-clean.
+    Args:
+        cli_name: which CLI produced this (claude/codex/gemini/cursor);
+            used in the failure messages.
+        raw_stdout: subprocess.stdout bytes decoded to str.
+        raw_stderr: subprocess.stderr bytes decoded to str. The
+            contract is permissive on stderr — banner output is
+            ALLOWED there (intentional shim behavior); but completely
+            empty stderr + completely empty stdout is suspicious.
+        expect_verdict_hint: when True, fail the response if it
+            doesn't contain at least one verdict marker. Mock tests
+            and the smoke script set this; tests of low-content
+            responses (e.g., a `--version` smoke) set False.
+    Returns:
+        CliContractResult with `ok`, `failures`, and a preview.
+    """
+    # Import lazily so this module can be imported in a context where
+    # ai.deliberation isn't available (e.g., the smoke script when
+    # gateway code path changes).
+    failures: List[str] = []
+    try:
+        from ai.deliberation import _scrub_cli_output
+        scrubbed = _scrub_cli_output(raw_stdout, source=cli_name).strip()
+    except Exception as exc:
+        return CliContractResult(
+            cli=cli_name,
+            raw_len=len(raw_stdout),
+            scrubbed_len=0,
+            ok=False,
+            failures=[f"scrub_failed:{type(exc).__name__}:{str(exc)[:80]}"],
+            preview="",
+        )
+    # 1. Contamination markers — if the scrubber returned one, fail.
+    for pat in _CONTAMINATION_MARKERS:
+        if pat.search(scrubbed):
+            failures.append(f"contamination_marker:{pat.pattern[:40]}")
+            break
+    # 2. Minimum length. Below MIN_VERDICT_LEN is almost certainly
+    # garbage even if scrub didn't tag it.
+    if len(scrubbed) < MIN_VERDICT_LEN and "contamination_marker" not in " ".join(failures):
+        failures.append(f"too_short:{len(scrubbed)}<{MIN_VERDICT_LEN}")
+    # 3. Verdict hint — at least one of VERDICT:/AGREE/DISAGREE/REMEDIATE/
+    # APPROVE/REJECT must appear. Skip when expect_verdict_hint=False.
+    if expect_verdict_hint and not _VERDICT_HINT_RE.search(scrubbed):
+        failures.append("no_verdict_hint")
+    # 4. Doesn't start with a known banner prefix (defense-in-depth on
+    # top of scrub). If a brand-new banner shape lands tomorrow that
+    # the scrubber doesn't know about, this should catch it.
+    if scrubbed.startswith("["):
+        # Bracketed prefix is almost always a tool-emitted status line
+        # (e.g. "[Delimit]" / "[claude error: ...]") not a model verdict.
+        if not any(scrubbed.lower().startswith(p) for p in (
+            "[delimit", "[scrub:", "[claude", "[codex", "[gemini", "[cursor",
+        )):
+            # Unknown bracketed prefix — surface for inspection
+            failures.append(f"unknown_bracketed_prefix:{scrubbed[:40]!r}")
+    return CliContractResult(
+        cli=cli_name,
+        raw_len=len(raw_stdout),
+        scrubbed_len=len(scrubbed),
+        ok=not failures,
+        failures=failures,
+        preview=scrubbed[:200],
+    )
+def format_contract_report(results: List[CliContractResult]) -> str:
+    """Human-readable summary of N validation results for ntfy / logs."""
+    lines = []
+    n_ok = sum(1 for r in results if r.ok)
+    lines.append(f"CLI contract: {n_ok}/{len(results)} clean")
+    for r in results:
+        flag = "OK" if r.ok else "FAIL"
+        lines.append(f"  [{flag}] {r.cli:8s} raw={r.raw_len}B scrubbed={r.scrubbed_len}B")
+        if not r.ok:
+            for f in r.failures:
+                lines.append(f"           ↳ {f}")
+            if r.preview:
+                lines.append(f"           preview: {r.preview[:100]!r}")
+    return "\n".join(lines)

package/gateway/ai/governance.py CHANGED Viewed

@@ -13,7 +13,10 @@ This replaces _with_next_steps — governance IS the next step system.
 import json
 import logging
 import os
+import re
+import subprocess
 import time
+from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any, Dict, List, Optional
@@ -826,6 +829,184 @@ def govern(tool_name: str, result: Dict[str, Any], project_path: str = ".") -> D
     return governed_result
+# ─────────────────────────────────────────────────────────────────────
+# LED-2214b-followup — sensor_github_issue sync impl
+# ─────────────────────────────────────────────────────────────────────
+#
+# The outreach daemon's monitor_phase needs to call the same logic that
+# delimit_sensor_github_issue (MCP tool) runs, but synchronously and
+# without the _with_next_steps wrapping. Before this extraction the
+# daemon tried to import the impl from two paths that don't exist —
+# `ai.governance._sensor_github_issue_impl` and
+# `backends.governance_bridge.sensor_github_issue` — and silently fell
+# back to "monitor skipped" on every tick, leaving the entire reply-
+# tracking cycle dead.
+#
+# Now both callers share this function. The MCP tool wraps the result
+# with `_with_next_steps`; the daemon consumes the raw dict.
+_NEGATIVE_KEYWORDS = (
+    "not interested", "won't be", "will not", "don't need", "do not need",
+    "no thanks", "pass on", "not a fit", "not for us", "closing",
+    "won't adopt", "will not adopt", "reject", "declined",
+)
+_REPO_FORMAT_RE = re.compile(r"^[\w.-]+/[\w.-]+$")
+# Module-local guard so the warning fires at most once per process.
+_REPO_ALLOWLIST_WARNED = False
+def _check_repo_allowlist(repo: str) -> Optional[Dict[str, Any]]:
+    """Return a refusal dict if the repo isn't in DELIMIT_ALLOWED_REPOS.
+    Duplicates the logic of ai.server._check_repo_allowlist intentionally:
+    importing from ai.server would create a circular import (server.py
+    imports from governance). Mirror with care — both copies must stay
+    in sync until LED-216 splits the allowlist into its own module.
+    """
+    global _REPO_ALLOWLIST_WARNED
+    allowlist_raw = os.environ.get("DELIMIT_ALLOWED_REPOS", "").strip()
+    if not allowlist_raw:
+        if not _REPO_ALLOWLIST_WARNED:
+            logger.warning(
+                "DELIMIT_ALLOWED_REPOS unset — sensor_github_issue calls "
+                "pass through to gh api using the caller's token."
+            )
+            _REPO_ALLOWLIST_WARNED = True
+        return None
+    allowed = {entry.strip().lower() for entry in allowlist_raw.split(",") if entry.strip()}
+    if (repo or "").strip().lower() not in allowed:
+        return {
+            "error": "repo_not_allowlisted",
+            "repo": repo,
+            "allowed": sorted(allowed),
+            "hint": (
+                "Repo not in DELIMIT_ALLOWED_REPOS. Add it or use a tool "
+                "that does not reach external APIs."
+            ),
+        }
+    return None
+def _sensor_github_issue_impl(
+    repo: str,
+    issue_number: int,
+    since_comment_id: int = 0,
+) -> Dict[str, Any]:
+    """Sync implementation of the sensor_github_issue MCP tool.
+    Returns the RAW result dict (no _with_next_steps wrapping). Callers
+    that want the MCP wrapping apply it themselves. Returns
+    ``{"error": ..., "has_new_activity": False}`` on any failure mode
+    rather than raising — the outreach daemon's monitor loop relies on
+    fail-soft behavior so one bad LED doesn't kill the whole tick.
+    Args:
+      repo: "owner/repo"; must match _REPO_FORMAT_RE, contain no "..",
+        and pass _check_repo_allowlist.
+      issue_number: positive int issue number.
+      since_comment_id: only comments whose id is greater than this are
+        reported in "new_comments".
+    Result schema (success path):
+      {
+        "repo": str, "issue_number": str,
+        "signal": {id, venture, metric, source, timestamp, severity},
+        "issue_state": "open" | "closed" | "unknown",
+        "new_comments": [{id, author, created_at, body}, ...],
+        "latest_comment_id": int,
+        "total_comments": int,
+        "has_new_activity": bool,
+      }
+    Comment bodies are truncated to 500 characters by the jq filter.
+    """
+    # Validate inputs — defense-in-depth even though subprocess.run with
+    # list argv (no shell=True) makes classic injection inert.
+    if not _REPO_FORMAT_RE.match(repo or ""):
+        return {"error": f"Invalid repo format: {repo!r}. Use owner/repo.",
+                "has_new_activity": False}
+    if ".." in repo:
+        return {"error": "Invalid repo: path traversal sequences not allowed",
+                "has_new_activity": False}
+    # NOTE(review): bool is a subclass of int, so True passes this check
+    # and is treated as issue 1 — confirm whether callers can send bools.
+    if not isinstance(issue_number, int) or issue_number <= 0:
+        return {"error": f"Invalid issue number: {issue_number}",
+                "has_new_activity": False}
+    refusal = _check_repo_allowlist(repo)
+    if refusal is not None:
+        # Keep the fail-soft shape: every error result carries has_new_activity.
+        refusal.setdefault("has_new_activity", False)
+        return refusal
+    try:
+        # Fetch comments
+        # NOTE(review): no --paginate flag is passed, so presumably only
+        # the first page of comments comes back for long threads — confirm
+        # against gh api behavior before relying on total_comments.
+        comments_jq = (
+            "[.[] | {id: .id, author: .user.login, "
+            "created_at: .created_at, body: (.body | .[0:500])}]"
+        )
+        comments_proc = subprocess.run(
+            ["gh", "api",
+             f"repos/{repo}/issues/{issue_number}/comments",
+             "--jq", comments_jq],
+            capture_output=True, text=True, timeout=30,
+        )
+        if comments_proc.returncode != 0:
+            return {
+                "error": f"gh api comments failed: {(comments_proc.stderr or '').strip()[:200]}",
+                "has_new_activity": False,
+            }
+        # Empty stdout is treated as "no comments" rather than a parse error.
+        all_comments = json.loads(comments_proc.stdout) if comments_proc.stdout.strip() else []
+        new_comments = [c for c in all_comments if c.get("id", 0) > since_comment_id]
+        # Fetch issue state
+        issue_jq = "{state: .state, labels: [.labels[].name], reactions: .reactions.total_count}"
+        issue_proc = subprocess.run(
+            ["gh", "api",
+             f"repos/{repo}/issues/{issue_number}",
+             "--jq", issue_jq],
+            capture_output=True, text=True, timeout=30,
+        )
+        if issue_proc.returncode != 0:
+            return {
+                "error": f"gh api issue failed: {(issue_proc.stderr or '').strip()[:200]}",
+                "has_new_activity": False,
+            }
+        # Only "state" is read from issue_info below; labels and reactions
+        # are fetched but not used in the result.
+        issue_info = json.loads(issue_proc.stdout) if issue_proc.stdout.strip() else {}
+        issue_state = issue_info.get("state", "unknown")
+        # Severity classification — green default; amber on closed; red on
+        # negative keyword in any new comment body.
+        severity = "green"
+        # Substring match over the lower-cased, space-joined new comment bodies.
+        combined_body = " ".join(c.get("body", "") or "" for c in new_comments).lower()
+        has_negative = any(kw in combined_body for kw in _NEGATIVE_KEYWORDS)
+        if has_negative:
+            severity = "red"
+        elif issue_state == "closed":
+            severity = "amber"
+        # With no comments at all, the caller's cursor is echoed back unchanged.
+        latest_comment_id = max((c.get("id", 0) for c in all_comments), default=since_comment_id)
+        repo_key = repo.replace("/", "_")
+        return {
+            "repo": repo,
+            "issue_number": str(issue_number),
+            "signal": {
+                "id": f"sensor:github_issue:{repo_key}:{issue_number}",
+                "venture": "delimit",
+                "metric": "outreach_issue_activity",
+                "source": f"https://github.com/{repo}/issues/{issue_number}",
+                "timestamp": datetime.now(timezone.utc).isoformat(),
+                "severity": severity,
+            },
+            "issue_state": issue_state,
+            "new_comments": new_comments,
+            "latest_comment_id": latest_comment_id,
+            "total_comments": len(all_comments),
+            "has_new_activity": len(new_comments) > 0,
+        }
+    except subprocess.TimeoutExpired:
+        return {"error": "gh command timed out after 30s",
+                "has_new_activity": False}
+    except json.JSONDecodeError as exc:
+        return {"error": f"Failed to parse gh output: {exc}",
+                "has_new_activity": False}
+    except Exception as exc:  # noqa: BLE001 — sensor must fail soft
+        # Catch-all for anything else raised inside the try block.
+        logger.error("sensor_github_issue impl error: %s", exc)
+        return {"error": str(exc), "has_new_activity": False}
 def _deep_get(d: Dict, key: str) -> Any:
     """Get a value from a dict, supporting nested keys with dots."""
     if "." in key:

package/gateway/ai/heartbeat.py ADDED Viewed

@@ -0,0 +1,290 @@
+"""Heartbeat liveness framework — Phase 1 local file-based (LED-1412).
+Solves the silent-staleness class that the 2026-05-15 session exposed:
+delimit-reddit-proxy.service was inactive/disabled for 13 days, all
+reddit scans failed silently with 429/403, and the founder noticed via
+"3 day old posts" — not the system. There was no central liveness
+reporting and no alert.
+Phase 1 (this module): every scheduled task writes a heartbeat file
+when it runs. A central check tool walks the heartbeat directory and
+flags anything stale. Local-only — Codex's correct caveat that
+heartbeats can't catch a full-host outage motivates Phase 2 (external
+deadman ping, tracked separately as LED-1414).
+Heartbeat file format — one per service at ~/.delimit/heartbeats/<service>.json:
+{
+  "service": "delimit-reddit-proxy",
+  "last_run": "2026-05-15T14:23:51Z",
+  "last_success": "2026-05-15T14:23:51Z",  # may differ from last_run on partial failure
+  "status": "ok" | "degraded" | "failed",
+  "next_expected": "2026-05-15T15:23:51Z",
+  "detail": "string — optional one-line context for status != ok"
+}
+Memory anchor: feedback_corrupted_worktree_phantom_failures.md (sister
+failure class — both surface as "system reports stale data because no-one
+checks freshness").
+"""
+from __future__ import annotations
+import json
+import os
+import time
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+# All heartbeats live under one directory. Override via env for tests.
+DEFAULT_HEARTBEAT_DIR = Path.home() / ".delimit" / "heartbeats"
+# Per-service staleness thresholds (seconds). Overridable via config file
+# at ~/.delimit/heartbeats/_thresholds.json. Service names match the
+# `service` key written by write_heartbeat().
+# Every service listed here is also reported by check_staleness() as
+# `never_seen` when it has no heartbeat record.
+DEFAULT_STALENESS_THRESHOLDS: Dict[str, int] = {
+    # Reddit scanner: hourly social loop. >2 hours = stale.
+    "delimit-reddit-proxy": 7200,
+    "delimit-social-loop": 7200,
+    # Inbox daemon: 5-min poll. >30 min = stale.
+    "delimit-inbox": 1800,
+    # License watch: daily timer. >36 hours = stale.
+    "delimit-license-watch": 129600,
+    # Drift check: daily. >36 hours = stale.
+    "delimit-drift-check": 129600,
+    # stake.one INJ-claim: daily 13:00 UTC. >30 hours = stale.
+    "stakeone-inj-claim": 108000,
+}
+# Fallback for services not in the threshold map.
+DEFAULT_FALLBACK_STALENESS = 86400  # 24 hours
+def _heartbeat_dir(override: Optional[str] = None) -> Path:
+    """Resolve the heartbeat directory. Honors:
+    - explicit override arg
+    - DELIMIT_HEARTBEAT_DIR env var
+    - default ~/.delimit/heartbeats/
+    """
+    if override:
+        return Path(override)
+    env = os.environ.get("DELIMIT_HEARTBEAT_DIR")
+    if env:
+        return Path(env)
+    return DEFAULT_HEARTBEAT_DIR
+def _now_iso() -> str:
+    """Current UTC time as ISO 8601 with Z suffix (matches existing
+    delimit timestamp convention)."""
+    return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
+def _parse_iso(ts: str) -> Optional[float]:
+    """Parse an ISO 8601 timestamp to a unix epoch float. Returns None
+    on parse failure — callers treat None as 'unknown' (degraded but
+    not actionable)."""
+    if not ts:
+        return None
+    try:
+        # %Y-%m-%dT%H:%M:%SZ — UTC, no fractional seconds.
+        return time.mktime(time.strptime(ts, "%Y-%m-%dT%H:%M:%SZ")) - time.timezone
+    except (ValueError, TypeError):
+        return None
+def write_heartbeat(
+    service: str,
+    status: str = "ok",
+    next_expected_in: Optional[int] = None,
+    detail: str = "",
+    success: bool = True,
+    heartbeat_dir: Optional[str] = None,
+) -> Dict[str, Any]:
+    """Write a heartbeat for `service`.
+    Called by every scheduled task at the end of its run. On success,
+    pass status='ok' and success=True (default). On partial failure
+    (e.g., one of N subreddits 429'd but most succeeded), pass
+    status='degraded'. On total failure, status='failed' + success=False.
+    Args:
+        service: stable service identifier (e.g., 'delimit-reddit-proxy').
+            Should match the systemd unit name where applicable.
+        status: 'ok' | 'degraded' | 'failed'.
+        next_expected_in: seconds until the next run is expected. Used
+            by check_staleness to compute next_expected timestamp.
+        detail: optional one-line context (printed to operators on stale).
+        success: True if the run achieved its primary purpose (independent
+            of `status` — a successful run can still be 'degraded' if
+            some optional sub-tasks failed). last_success only updates
+            when True.
+        heartbeat_dir: override the heartbeat directory (for tests).
+    Returns:
+        Dict with the written record (also persisted to disk).
+    """
+    target_dir = _heartbeat_dir(heartbeat_dir)
+    target_dir.mkdir(parents=True, exist_ok=True)
+    file_path = target_dir / f"{service}.json"
+    now = _now_iso()
+    next_expected = ""
+    if next_expected_in:
+        next_expected_epoch = time.time() + next_expected_in
+        next_expected = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(next_expected_epoch))
+    # Preserve last_success across runs (only update if this run succeeded).
+    last_success = now if success else ""
+    if not success and file_path.exists():
+        try:
+            prior = json.loads(file_path.read_text())
+            last_success = prior.get("last_success", "")
+        except (json.JSONDecodeError, OSError):
+            pass  # Ignore corrupted prior; treat as no last_success known.
+    record = {
+        "service": service,
+        "last_run": now,
+        "last_success": last_success,
+        "status": status,
+        "next_expected": next_expected,
+        "detail": detail,
+    }
+    file_path.write_text(json.dumps(record, indent=2) + "\n")
+    return record
+def read_heartbeats(heartbeat_dir: Optional[str] = None) -> List[Dict[str, Any]]:
+    """Read every heartbeat file in the directory. Skips files that
+    don't parse as JSON (corrupted heartbeats are reported as a separate
+    'parse_error' entry so the operator sees them)."""
+    target_dir = _heartbeat_dir(heartbeat_dir)
+    if not target_dir.exists():
+        return []
+    out: List[Dict[str, Any]] = []
+    for path in sorted(target_dir.glob("*.json")):
+        # Skip the threshold config file
+        if path.name == "_thresholds.json":
+            continue
+        try:
+            data = json.loads(path.read_text())
+            out.append(data)
+        except (json.JSONDecodeError, OSError) as e:
+            out.append({
+                "service": path.stem,
+                "status": "parse_error",
+                "detail": f"heartbeat file {path.name} unreadable: {type(e).__name__}: {e}",
+                "last_run": "",
+                "last_success": "",
+                "next_expected": "",
+            })
+    return out
+def _load_thresholds(heartbeat_dir: Optional[str] = None) -> Dict[str, int]:
+    """Merge defaults with the optional override at <dir>/_thresholds.json."""
+    thresholds = dict(DEFAULT_STALENESS_THRESHOLDS)
+    target_dir = _heartbeat_dir(heartbeat_dir)
+    override_path = target_dir / "_thresholds.json"
+    if override_path.exists():
+        try:
+            override = json.loads(override_path.read_text())
+            if isinstance(override, dict):
+                thresholds.update({k: int(v) for k, v in override.items() if isinstance(v, (int, float))})
+        except (json.JSONDecodeError, OSError, ValueError):
+            pass
+    return thresholds
+def check_staleness(heartbeat_dir: Optional[str] = None) -> Dict[str, Any]:
+    """Walk all heartbeats and classify each by staleness.
+    Returns:
+        {
+          "checked_at": ISO8601 string,
+          "summary": {"ok": N, "stale": N, "degraded": N, "failed": N,
+                      "parse_error": N, "never_seen": N, "unknown_age": N},
+          "services": [{service, status, last_run, last_success, age_seconds,
+                        threshold_seconds, classification, detail}],
+          "stale_services": [<names of services classified stale, failed,
+                              parse_error or never_seen>],  # convenience for alerts
+        }
+    Classification rules (first match wins):
+      - parse_error: heartbeat file unreadable
+      - failed: status='failed' in the record
+      - unknown_age: last_run missing or unparseable, or computed age is
+        negative (last_run lies in the future)
+      - stale: last_run older than threshold
+      - degraded: status='degraded' in the record
+      - ok: anything else (last_run within threshold)
+      - never_seen: a service is configured in thresholds but has no
+        heartbeat record — surfaces "scheduled task never ran since
+        heartbeat instrumentation landed"
+    age_seconds is -1 when the age is unknown or the service was never seen.
+    """
+    now = time.time()
+    records = read_heartbeats(heartbeat_dir)
+    thresholds = _load_thresholds(heartbeat_dir)
+    # Keyed by service name, so a later record with the same name replaces
+    # an earlier one.
+    by_service: Dict[str, Dict[str, Any]] = {}
+    for rec in records:
+        service = rec.get("service", "?unknown?")
+        last_run_epoch = _parse_iso(rec.get("last_run", ""))
+        threshold = thresholds.get(service, DEFAULT_FALLBACK_STALENESS)
+        if last_run_epoch is not None:
+            age_seconds = int(now - last_run_epoch)
+        else:
+            # Sentinel: age could not be determined.
+            age_seconds = -1
+        # Classify (most-severe-first)
+        if rec.get("status") == "parse_error":
+            classification = "parse_error"
+        elif rec.get("status") == "failed":
+            classification = "failed"
+        elif age_seconds < 0:
+            classification = "unknown_age"
+        elif age_seconds > threshold:
+            classification = "stale"
+        elif rec.get("status") == "degraded":
+            classification = "degraded"
+        else:
+            classification = "ok"
+        by_service[service] = {
+            "service": service,
+            "status": rec.get("status", "?"),
+            "last_run": rec.get("last_run", ""),
+            "last_success": rec.get("last_success", ""),
+            "age_seconds": age_seconds,
+            "threshold_seconds": threshold,
+            "classification": classification,
+            "detail": rec.get("detail", ""),
+        }
+    # Add never_seen entries for configured services that have no record
+    for service in thresholds.keys():
+        if service not in by_service:
+            by_service[service] = {
+                "service": service,
+                "status": "never_seen",
+                "last_run": "",
+                "last_success": "",
+                "age_seconds": -1,
+                "threshold_seconds": thresholds[service],
+                "classification": "never_seen",
+                "detail": "no heartbeat file found — service may not be instrumented yet",
+            }
+    services = list(by_service.values())
+    summary = {"ok": 0, "stale": 0, "degraded": 0, "failed": 0, "parse_error": 0,
+               "never_seen": 0, "unknown_age": 0}
+    stale_services = []
+    for svc in services:
+        c = svc["classification"]
+        summary[c] = summary.get(c, 0) + 1
+        # degraded and unknown_age are counted in the summary but are not
+        # listed in stale_services.
+        if c in ("stale", "failed", "parse_error", "never_seen"):
+            stale_services.append(svc["service"])
+    return {
+        "checked_at": _now_iso(),
+        "summary": summary,
+        "services": services,
+        "stale_services": stale_services,
+    }