npm - delimit-cli - Versions diffs - 4.5.13 → 4.6.1 - Mend

delimit-cli 4.5.13 → 4.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

package/CHANGELOG.md +48 -0
package/README.md +9 -8
package/bin/delimit-cli.js +179 -4
package/bin/delimit-setup.js +46 -6
package/gateway/ai/_compile_status.py +154 -0
package/gateway/ai/agent_dispatch.py +41 -0
package/gateway/ai/backends/git_health.py +175 -0
package/gateway/ai/backends/tools_infra.py +163 -10
package/gateway/ai/cli_contract.py +185 -0
package/gateway/ai/daemon.py +10 -0
package/gateway/ai/daily_digest.py +1 -2
package/gateway/ai/delimit_daemon.py +67 -0
package/gateway/ai/dispatch_gate.py +399 -0
package/gateway/ai/governance.py +181 -0
package/gateway/ai/heartbeat.py +290 -0
package/gateway/ai/hot_reload.py +1 -2
package/gateway/ai/led193_daemon/executor.py +9 -0
package/gateway/ai/ledger_manager.py +90 -4
package/gateway/ai/ledger_proof.py +127 -0
package/gateway/ai/license.py +132 -47
package/gateway/ai/license_core.cpython-310-x86_64-linux-gnu.so +0 -0
package/gateway/ai/license_core.pyi +1 -1
package/gateway/ai/notify.py +39 -0
package/gateway/ai/outreach_loop_daemon.py +349 -0
package/gateway/ai/outreach_substantive.py +1437 -0
package/gateway/ai/pro_tools.yaml +167 -0
package/gateway/ai/reaper.py +70 -0
package/gateway/ai/reddit_scanner.py +17 -6
package/gateway/ai/sensing/schema.py +1 -1
package/gateway/ai/sensing/signal_store.py +0 -1
package/gateway/ai/server.py +5490 -1602
package/gateway/ai/social_capability/fit_floor.py +114 -12
package/gateway/ai/social_queue.py +166 -10
package/gateway/ai/tdqs_lint.py +611 -0
package/gateway/ai/tenant_auth.py +329 -0
package/gateway/ai/tenant_data.py +339 -0
package/gateway/ai/tenant_paths.py +150 -0
package/gateway/ai/usage_allowlist.py +198 -0
package/gateway/ai/workers/base.py +2 -2
package/gateway/ai/workers/executor.py +32 -3
package/gateway/ai/workers/outreach_drafter.py +0 -1
package/gateway/ai/workers/pr_drafter.py +0 -1
package/gateway/ai/x_ranker.py +12 -2
package/gateway/core/json_schema_diff.py +25 -1
package/lib/auth-signin.js +136 -0
package/lib/auth-signout.js +169 -0
package/lib/delimit-template.js +11 -0
package/lib/migration-2092-banner.js +213 -0
package/package.json +5 -2
package/server.json +4 -4
package/scripts/build-license-core.sh +0 -85
package/scripts/security-check.sh +0 -66
package/scripts/test-license-core-so.sh +0 -107

package/gateway/ai/backends/git_health.py ADDED Viewed

@@ -0,0 +1,175 @@
+"""Git worktree sanity checks (LED-1411).
+Single source of truth for "is this directory a healthy git worktree?"
+Used by delimit_test_smoke, delimit_deploy_plan, and delimit_evidence_collect
+as a precheck before they trust ambient checkout state.
+Background — LED-1403 / LED-1401 incident (2026-05-14):
+`/home/delimit/npm-delimit/.git` was configured `bare = true` but had source
+files alongside, AND a stranded sibling worktree at `/tmp/delimit-mcp-main`
+where `git status` showed every file as both `D` and `??` (deleted from
+index, untracked on disk). `delimit_test_smoke` ran against this corrupt
+state and reported `attest-mcp Q2 3-tier exit codes` failures that did NOT
+exist on real main. I almost shipped a "fix" for a non-bug (LED-1403,
+closed `not_reproducible` after a fresh clone proved tests passed).
+This module exists so the same class of phantom failure can't recur.
+Precheck must:
+  - Add <100ms to caller startup (no network, no fetch)
+  - Emit a single actionable remediation line on failure
+  - Return a structured dict (callers may inline-handle or surface up)
+Memory anchor: feedback_corrupted_worktree_phantom_failures.md
+"""
+from __future__ import annotations
+import subprocess
+from pathlib import Path
+from typing import Any, Dict
+def _run(cmd: list, cwd: str, timeout: float = 2.0) -> str:
+    """Run a git command with a tight timeout. Returns stdout stripped,
+    or empty string on any failure (intentional — caller decides what
+    constitutes a failure based on the structured result, not exceptions)."""
+    try:
+        return subprocess.check_output(
+            cmd,
+            cwd=cwd,
+            stderr=subprocess.DEVNULL,
+            timeout=timeout,
+            text=True,
+        ).strip()
+    except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError, OSError):
+        return ""
+def check_worktree_sanity(repo_path: str) -> Dict[str, Any]:
+    """Verify the directory at `repo_path` is a healthy git worktree.
+    Checks (in order; cheapest first):
+      1. Path exists and contains a `.git` directory (or file pointing to one)
+      2. `git rev-parse --is-bare-repository` returns `false`
+      3. `git rev-parse --is-inside-work-tree` returns `true`
+      4. `git worktree list` includes the resolved CWD
+      5. `git status --porcelain=v1` does NOT show every file as BOTH
+         deleted-from-index AND untracked (the LED-1401 corruption signature)
+    Returns a dict with:
+      - ok: bool — overall health
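+      - reason: str — short failure code (`not_a_directory`, `not_a_repo`,
+        `bare_repo_with_files`, `not_a_worktree`, `stranded_worktree`,
+        `corrupt_status`) when ok=False, else `healthy`
+      - detail: str — actionable remediation message
+      - path: str — the path that was checked
+    Some failures add a diagnostic key: `worktree_list` (for
+    `stranded_worktree`) or `overlap_count` (for `corrupt_status`).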
+    Non-raising: errors return ok=False with a structured reason, so callers
+    can decide whether to halt or warn.
+    """
+    p = Path(repo_path)
+    if not p.exists() or not p.is_dir():
+        return {
+            "ok": False,
+            "reason": "not_a_directory",
+            "detail": f"{repo_path} is not a directory.",
+            "path": repo_path,
+        }
+    git_meta = p / ".git"
+    if not git_meta.exists():
+        return {
+            "ok": False,
+            "reason": "not_a_repo",
+            "detail": f"{repo_path} has no .git/ — not a git worktree.",
+            "path": repo_path,
+        }
+    # Bare-repo check first (LED-1401 signature: bare=true + source files
+    # alongside). Checked BEFORE is-inside-work-tree because a bare repo
+    # answers "false" to that question — we want the more informative
+    # bare-repo message to win when both conditions hold.
+    is_bare = _run(["git", "rev-parse", "--is-bare-repository"], cwd=repo_path)
+    if is_bare == "true":
+        return {
+            "ok": False,
+            "reason": "bare_repo_with_files",
+            "detail": (
+                f"{repo_path}/.git/ has `core.bare = true` but the directory "
+                f"holds source files. Tests against this state run stale "
+                f"code. Re-clone fresh: `git clone <url> /tmp/<repo>-fresh "
+                f"&& cd /tmp/<repo>-fresh`"
+            ),
+            "path": repo_path,
+        }
+    # Inside-work-tree check
+    inside = _run(["git", "rev-parse", "--is-inside-work-tree"], cwd=repo_path)
+    if inside != "true":
+        return {
+            "ok": False,
+            "reason": "not_a_worktree",
+            "detail": (
+                f"{repo_path} is not inside a git work tree "
+                f"(rev-parse --is-inside-work-tree returned {inside!r}). "
+                f"Re-clone fresh: `git clone <url> /tmp/<repo>-fresh && cd /tmp/<repo>-fresh`"
+            ),
+            "path": repo_path,
+        }
+    # Worktree-list membership check (catches stranded sibling worktrees)
+    worktrees = _run(["git", "worktree", "list", "--porcelain"], cwd=repo_path)
+    resolved = str(p.resolve())
+    if worktrees and resolved not in worktrees:
+        # The current directory isn't a registered worktree of its own
+        # .git/ — likely a stale checkout that was wiped+repopulated outside
+        # git's awareness. This is the LED-1401 stranded-sibling signature.
+        return {
+            "ok": False,
+            "reason": "stranded_worktree",
+            "detail": (
+                f"{resolved} is not a registered worktree of its own .git/. "
+                f"Run `git worktree list` to inspect; re-clone fresh if "
+                f"orphaned: `git clone <url> /tmp/<repo>-fresh && cd /tmp/<repo>-fresh`"
+            ),
+            "path": repo_path,
+            "worktree_list": worktrees,
+        }
+    # LED-1401 corrupt-status signature: every file appears as BOTH `D` and `??`
+    # (deleted from index, untracked on disk). Sample the first 200 status lines
+    # — if >=10 distinct paths show this pattern, it's pathological.
+    status = _run(["git", "status", "--porcelain=v1"], cwd=repo_path, timeout=3.0)
+    if status:
+        lines = status.split("\n")[:200]
+        deleted_paths = set()
+        untracked_paths = set()
+        for line in lines:
+            if len(line) < 4:
+                continue
+            xy = line[:2]
+            path = line[3:].lstrip()
+            if "D" in xy:
+                deleted_paths.add(path)
+            if xy == "??":
+                untracked_paths.add(path)
+        overlap = deleted_paths & untracked_paths
+        if len(overlap) >= 10:
+            return {
+                "ok": False,
+                "reason": "corrupt_status",
+                "detail": (
+                    f"{repo_path} shows >={len(overlap)} files as both deleted-from-index "
+                    f"AND untracked-on-disk — the worktree was wiped and repopulated "
+                    f"outside git's awareness (LED-1401 signature). Re-clone fresh: "
+                    f"`git clone <url> /tmp/<repo>-fresh && cd /tmp/<repo>-fresh`"
+                ),
+                "path": repo_path,
+                "overlap_count": len(overlap),
+            }
+    return {
+        "ok": True,
+        "reason": "healthy",
+        "detail": "git worktree is healthy",
+        "path": repo_path,
+    }

package/gateway/ai/backends/tools_infra.py CHANGED Viewed

@@ -64,12 +64,29 @@ _CREDENTIAL_FALSE_POSITIVES = re.compile(
     r"sk-ant-demo|sk-demo|AIza-demo|xai-demo|demo[_-]?(?:key|secret|token)|"
     r"-demo['\"]|"
     # Function-call RHS (reading from parsed JSON, env, getters, slicing strings)
-    r"json\.loads|\.read_text\(|\.slice\(|"
+    r"json\.loads|\.read_text\(|\.slice\(|\.split\(|"
     r"\w+\.get\(|token\s*=\s*_make_token|"
     # RHS that is a parameter reference like token=tokens.get("access_token"...
     r"=\s*\w+\.get\(|"
     # Dict index dereference: token_data["token"], result["secret"], etc.
     r"_data\[|_result\[|"
+    # LED-1278 (b): function-call RHS with leading underscore (e.g. _load_token())
+    r"=\s*_\w+\(|"
+    # LED-1278 (c) [2026-05-22]: naked function-call RHS without leading
+    # underscore. Matches the common shape `const token = readCurrentToken();`
+    # in bin/delimit-cli.js — the token is being READ from somewhere, not
+    # hardcoded. Anchored with `\s*;?\s*$` so the call must end the matched
+    # text, and the argument list is capped at 40 non-`)` characters.
+    # NOTE(review): the argument class `[^)]{0,40}` still admits a short quoted
+    # literal, so `token = realLeak("AKIAIOSFODNN7EXAMPLE")` would also be
+    # suppressed by this alternative — confirm whether quotes should be
+    # excluded from the argument class.
+    r"=\s*\w+\([^)]{0,40}\)\s*;?\s*$|"
+    # LED-1278 (c) [2026-05-22]: parenthesized property-access fallback chain
+    # like `const token = (options.token || process.env.TOKEN)`. Common shape
+    # for CLI option parsing where the RHS reads from a known input source,
+    # never a literal. Requires the open-paren to be followed by a word + dot
+    # (property access) so we don't match `token = ("AKIA..." || "")` shapes.
+    r"=\s*\(\s*\w+\.\w+|"
+    # LED-1278 (b): documentation/example placeholders in angle brackets
+    r"<[^>]*?(?:long|same|random|your|placeholder|example|secret|token|key)[^>]*?>|"
     # Bare `if not <var>:` and similar control-flow lines that mention
     # the credential variable name but contain no value.
     r"if\s+not\s+\w+:|"
@@ -98,6 +115,82 @@ SCAN_EXTENSIONS = {".py", ".js", ".ts", ".jsx", ".tsx", ".go", ".rb", ".java", "
 # Skip directories
 SKIP_DIRS = {"node_modules", ".git", "__pycache__", ".venv", "venv", ".tox", "dist", "build", ".next", ".nuxt", "vendor"}
+# LED-1278 (a): test-tree path patterns excluded by default. The scanner walks  # nosec
+# test directories with prod rules, so test fixtures (placeholder tokens,  # nosec
+# trivial JWT bodies, code-injection demos) get surfaced as critical findings  # nosec
+# on every audit. Default behavior now skips these; callers can pass  # nosec
+# include_tests=True to scan everything.  # nosec
+TEST_PATH_PATTERNS = (
+    re.compile(r"(?:^|[\\/])tests?[\\/]"),         # tests/ or test/ as a path component
+    re.compile(r"(?:^|[\\/])__tests__[\\/]"),      # JS __tests__/
+    re.compile(r"(?:^|[\\/])spec[\\/]"),           # spec/
+    re.compile(r"(?:^|[\\/])fixtures?[\\/]"),      # fixtures/ or fixture/
+    re.compile(r"(?:^|[\\/])test_[^\\/]+\.py$"),   # test_*.py
+    re.compile(r"_test\.(?:py|go|rb|java)$"),       # *_test.py / *_test.go
+    re.compile(r"\.(?:test|spec)\.(?:js|jsx|ts|tsx|mjs|cjs)$"),  # *.test.js, *.spec.tsx
+)
+def _is_test_path(path: str) -> bool:
+    """Return True if path looks like a test file/dir per TEST_PATH_PATTERNS."""
+    s = str(path)
+    return any(pat.search(s) for pat in TEST_PATH_PATTERNS)
+# LED-1278 (b): well-known dummy / fixture values. Even when include_tests=True
+# (or when production code intentionally embeds canonical placeholders in
+# docs/examples), these specific shapes should be suppressed as `info` log
+# lines, not raised as critical findings.
+#
+# Each entry: (regex applied to the matched secret text, human label).
+KNOWN_DUMMY_PATTERNS = [
+    # AWS canonical dummy from official AWS documentation.
+    (re.compile(r"AKIAIOSFODNN7EXAMPLE"), "aws_doc_dummy"),
+    # GitHub token placeholders that use the printable-alphabet pattern.
+    (re.compile(r"^gh[pousr]_ABCDEFGHIJKLMNOPQRSTUVWXYZ", re.IGNORECASE), "github_alphabet_dummy"),
+    # Slack tokens with the leading 1234567890 sequence.
+    (re.compile(r"^xox[baprs]-1234567890-"), "slack_seq_dummy"),
+    # JWT whose header is the bare `{"alg":"HS256"}` object (base64url
+    # eyJhbGciOiJIUzI1NiJ9). Only the header is matched here; the payload is
+    # checked separately in _looks_like_known_dummy below.
+    (re.compile(r"^eyJhbGciOiJIUzI1NiJ9\."), "jwt_hs256_trivial"),
+    # Generic dict-credential placeholder values: fake/test/dummy/example/etc.
+    (re.compile(r"['\"](?:fake|test|dummy|example|placeholder|stale|from-)[A-Za-z0-9_\-]*['\"]\s*$", re.IGNORECASE),
+     "generic_placeholder_value"),
+    # Provider test-key shapes: xai-key-123, google-key-7, claude-key-2 etc.
+    (re.compile(r"['\"](?:xai|google|claude|gem|grok|codex|ollama)[-_]?key[-_]?\d+['\"]\s*$", re.IGNORECASE),
+     "provider_test_key"),
+]
+def _looks_like_known_dummy(secret_name: str, matched_text: str) -> Optional[str]:
+    """Return a label if matched_text is a known-dummy/fixture value, else None.
+    Used by the secret scanner to convert what would otherwise be a critical
+    finding into an `info`-level suppressed entry. Keeps the audit-trail
+    visible (so a future regression in the allowlist is detectable) while
+    eliminating the false-positive-storm noise.
+    For JWT, additionally requires either the trivial `sub:1234567890` payload
+    or a final segment containing `abc123` — we don't want to suppress real
+    signed JWTs that happen to use HS256.
+    """
+    for pattern, label in KNOWN_DUMMY_PATTERNS:
+        if pattern.search(matched_text):
+            if label == "jwt_hs256_trivial":
+                # Only treat as dummy if the payload is the canonical demo
+                # body (`sub: "1234567890"` or trivial abc123 segment).
+                # The JWT pattern produces something like:
+                #   eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.abc123def456ghi789
+                # The middle segment base64-decodes to {"sub":"1234567890"}.
+                if (
+                    "eyJzdWIiOiIxMjM0NTY3ODkwIn0" in matched_text
+                    or re.search(r"\.[A-Za-z0-9_-]*abc123[A-Za-z0-9_-]*$", matched_text)
+                ):
+                    return label
+                continue
+            return label
+    return None
 def _run_cmd(cmd: List[str], timeout: int = 30, cwd: Optional[str] = None) -> Dict[str, Any]:
     """Run a command and return stdout, stderr, returncode.
@@ -144,8 +237,13 @@ def _bump_semver(version: str, bump: str) -> str:
     return f"{major}.{minor}.{patch}"
-def _scan_files(target: str) -> List[Path]:
-    """Collect scannable source files under target."""
+def _scan_files(target: str, include_tests: bool = False) -> List[Path]:
+    """Collect scannable source files under target.
+    LED-1278 (a): when include_tests=False (the new default), skip files that
+    match TEST_PATH_PATTERNS so test fixtures do not surface as findings.
+    Single-file targets are always scanned regardless (caller asked explicitly).
+    """
     root = Path(target).resolve()
     files = []
     if root.is_file():
@@ -154,10 +252,25 @@ def _scan_files(target: str) -> List[Path]:
         return []
     for dirpath, dirnames, filenames in os.walk(root, onerror=lambda _err: None):
         dirnames[:] = [d for d in dirnames if d not in SKIP_DIRS]
+        if not include_tests:
+            # Prune obvious test directory names before recursing so we don't
+            # walk huge __tests__/ trees just to discard them later.
+            dirnames[:] = [
+                d for d in dirnames
+                if d not in ("tests", "test", "__tests__", "spec", "fixtures", "fixture")
+            ]
         for filename in filenames:
             p = Path(dirpath) / filename
-            if p.suffix in SCAN_EXTENSIONS:
-                files.append(p)
+            if p.suffix not in SCAN_EXTENSIONS:
+                continue
+            if not include_tests:
+                try:
+                    rel = str(p.relative_to(root))
+                except ValueError:
+                    rel = str(p)
+                if _is_test_path(rel):
+                    continue
+            files.append(p)
             # Cap to avoid scanning massive repos
             if len(files) >= 5000:
                 return files
@@ -166,11 +279,26 @@ def _scan_files(target: str) -> List[Path]:
 # ─── 5. security_audit ──────────────────────────────────────────────────
-def security_audit(target: str = ".") -> Dict[str, Any]:
+def security_audit(target: str = ".", include_tests: bool = False) -> Dict[str, Any]:
     """Audit security: dependency vulnerabilities + anti-patterns + secret detection.
     Default: runs pip-audit/npm-audit, regex scans for secrets and dangerous patterns.
     Optional upgrade: set SNYK_TOKEN or TRIVY_PATH for enhanced scanning.
+    LED-1278 fixes:
+      (a) include_tests defaults to False — test directories (tests/, __tests__/,
+          spec/, fixtures/, *_test.py, *.test.tsx, etc.) are skipped so
+          test fixtures don't get raised as critical production findings.
+          Pass include_tests=True to scan everything (legacy behavior).
+      (b) Well-known dummy/placeholder values (AWS canonical example,
+          alphabet-pattern GitHub tokens, leading-1234567890 Slack tokens,
+          trivial JWT, fake/test/dummy/placeholder dict values, provider
+          test-key shapes) are suppressed and recorded as `info`-severity
+          allowlist hits in `suppressed_findings` for audit visibility.
+    Args:
+        target: Repository or file path to audit.
+        include_tests: When True, scan test directories (default False).
     """
     target_path = Path(target).resolve()
     if not target_path.exists():
@@ -179,6 +307,7 @@ def security_audit(target: str = ".") -> Dict[str, Any]:
     vulnerabilities = []
     anti_patterns_found = []
     secrets_found = []
+    suppressed_findings: List[Dict[str, Any]] = []  # LED-1278 (b): allowlist log
     tools_used = []
     severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
@@ -284,8 +413,10 @@ def security_audit(target: str = ".") -> Dict[str, Any]:
             pass
     # --- 2. Anti-pattern scan ---
-    files = _scan_files(target)
-    tools_used.append(f"pattern-scanner ({len(files)} files)")
+    files = _scan_files(target, include_tests=include_tests)
+    scan_label = f"pattern-scanner ({len(files)} files"
+    scan_label += ", include_tests=True" if include_tests else ", tests excluded"
+    tools_used.append(scan_label + ")")
     for fpath in files:
         try:
@@ -305,6 +436,25 @@ def security_audit(target: str = ".") -> Dict[str, Any]:
                 if secret_name in _FP_FILTERED and _CREDENTIAL_FALSE_POSITIVES.search(matched_text):
                     continue
                 line_num = content[:match.start()].count("\n") + 1
+                # LED-1278 (b): well-known dummy/placeholder values get
+                # suppressed to info-level rather than raised as critical.
+                # Logged in suppressed_findings so a future regression in the
+                # allowlist (e.g. real key matching by accident) is auditable.
+                dummy_label = _looks_like_known_dummy(secret_name, matched_text)
+                if dummy_label:
+                    suppressed_findings.append({
+                        "file": rel,
+                        "line": line_num,
+                        "type": secret_name,
+                        "reason": dummy_label,
+                        "severity": "info",
+                    })
+                    severity_counts["info"] += 1
+                    logger.info(
+                        "security_audit: suppressed known-dummy %s (%s) in %s:%d",
+                        secret_name, dummy_label, rel, line_num,
+                    )
+                    continue
                 # Redact actual secret values in snippet output
                 snippet_raw = content[max(0, match.start() - 10):match.end() + 10].strip()[:80]
                 secrets_found.append({
@@ -358,6 +508,9 @@ def security_audit(target: str = ".") -> Dict[str, Any]:
         "anti_patterns": anti_patterns_found,
         "secrets_detected": len(secrets_found),
         "secrets": secrets_found[:20],  # Cap output to avoid huge responses
+        "suppressed_findings": suppressed_findings[:20],  # LED-1278 (b): allowlist audit log
+        "suppressed_count": len(suppressed_findings),
+        "include_tests": include_tests,  # LED-1278 (a): expose scan scope
         "env_in_git": env_in_git,
         "severity_summary": severity_counts,
         "tools_used": tools_used,
@@ -765,9 +918,9 @@ def release_plan(environment: str = "production", version: str = "", repository:
     # Commits since last tag
     if last_tag:
-        r = _run_cmd(["git", "log", f"{last_tag}..HEAD", "--oneline", "--no-decorate"], cwd=cwd)
+        r = _run_cmd(["git", "log", f"{last_tag}..HEAD", "--format=%s"], cwd=cwd)
     else:
-        r = _run_cmd(["git", "log", "--oneline", "--no-decorate", "-50"], cwd=cwd)
+        r = _run_cmd(["git", "log", "--format=%s", "-50"], cwd=cwd)
     commits = [line.strip() for line in r["stdout"].strip().split("\n") if line.strip()] if r["stdout"].strip() else []
     result["commits_since_last_tag"] = len(commits)
     result["commits"] = commits[:30]  # Cap

package/gateway/ai/cli_contract.py ADDED Viewed

@@ -0,0 +1,185 @@
+"""LED-1415 — CLI subprocess contract.
+The deliberation engine drives 4 model CLIs as subprocesses
+(claude / codex / gemini / cursor) and treats their stdout as model
+verdict text. Three classes of bug have surfaced in this pipeline:
+  1. Banner contamination — the Delimit governance shim leaks ASCII
+     art onto stdout instead of stderr (PR #154, fixed by LED-1428).
+  2. Empty/silent responses — CLI exits 0 but stdout is empty
+     (transient API issues, OOM, network blips). Caught by LED-1416's
+     retry state machine.
+  3. Schema drift — CLI changes its output shape between versions
+     (e.g., adds an auto-correction line at the top). Caught
+     reactively by failing deliberation panels.
+This module holds the ONE contract that every CLI response must
+satisfy + the ONE validator that enforces it. Both the per-CLI mock
+tests (tests/test_cli_contract.py) AND the weekly real-CLI smoke
+script (scripts/smoke_cli_contracts.py) call validate_cli_contract()
+so the contract definition lives in exactly one place — extending
+it never means remembering to change it in two places.
+"""
+from __future__ import annotations
+import re
+from dataclasses import dataclass, field
+from typing import List, Optional
+# The 4 known CLIs the deliberation engine targets. cursor is included
+# even though it's not yet installed in the dev environment — adding
+# it to the contract surface now means the validator is ready when it
+# lands; smoke skips when the binary isn't present.
+KNOWN_CLI_NAMES = ("claude", "codex", "gemini", "cursor")
+# Minimum scrubbed-response length we'll accept as "looks like a real
+# model verdict" rather than "leftover garbage after banner strip."
+# Calibrated against historical scrub-debug.jsonl entries: every real
+# round-1/round-2 verdict from past deliberations was >= 60 chars;
+# every banner-only contamination was < 30 chars. 30 is the cutoff
+# the production scrubber already uses; keeping that here means the
+# validator + the scrubber agree.
+MIN_VERDICT_LEN = 30
+# Patterns that signal "the response is contamination, not a verdict."
+# Each gets the response REJECTED even if length and scrub passed.
+_CONTAMINATION_MARKERS = (
+    re.compile(r"^\[scrub:\s*contaminated\b", re.IGNORECASE),
+    re.compile(r"^\[.+\bunavailable\b.+\bnot found in PATH\]", re.IGNORECASE),
+    re.compile(r"^\[.+\bskipped under INTERNAL_PYTEST_GUARD", re.IGNORECASE),
+    re.compile(r"^\[.+\btimed out after\b", re.IGNORECASE),
+    re.compile(r"^\[.+\breturned empty response\]", re.IGNORECASE),
+    re.compile(r"^\[.+\berror:.+\]\s*$", re.IGNORECASE),
+)
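+# A response should contain at least ONE of these markers to be
+# recognizable as a panel verdict. The deliberation engine prompts all
+# models to end with `VERDICT: ...` so we expect to see it. Falling
+# back: "AGREE" / "DISAGREE" / "REMEDIATE" / "AGREE WITH MODIFICATIONS"
+# all appear in real responses even when the trailing VERDICT line is
+# omitted by a chatty model. "APPROVE" / "REJECT" are accepted too.
+# NOTE(review): the trailing `\b` also applies to the `VERDICT:` alternative,
+# so it only matches when a word character directly follows the colon
+# (`VERDICT:AGREE`, not `VERDICT: PASS`) — confirm this is intended.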
+_VERDICT_HINT_RE = re.compile(
+    r"\b(VERDICT:|AGREE|DISAGREE|REMEDIATE|APPROVE|REJECT)\b",
+    re.IGNORECASE,
+)
+@dataclass
+class CliContractResult:
+    """Outcome of validating one CLI's response.
+    `ok` is True iff every contract clause passed. `failures` is the
+    list of clauses that fired — the smoke script ntfys with this list
+    so the operator can see exactly what shape the regression took.
+    """
+    cli: str
+    raw_len: int
+    scrubbed_len: int
+    ok: bool
+    failures: List[str] = field(default_factory=list)
+    preview: str = ""  # First 200 chars of scrubbed text, for log readability
+def validate_cli_contract(
+    cli_name: str,
+    raw_stdout: str,
+    raw_stderr: str = "",
+    expect_verdict_hint: bool = True,
+) -> CliContractResult:
+    """Apply the per-CLI contract to one subprocess response.
+    Mirrors the EXACT production scrub path so the validator's view
+    matches what ai/deliberation.py's _call_cli sees. Failures append
+    a short reason string; an empty failures list means the response
+    is contract-clean.
+    Args:
+        cli_name: which CLI produced this (claude/codex/gemini/cursor);
+            used in the failure messages.
+        raw_stdout: subprocess.stdout bytes decoded to str.
+        raw_stderr: subprocess.stderr bytes decoded to str. The
+            contract is permissive on stderr — banner output is
+            ALLOWED there (intentional shim behavior). Completely
+            empty stderr + completely empty stdout is suspicious, but
+            this validator does not currently inspect raw_stderr.
+        expect_verdict_hint: when True, fail the response if it
+            doesn't contain at least one verdict marker. Mock tests
+            and the smoke script set this; tests of low-content
+            responses (e.g., a `--version` smoke) set False.
+    Returns:
+        CliContractResult with `ok`, `failures`, and a preview.
+    """
+    # Import lazily so this module can be imported in a context where
+    # ai.deliberation isn't available (e.g., the smoke script when
+    # gateway code path changes).
+    failures: List[str] = []
+    try:
+        from ai.deliberation import _scrub_cli_output
+        scrubbed = _scrub_cli_output(raw_stdout, source=cli_name).strip()
+    except Exception as exc:
+        return CliContractResult(
+            cli=cli_name,
+            raw_len=len(raw_stdout),
+            scrubbed_len=0,
+            ok=False,
+            failures=[f"scrub_failed:{type(exc).__name__}:{str(exc)[:80]}"],
+            preview="",
+        )
+    # 1. Contamination markers — if the scrubber returned one, fail.
+    for pat in _CONTAMINATION_MARKERS:
+        if pat.search(scrubbed):
+            failures.append(f"contamination_marker:{pat.pattern[:40]}")
+            break
+    # 2. Minimum length. Below MIN_VERDICT_LEN is almost certainly
+    # garbage even if scrub didn't tag it.
+    if len(scrubbed) < MIN_VERDICT_LEN and "contamination_marker" not in " ".join(failures):
+        failures.append(f"too_short:{len(scrubbed)}<{MIN_VERDICT_LEN}")
+    # 3. Verdict hint — at least one of VERDICT:/AGREE/DISAGREE/REMEDIATE/
+    # APPROVE/REJECT must appear. Skip when expect_verdict_hint=False.
+    if expect_verdict_hint and not _VERDICT_HINT_RE.search(scrubbed):
+        failures.append("no_verdict_hint")
+    # 4. Doesn't start with an UNKNOWN bracketed prefix (defense-in-depth
+    # on top of scrub). If a brand-new banner shape lands tomorrow that
+    # the scrubber doesn't know about, this should catch it. Responses
+    # starting with a known tool prefix are not failed by this clause.
+    if scrubbed.startswith("["):
+        # Bracketed prefix is almost always a tool-emitted status line
+        # (e.g. "[Delimit]" / "[claude error: ...]") not a model verdict.
+        if not any(scrubbed.lower().startswith(p) for p in (
+            "[delimit", "[scrub:", "[claude", "[codex", "[gemini", "[cursor",
+        )):
+            # Unknown bracketed prefix — surface for inspection
+            failures.append(f"unknown_bracketed_prefix:{scrubbed[:40]!r}")
+    return CliContractResult(
+        cli=cli_name,
+        raw_len=len(raw_stdout),
+        scrubbed_len=len(scrubbed),
+        ok=not failures,
+        failures=failures,
+        preview=scrubbed[:200],
+    )
+def format_contract_report(results: List[CliContractResult]) -> str:
+    """Human-readable summary of N validation results for ntfy / logs."""
+    lines = []
+    n_ok = sum(1 for r in results if r.ok)
+    lines.append(f"CLI contract: {n_ok}/{len(results)} clean")
+    for r in results:
+        flag = "OK" if r.ok else "FAIL"
+        lines.append(f"  [{flag}] {r.cli:8s} raw={r.raw_len}B scrubbed={r.scrubbed_len}B")
+        if not r.ok:
+            for f in r.failures:
+                lines.append(f"           ↳ {f}")
+            if r.preview:
+                lines.append(f"           preview: {r.preview[:100]!r}")
+    return "\n".join(lines)

package/gateway/ai/daemon.py CHANGED Viewed

@@ -75,6 +75,7 @@ AUTO_PATTERNS = {
     "test": ["test", "coverage", "smoke"],
     "docs": ["docs", "documentation", "readme"],
     "governance": ["governance", "policy", "compliance"],
+    "build": ["feat", "fix", "task", "implementation"],
 }
@@ -263,6 +264,14 @@ def get_next_automatable_item(
     return None
+def _run_build(item_id: str, venture: str = "") -> dict:
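+    """Run one governed build-loop iteration (LED-1146).
+    NOTE(review): `item_id` and `venture` are accepted but not forwarded;
+    the call always uses the fixed "daemon-build-loop" session — confirm
+    the loop engine selects the item itself.
+    """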
+    from ai.loop_engine import run_governed_iteration
+    # Use a persistent session for the daemon
+    session_id = "daemon-build-loop"
+    return run_governed_iteration(session_id=session_id)
 def process_item(item: dict, log_path: Optional[Path] = None) -> dict:
     """Process a single ledger item by running the suggested tool.
@@ -293,6 +302,7 @@ def process_item(item: dict, log_path: Optional[Path] = None) -> dict:
         "test": _run_test,
         "governance": _run_governance,
         "docs": _run_docs,
+        "build": _run_build,
     }
     runner = tool_map.get(tool)

package/gateway/ai/daily_digest.py CHANGED Viewed

@@ -20,11 +20,10 @@ Call via MCP: delimit_digest(action="run") or scheduled cron.
 from __future__ import annotations
 import json
-import time
 from collections import Counter
 from datetime import datetime, timedelta, timezone
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict
 DIGEST_DIR = Path.home() / ".delimit" / "digest"
 LEDGER_DIR = Path.home() / ".delimit" / "ledger"