delimit-cli 4.6.0 → 4.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -36,14 +36,17 @@ customizations around our managed section).
36
36
  - Documentation refreshes: cross-agent-handoff worked example surfaced on README,
37
37
  test-count badge bumped, misleading version stamps removed.
38
38
 
39
- ### Known issue (pre-existing, fix tracked)
40
-
41
- - **`delimit attest mcp` exit codes** (LED-1403): on tool error (e.g. no
42
- lockfile → npm audit unavailable) and unknown attestation kind, the CLI
43
- currently returns exit 1 instead of the expected exit 2. CI/CD pipelines
44
- that gate on tier-2 (treating "tool unavailable" as a hard error vs.
45
- "fail" which is a soft check) should pin this expectation. Tracked for
46
- fix in a follow-up release.
39
+ ### Known issue (pre-existing, fix tracked) — **RETRACTED 2026-05-15**
40
+
41
+ > ~~**`delimit attest mcp` exit codes** (LED-1403): on tool error (e.g. no
42
+ > lockfile → npm audit unavailable) and unknown attestation kind, the CLI
43
+ > currently returns exit 1 instead of the expected exit 2.~~
44
+ >
45
+ > **Retraction:** the original report was a phantom test failure caused by a
46
+ > corrupted local git worktree (LED-1401), not a real CLI bug. On a clean
47
+ > clone, all 6 `attest-mcp` test suites pass and the CLI returns the correct
48
+ > exit codes (0 pass+skip / 1 fail / 2 error per STR-656). LED-1403 closed
49
+ > `not_reproducible`. No customer action required.
47
50
 
48
51
 
49
52
  ## [4.5.2] - 2026-05-02
@@ -4629,9 +4629,19 @@ program
4629
4629
  const prePushPath = path.join(hooksDir, 'pre-push');
4630
4630
  const marker = '# delimit-governance-hook';
4631
4631
 
4632
- // Resolution order: local node_modules → global PATH → npx fallback.
4633
- // npx is last because it can fail with Arborist 'extraneous' errors
4634
- // when a project's node_modules / lockfile drift (LED-1248).
4632
+ // Resolution order: local node_modules → global PATH →
4633
+ // global node_modules direct → npx fallback.
4634
+ //
4635
+ // npx is the LAST resort because on some npm-arborist environments
4636
+ // it crashes with "Cannot read properties of undefined (reading
4637
+ // 'extraneous')" before reaching the CLI (LED-1207, LED-1248). That
4638
+ // failure mode silently breaks the gate and forces --no-verify,
4639
+ // which violates the no-silent-no-verify rule.
4640
+ //
4641
+ // The third tier (`node $(npm root -g)/delimit-cli/bin/delimit-cli.js`)
4642
+ // catches the case where delimit-cli is globally installed but its bin
4643
+ // shim isn't on PATH (npm-installed-but-symlink-missing, fresh CI
4644
+ // containers, etc.) — bypassing npm/npx entirely.
4635
4645
  const preCommitHook = `#!/bin/sh
4636
4646
  ${marker}
4637
4647
  # Delimit API governance gate
@@ -4640,6 +4650,8 @@ if [ -x ./node_modules/.bin/delimit-cli ]; then
4640
4650
  ./node_modules/.bin/delimit-cli check --staged
4641
4651
  elif command -v delimit-cli >/dev/null 2>&1; then
4642
4652
  delimit-cli check --staged
4653
+ elif _delimit_global="$(npm root -g 2>/dev/null)/delimit-cli/bin/delimit-cli.js" && [ -f "$_delimit_global" ]; then
4654
+ node "$_delimit_global" check --staged
4643
4655
  else
4644
4656
  npx delimit-cli check --staged
4645
4657
  fi
@@ -4653,6 +4665,8 @@ if [ -x ./node_modules/.bin/delimit-cli ]; then
4653
4665
  ./node_modules/.bin/delimit-cli check --base origin/main
4654
4666
  elif command -v delimit-cli >/dev/null 2>&1; then
4655
4667
  delimit-cli check --base origin/main
4668
+ elif _delimit_global="$(npm root -g 2>/dev/null)/delimit-cli/bin/delimit-cli.js" && [ -f "$_delimit_global" ]; then
4669
+ node "$_delimit_global" check --base origin/main
4656
4670
  else
4657
4671
  npx delimit-cli check --base origin/main
4658
4672
  fi
@@ -36,6 +36,11 @@ DLQ_AUTO_PAUSE_THRESHOLD = 20
36
36
  TASK_TYPE_ROUTER = {
37
37
  # Outreach and social work — Gemini Flash is fast and cheap
38
38
  "outreach": "gemini",
39
+ # LED-2214b: substantive github outreach gets the same default
40
+ # routing as generic outreach (cheap, fast drafter) but is named
41
+ # distinctly so a regression that resurrects the generic dispatch
42
+ # path does not silently land here.
43
+ "outreach_substantive": "gemini",
39
44
  "social": "gemini",
40
45
  "content": "gemini",
41
46
  "sensor": "gemini",
@@ -0,0 +1,175 @@
1
+ """Git worktree sanity checks (LED-1411).
2
+
3
+ Single source of truth for "is this directory a healthy git worktree?"
4
+ Used by delimit_test_smoke, delimit_deploy_plan, and delimit_evidence_collect
5
+ as a precheck before they trust ambient checkout state.
6
+
7
+ Background — LED-1403 / LED-1401 incident (2026-05-14):
8
+ `/home/delimit/npm-delimit/.git` was configured `bare = true` but had source
9
+ files alongside, AND a stranded sibling worktree at `/tmp/delimit-mcp-main`
10
+ where `git status` showed every file as both `D` and `??` (deleted from
11
+ index, untracked on disk). `delimit_test_smoke` ran against this corrupt
12
+ state and reported `attest-mcp Q2 3-tier exit codes` failures that did NOT
13
+ exist on real main. I almost shipped a "fix" for a non-bug (LED-1403,
14
+ closed `not_reproducible` after a fresh clone proved tests passed).
15
+
16
+ This module exists so the same class of phantom failure can't recur.
17
+ Precheck must:
18
+ - Add <100ms to caller startup (no network, no fetch)
19
+ - Emit a single actionable remediation line on failure
20
+ - Return a structured dict (callers may inline-handle or surface up)
21
+
22
+ Memory anchor: feedback_corrupted_worktree_phantom_failures.md
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import subprocess
28
+ from pathlib import Path
29
+ from typing import Any, Dict
30
+
31
+
32
+ def _run(cmd: list, cwd: str, timeout: float = 2.0) -> str:
33
+ """Run a git command with a tight timeout. Returns stdout stripped,
34
+ or empty string on any failure (intentional — caller decides what
35
+ constitutes a failure based on the structured result, not exceptions)."""
36
+ try:
37
+ return subprocess.check_output(
38
+ cmd,
39
+ cwd=cwd,
40
+ stderr=subprocess.DEVNULL,
41
+ timeout=timeout,
42
+ text=True,
43
+ ).strip()
44
+ except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError, OSError):
45
+ return ""
46
+
47
+
48
def check_worktree_sanity(repo_path: str) -> Dict[str, Any]:
    """Verify the directory at `repo_path` is a healthy git worktree.

    Checks, in the order they run (cheapest first):
      1. Path exists and is a directory                      -> `not_a_directory`
      2. Path contains a `.git` entry (directory or file)    -> `not_a_repo`
      3. `git rev-parse --is-bare-repository` is not `true`  -> `bare_repo_with_files`
      4. `git rev-parse --is-inside-work-tree` is `true`     -> `not_a_worktree`
      5. `git worktree list --porcelain` has a record whose path is exactly
         the resolved `repo_path`                            -> `stranded_worktree`
      6. `git status --porcelain=v1` does not show >=10 paths as BOTH deleted
         and untracked (the LED-1401 corruption signature)   -> `corrupt_status`

    Returns a dict with:
      - ok: bool — overall health
      - reason: str — one of the failure codes above when ok=False, else `healthy`
      - detail: str — actionable remediation message
      - path: str — the path that was checked
      - worktree_list: str — raw porcelain listing (only for `stranded_worktree`)
      - overlap_count: int — number of deleted+untracked paths (only for
        `corrupt_status`)

    Non-raising: errors return ok=False with a structured reason, so callers
    can decide whether to halt or warn.
    """
    reclone_hint = "`git clone <url> /tmp/<repo>-fresh && cd /tmp/<repo>-fresh`"

    def _unhealthy(reason: str, detail: str, **extra: Any) -> Dict[str, Any]:
        # Single place that shapes every failure result.
        result: Dict[str, Any] = {
            "ok": False,
            "reason": reason,
            "detail": detail,
            "path": repo_path,
        }
        result.update(extra)
        return result

    p = Path(repo_path)
    if not p.exists() or not p.is_dir():
        return _unhealthy("not_a_directory", f"{repo_path} is not a directory.")

    if not (p / ".git").exists():
        return _unhealthy(
            "not_a_repo",
            f"{repo_path} has no .git/ — not a git worktree.",
        )

    # Bare-repo check first (LED-1401 signature: bare=true + source files
    # alongside). Checked BEFORE is-inside-work-tree because a bare repo
    # answers "false" to that question — the more informative bare-repo
    # message should win when both conditions hold.
    is_bare = _run(["git", "rev-parse", "--is-bare-repository"], cwd=repo_path)
    if is_bare == "true":
        return _unhealthy(
            "bare_repo_with_files",
            f"{repo_path}/.git/ has `core.bare = true` but the directory "
            f"holds source files. Tests against this state run stale "
            f"code. Re-clone fresh: {reclone_hint}",
        )

    inside = _run(["git", "rev-parse", "--is-inside-work-tree"], cwd=repo_path)
    if inside != "true":
        return _unhealthy(
            "not_a_worktree",
            f"{repo_path} is not inside a git work tree "
            f"(rev-parse --is-inside-work-tree returned {inside!r}). "
            f"Re-clone fresh: {reclone_hint}",
        )

    # Worktree-list membership check (catches stranded sibling worktrees).
    # Compare against the parsed `worktree <path>` records rather than doing
    # a substring search on the raw listing: a substring test treats
    # /tmp/repo as registered whenever /tmp/repo-fresh is listed.
    worktrees = _run(["git", "worktree", "list", "--porcelain"], cwd=repo_path)
    resolved_path = p.resolve()
    resolved = str(resolved_path)
    if worktrees:  # empty output means the listing failed; skip the check
        prefix = "worktree "
        registered = False
        for record in worktrees.splitlines():
            if not record.startswith(prefix):
                continue
            listed = record[len(prefix):]
            if listed == resolved:
                registered = True
                break
            try:
                # Normalise separators/symlinks in git's spelling of the path.
                if Path(listed).resolve() == resolved_path:
                    registered = True
                    break
            except (OSError, RuntimeError):
                continue
        if not registered:
            # The current directory isn't a registered worktree of its own
            # .git/ — likely a stale checkout that was wiped+repopulated
            # outside git's awareness (LED-1401 stranded-sibling signature).
            return _unhealthy(
                "stranded_worktree",
                f"{resolved} is not a registered worktree of its own .git/. "
                f"Run `git worktree list` to inspect; re-clone fresh if "
                f"orphaned: {reclone_hint}",
                worktree_list=worktrees,
            )

    # LED-1401 corrupt-status signature: files appear as BOTH `D` and `??`
    # (deleted from index, untracked on disk). Sample the first 200 status
    # lines — if >=10 distinct paths show this pattern, it's pathological.
    # NOTE: _run strips the whole output, so a leading-space status code on
    # the very first line loses a column; immaterial against a threshold of 10.
    status = _run(["git", "status", "--porcelain=v1"], cwd=repo_path, timeout=3.0)
    if status:
        deleted_paths = set()
        untracked_paths = set()
        for line in status.split("\n")[:200]:
            if len(line) < 4:
                continue
            xy = line[:2]  # two-column porcelain status code
            path = line[3:].lstrip()
            if "D" in xy:
                deleted_paths.add(path)
            if xy == "??":
                untracked_paths.add(path)
        overlap = deleted_paths & untracked_paths
        if len(overlap) >= 10:
            return _unhealthy(
                "corrupt_status",
                f"{repo_path} shows >={len(overlap)} files as both deleted-from-index "
                f"AND untracked-on-disk — the worktree was wiped and repopulated "
                f"outside git's awareness (LED-1401 signature). Re-clone fresh: "
                f"{reclone_hint}",
                overlap_count=len(overlap),
            )

    return {
        "ok": True,
        "reason": "healthy",
        "detail": "git worktree is healthy",
        "path": repo_path,
    }
@@ -72,6 +72,19 @@ _CREDENTIAL_FALSE_POSITIVES = re.compile(
72
72
  r"_data\[|_result\[|"
73
73
  # LED-1278 (b): function-call RHS with leading underscore (e.g. _load_token())
74
74
  r"=\s*_\w+\(|"
75
+ # LED-1278 (c) [2026-05-22]: naked function-call RHS without leading
76
+ # underscore. Matches the common shape `const token = readCurrentToken();`
77
+ # in bin/delimit-cli.js — the token is being READ from somewhere, not
78
+ # hardcoded. Tightened with `\s*;?\s*$` to require end-of-statement so
79
+ # we don't suppress `token = realLeak("AKIAIOSFODNN7EXAMPLE")` shapes
80
+ # where the call argument is itself a literal secret.
81
+ r"=\s*\w+\([^)]{0,40}\)\s*;?\s*$|"
82
+ # LED-1278 (c) [2026-05-22]: parenthesized property-access fallback chain
83
+ # like `const token = (options.token || process.env.TOKEN)`. Common shape
84
+ # for CLI option parsing where the RHS reads from a known input source,
85
+ # never a literal. Requires the open-paren to be followed by a word + dot
86
+ # (property access) so we don't match `token = ("AKIA..." || "")` shapes.
87
+ r"=\s*\(\s*\w+\.\w+|"
75
88
  # LED-1278 (b): documentation/example placeholders in angle brackets
76
89
  r"<[^>]*?(?:long|same|random|your|placeholder|example|secret|token|key)[^>]*?>|"
77
90
  # Bare `if not <var>:` and similar control-flow lines that mention
@@ -0,0 +1,185 @@
1
+ """LED-1415 — CLI subprocess contract.
2
+
3
+ The deliberation engine drives 4 model CLIs as subprocesses
4
+ (claude / codex / gemini / cursor) and treats their stdout as model
5
+ verdict text. Three classes of bug have surfaced in this pipeline:
6
+
7
+ 1. Banner contamination — the Delimit governance shim leaks ASCII
8
+ art onto stdout instead of stderr (PR #154, fixed by LED-1428).
9
+ 2. Empty/silent responses — CLI exits 0 but stdout is empty
10
+ (transient API issues, OOM, network blips). Caught by LED-1416's
11
+ retry state machine.
12
+ 3. Schema drift — CLI changes its output shape between versions
13
+ (e.g., adds an auto-correction line at the top). Caught
14
+ reactively by failing deliberation panels.
15
+
16
+ This module holds the ONE contract that every CLI response must
17
+ satisfy + the ONE validator that enforces it. Both the per-CLI mock
18
+ tests (tests/test_cli_contract.py) AND the weekly real-CLI smoke
19
+ script (scripts/smoke_cli_contracts.py) call validate_cli_contract()
20
+ so the contract definition lives in exactly one place — extending
21
+ it never means remembering to update two places.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import re
27
+ from dataclasses import dataclass, field
28
+ from typing import List, Optional
29
+
30
+
31
+ # The 4 known CLIs the deliberation engine targets. cursor is included
32
+ # even though it's not yet installed in the dev environment — adding
33
+ # it to the contract surface now means the validator is ready when it
34
+ # lands; smoke skips when the binary isn't present.
35
+ KNOWN_CLI_NAMES = ("claude", "codex", "gemini", "cursor")
36
+
37
+
38
+ # Minimum scrubbed-response length we'll accept as "looks like a real
39
+ # model verdict" rather than "leftover garbage after banner strip."
40
+ # Calibrated against historical scrub-debug.jsonl entries: every real
41
+ # round-1/round-2 verdict from past deliberations was >= 60 chars;
42
+ # every banner-only contamination was < 30 chars. 30 is the cutoff
43
+ # the production scrubber already uses; keeping that here means the
44
+ # validator + the scrubber agree.
45
+ MIN_VERDICT_LEN = 30
46
+
47
+
48
+ # Patterns that signal "the response is contamination, not a verdict."
49
+ # Each gets the response REJECTED even if length and scrub passed.
50
+ _CONTAMINATION_MARKERS = (
51
+ re.compile(r"^\[scrub:\s*contaminated\b", re.IGNORECASE),
52
+ re.compile(r"^\[.+\bunavailable\b.+\bnot found in PATH\]", re.IGNORECASE),
53
+ re.compile(r"^\[.+\bskipped under INTERNAL_PYTEST_GUARD", re.IGNORECASE),
54
+ re.compile(r"^\[.+\btimed out after\b", re.IGNORECASE),
55
+ re.compile(r"^\[.+\breturned empty response\]", re.IGNORECASE),
56
+ re.compile(r"^\[.+\berror:.+\]\s*$", re.IGNORECASE),
57
+ )
58
+
59
+
60
+ # A response should contain at least ONE of these markers to be
61
+ # recognizable as a panel verdict. The deliberation engine prompts all
62
+ # models to end with `VERDICT: ...` so we expect to see it. Falling
63
+ # back: "AGREE" / "DISAGREE" / "REMEDIATE" / "AGREE WITH MODIFICATIONS"
64
+ # all appear in real responses even when the trailing VERDICT line is
65
+ # omitted by a chatty model.
66
+ _VERDICT_HINT_RE = re.compile(
67
+ r"\b(VERDICT:|AGREE|DISAGREE|REMEDIATE|APPROVE|REJECT)\b",
68
+ re.IGNORECASE,
69
+ )
70
+
71
+
72
@dataclass
class CliContractResult:
    """Verdict of running the contract checks against a single CLI response.

    `ok` is True only when no contract clause fired. `failures` names each
    clause that did fire; the smoke script forwards that list in its ntfy
    notification so an operator can see what shape the regression took.
    """

    # Name of the CLI that produced the response.
    cli: str
    # Length of the raw stdout text.
    raw_len: int
    # Length of the text after scrubbing.
    scrubbed_len: int
    # True when `failures` is empty.
    ok: bool
    # Short reason strings, one per contract clause that fired.
    failures: List[str] = field(default_factory=list)
    # First 200 chars of scrubbed text, for log readability.
    preview: str = ""
86
+
87
+
88
def validate_cli_contract(
    cli_name: str,
    raw_stdout: str,
    raw_stderr: str = "",
    expect_verdict_hint: bool = True,
) -> CliContractResult:
    """Apply the per-CLI contract to one subprocess response.

    The text is passed through `ai.deliberation._scrub_cli_output` (the
    production scrubber) before any clause is evaluated, so the validator
    judges the same text the deliberation engine would see. Each failed
    clause appends a short reason string; an empty failures list means the
    response is contract-clean.

    Clauses:
      1. contamination_marker — scrubbed text matches a contamination pattern.
      2. too_short — scrubbed text is shorter than MIN_VERDICT_LEN (suppressed
         when clause 1 already fired).
      3. no_verdict_hint — no verdict marker found (only when
         `expect_verdict_hint` is True).
      4. unknown_bracketed_prefix — text starts with `[` but not with one of
         the recognised tool prefixes.

    Args:
        cli_name: which CLI produced this (claude/codex/gemini/cursor);
            passed to the scrubber as `source`.
        raw_stdout: subprocess stdout decoded to str. `None` (stdout not
            captured) is treated as empty output.
        raw_stderr: subprocess stderr decoded to str. Accepted so callers can
            pass both streams, but no clause currently inspects it — banner
            output on stderr is allowed.
        expect_verdict_hint: when True, fail the response if it doesn't
            contain at least one verdict marker. Low-content probes (e.g. a
            `--version` smoke) pass False.

    Returns:
        CliContractResult with `ok`, `failures`, and a preview. If the
        scrubber cannot be imported or raises, the result carries a single
        `scrub_failed:<ExceptionType>:<message>` failure.
    """
    # Normalise a missing stdout so len() below cannot raise on the error path.
    raw_stdout = raw_stdout or ""
    failures: List[str] = []

    # Imported lazily so this module stays importable in contexts where
    # ai.deliberation isn't available.
    try:
        from ai.deliberation import _scrub_cli_output
        scrubbed = _scrub_cli_output(raw_stdout, source=cli_name).strip()
    except Exception as exc:
        return CliContractResult(
            cli=cli_name,
            raw_len=len(raw_stdout),
            scrubbed_len=0,
            ok=False,
            failures=[f"scrub_failed:{type(exc).__name__}:{str(exc)[:80]}"],
            preview="",
        )

    # 1. Contamination markers — report the first pattern that matches.
    contaminated = False
    for pat in _CONTAMINATION_MARKERS:
        if pat.search(scrubbed):
            failures.append(f"contamination_marker:{pat.pattern[:40]}")
            contaminated = True
            break

    # 2. Minimum length. Below MIN_VERDICT_LEN is almost certainly garbage
    #    even if scrub didn't tag it. Skipped when already flagged as
    #    contaminated, to avoid reporting the same response twice.
    if len(scrubbed) < MIN_VERDICT_LEN and not contaminated:
        failures.append(f"too_short:{len(scrubbed)}<{MIN_VERDICT_LEN}")

    # 3. Verdict hint — at least one of VERDICT:/AGREE/DISAGREE/REMEDIATE/
    #    APPROVE/REJECT must appear. Skipped when expect_verdict_hint=False.
    if expect_verdict_hint and not _VERDICT_HINT_RE.search(scrubbed):
        failures.append("no_verdict_hint")

    # 4. Unknown bracketed prefix (defense-in-depth on top of scrub). A
    #    leading "[" is almost always a tool-emitted status line rather than a
    #    model verdict. The recognised prefixes below are not flagged by this
    #    clause; any other bracketed opener is surfaced for inspection so a
    #    brand-new banner shape doesn't slip through.
    recognised_prefixes = (
        "[delimit", "[scrub:", "[claude", "[codex", "[gemini", "[cursor",
    )
    if scrubbed.startswith("[") and not scrubbed.lower().startswith(recognised_prefixes):
        failures.append(f"unknown_bracketed_prefix:{scrubbed[:40]!r}")

    return CliContractResult(
        cli=cli_name,
        raw_len=len(raw_stdout),
        scrubbed_len=len(scrubbed),
        ok=not failures,
        failures=failures,
        preview=scrubbed[:200],
    )
170
+
171
+
172
def format_contract_report(results: List[CliContractResult]) -> str:
    """Render validation results as a plain-text summary for ntfy / logs.

    The first line gives the clean/total count. Each result then gets one
    status line; a failing result is followed by one line per failure reason
    and, when a preview exists, its first 100 characters in repr form.
    """
    clean_count = len([res for res in results if res.ok])
    report = [f"CLI contract: {clean_count}/{len(results)} clean"]
    for res in results:
        status = "FAIL" if not res.ok else "OK"
        report.append(
            f" [{status}] {res.cli:8s} raw={res.raw_len}B scrubbed={res.scrubbed_len}B"
        )
        if res.ok:
            continue
        # Failure detail is only emitted for results that broke the contract.
        report.extend(f" ↳ {reason}" for reason in res.failures)
        if res.preview:
            report.append(f" preview: {res.preview[:100]!r}")
    return "\n".join(report)
@@ -13,7 +13,10 @@ This replaces _with_next_steps — governance IS the next step system.
13
13
  import json
14
14
  import logging
15
15
  import os
16
+ import re
17
+ import subprocess
16
18
  import time
19
+ from datetime import datetime, timezone
17
20
  from pathlib import Path
18
21
  from typing import Any, Dict, List, Optional
19
22
 
@@ -826,6 +829,184 @@ def govern(tool_name: str, result: Dict[str, Any], project_path: str = ".") -> D
826
829
  return governed_result
827
830
 
828
831
 
832
+ # ─────────────────────────────────────────────────────────────────────
833
+ # LED-2214b-followup — sensor_github_issue sync impl
834
+ # ─────────────────────────────────────────────────────────────────────
835
+ #
836
+ # The outreach daemon's monitor_phase needs to call the same logic that
837
+ # delimit_sensor_github_issue (MCP tool) runs, but synchronously and
838
+ # without the _with_next_steps wrapping. Before this extraction the
839
+ # daemon tried to import the impl from two paths that don't exist —
840
+ # `ai.governance._sensor_github_issue_impl` and
841
+ # `backends.governance_bridge.sensor_github_issue` — and silently fell
842
+ # back to "monitor skipped" on every tick, leaving the entire reply-
843
+ # tracking cycle dead.
844
+ #
845
+ # Now both callers share this function. The MCP tool wraps the result
846
+ # with `_with_next_steps`; the daemon consumes the raw dict.
847
+
848
+ _NEGATIVE_KEYWORDS = (
849
+ "not interested", "won't be", "will not", "don't need", "do not need",
850
+ "no thanks", "pass on", "not a fit", "not for us", "closing",
851
+ "won't adopt", "will not adopt", "reject", "declined",
852
+ )
853
+
854
+ _REPO_FORMAT_RE = re.compile(r"^[\w.-]+/[\w.-]+$")
855
+
856
+ # Module-local guard so the warning fires at most once per process.
857
+ _REPO_ALLOWLIST_WARNED = False
858
+
859
+
860
+ def _check_repo_allowlist(repo: str) -> Optional[Dict[str, Any]]:
861
+ """Return a refusal dict if the repo isn't in DELIMIT_ALLOWED_REPOS.
862
+
863
+ Duplicates the logic of ai.server._check_repo_allowlist intentionally:
864
+ importing from ai.server would create a circular import (server.py
865
+ imports from governance). Mirror with care — both copies must stay
866
+ in sync until LED-216 splits the allowlist into its own module.
867
+ """
868
+ global _REPO_ALLOWLIST_WARNED
869
+ allowlist_raw = os.environ.get("DELIMIT_ALLOWED_REPOS", "").strip()
870
+ if not allowlist_raw:
871
+ if not _REPO_ALLOWLIST_WARNED:
872
+ logger.warning(
873
+ "DELIMIT_ALLOWED_REPOS unset — sensor_github_issue calls "
874
+ "pass through to gh api using the caller's token."
875
+ )
876
+ _REPO_ALLOWLIST_WARNED = True
877
+ return None
878
+ allowed = {entry.strip().lower() for entry in allowlist_raw.split(",") if entry.strip()}
879
+ if (repo or "").strip().lower() not in allowed:
880
+ return {
881
+ "error": "repo_not_allowlisted",
882
+ "repo": repo,
883
+ "allowed": sorted(allowed),
884
+ "hint": (
885
+ "Repo not in DELIMIT_ALLOWED_REPOS. Add it or use a tool "
886
+ "that does not reach external APIs."
887
+ ),
888
+ }
889
+ return None
890
+
891
+
892
+ def _sensor_github_issue_impl(
893
+ repo: str,
894
+ issue_number: int,
895
+ since_comment_id: int = 0,
896
+ ) -> Dict[str, Any]:
897
+ """Sync implementation of the sensor_github_issue MCP tool.
898
+
899
+ Returns the RAW result dict (no _with_next_steps wrapping). Callers
900
+ that want the MCP wrapping apply it themselves. Returns
901
+ ``{"error": ..., "has_new_activity": False}`` on any failure mode
902
+ rather than raising — the outreach daemon's monitor loop relies on
903
+ fail-soft behavior so one bad LED doesn't kill the whole tick.
904
+
905
+ Result schema (success path):
906
+ {
907
+ "repo": str, "issue_number": str,
908
+ "signal": {id, venture, metric, source, timestamp, severity},
909
+ "issue_state": "open" | "closed" | "unknown",
910
+ "new_comments": [{id, author, created_at, body}, ...],
911
+ "latest_comment_id": int,
912
+ "total_comments": int,
913
+ "has_new_activity": bool,
914
+ }
915
+ """
916
+ # Validate inputs — defense-in-depth even though subprocess.run with
917
+ # list argv (no shell=True) makes classic injection inert.
918
+ if not _REPO_FORMAT_RE.match(repo or ""):
919
+ return {"error": f"Invalid repo format: {repo!r}. Use owner/repo.",
920
+ "has_new_activity": False}
921
+ if ".." in repo:
922
+ return {"error": "Invalid repo: path traversal sequences not allowed",
923
+ "has_new_activity": False}
924
+ if not isinstance(issue_number, int) or issue_number <= 0:
925
+ return {"error": f"Invalid issue number: {issue_number}",
926
+ "has_new_activity": False}
927
+
928
+ refusal = _check_repo_allowlist(repo)
929
+ if refusal is not None:
930
+ refusal.setdefault("has_new_activity", False)
931
+ return refusal
932
+
933
+ try:
934
+ # Fetch comments
935
+ comments_jq = (
936
+ "[.[] | {id: .id, author: .user.login, "
937
+ "created_at: .created_at, body: (.body | .[0:500])}]"
938
+ )
939
+ comments_proc = subprocess.run(
940
+ ["gh", "api",
941
+ f"repos/{repo}/issues/{issue_number}/comments",
942
+ "--jq", comments_jq],
943
+ capture_output=True, text=True, timeout=30,
944
+ )
945
+ if comments_proc.returncode != 0:
946
+ return {
947
+ "error": f"gh api comments failed: {(comments_proc.stderr or '').strip()[:200]}",
948
+ "has_new_activity": False,
949
+ }
950
+ all_comments = json.loads(comments_proc.stdout) if comments_proc.stdout.strip() else []
951
+ new_comments = [c for c in all_comments if c.get("id", 0) > since_comment_id]
952
+
953
+ # Fetch issue state
954
+ issue_jq = "{state: .state, labels: [.labels[].name], reactions: .reactions.total_count}"
955
+ issue_proc = subprocess.run(
956
+ ["gh", "api",
957
+ f"repos/{repo}/issues/{issue_number}",
958
+ "--jq", issue_jq],
959
+ capture_output=True, text=True, timeout=30,
960
+ )
961
+ if issue_proc.returncode != 0:
962
+ return {
963
+ "error": f"gh api issue failed: {(issue_proc.stderr or '').strip()[:200]}",
964
+ "has_new_activity": False,
965
+ }
966
+ issue_info = json.loads(issue_proc.stdout) if issue_proc.stdout.strip() else {}
967
+ issue_state = issue_info.get("state", "unknown")
968
+
969
+ # Severity classification — green default; amber on closed; red on
970
+ # negative keyword in any new comment body.
971
+ severity = "green"
972
+ combined_body = " ".join(c.get("body", "") or "" for c in new_comments).lower()
973
+ has_negative = any(kw in combined_body for kw in _NEGATIVE_KEYWORDS)
974
+ if has_negative:
975
+ severity = "red"
976
+ elif issue_state == "closed":
977
+ severity = "amber"
978
+
979
+ latest_comment_id = max((c.get("id", 0) for c in all_comments), default=since_comment_id)
980
+ repo_key = repo.replace("/", "_")
981
+
982
+ return {
983
+ "repo": repo,
984
+ "issue_number": str(issue_number),
985
+ "signal": {
986
+ "id": f"sensor:github_issue:{repo_key}:{issue_number}",
987
+ "venture": "delimit",
988
+ "metric": "outreach_issue_activity",
989
+ "source": f"https://github.com/{repo}/issues/{issue_number}",
990
+ "timestamp": datetime.now(timezone.utc).isoformat(),
991
+ "severity": severity,
992
+ },
993
+ "issue_state": issue_state,
994
+ "new_comments": new_comments,
995
+ "latest_comment_id": latest_comment_id,
996
+ "total_comments": len(all_comments),
997
+ "has_new_activity": len(new_comments) > 0,
998
+ }
999
+ except subprocess.TimeoutExpired:
1000
+ return {"error": "gh command timed out after 30s",
1001
+ "has_new_activity": False}
1002
+ except json.JSONDecodeError as exc:
1003
+ return {"error": f"Failed to parse gh output: {exc}",
1004
+ "has_new_activity": False}
1005
+ except Exception as exc: # noqa: BLE001 — sensor must fail soft
1006
+ logger.error("sensor_github_issue impl error: %s", exc)
1007
+ return {"error": str(exc), "has_new_activity": False}
1008
+
1009
+
829
1010
  def _deep_get(d: Dict, key: str) -> Any:
830
1011
  """Get a value from a dict, supporting nested keys with dots."""
831
1012
  if "." in key: