delimit-cli 4.6.0 → 4.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -8
- package/bin/delimit-cli.js +17 -3
- package/gateway/ai/agent_dispatch.py +5 -0
- package/gateway/ai/backends/git_health.py +175 -0
- package/gateway/ai/backends/tools_infra.py +13 -0
- package/gateway/ai/cli_contract.py +185 -0
- package/gateway/ai/governance.py +181 -0
- package/gateway/ai/heartbeat.py +290 -0
- package/gateway/ai/ledger_manager.py +81 -4
- package/gateway/ai/ledger_proof.py +127 -0
- package/gateway/ai/license.py +132 -47
- package/gateway/ai/license_core.cpython-310-x86_64-linux-gnu.so +0 -0
- package/gateway/ai/license_core.pyi +1 -1
- package/gateway/ai/outreach_loop_daemon.py +349 -0
- package/gateway/ai/outreach_substantive.py +768 -7
- package/gateway/ai/pro_tools.yaml +167 -0
- package/gateway/ai/reddit_scanner.py +7 -1
- package/gateway/ai/server.py +295 -116
- package/gateway/ai/social_queue.py +166 -10
- package/gateway/ai/tenant_auth.py +329 -0
- package/gateway/ai/tenant_data.py +339 -0
- package/gateway/ai/tenant_paths.py +150 -0
- package/package.json +4 -1
- package/scripts/build-license-core.sh +0 -85
- package/scripts/security-check.sh +0 -66
- package/scripts/test-license-core-so.sh +0 -107
package/CHANGELOG.md
CHANGED
|
@@ -36,14 +36,17 @@ customizations around our managed section).
|
|
|
36
36
|
- Documentation refreshes: cross-agent-handoff worked example surfaced on README,
|
|
37
37
|
test-count badge bumped, misleading version stamps removed.
|
|
38
38
|
|
|
39
|
-
### Known issue (pre-existing, fix tracked)
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
39
|
+
### Known issue (pre-existing, fix tracked) — **RETRACTED 2026-05-15**
|
|
40
|
+
|
|
41
|
+
> ~~**`delimit attest mcp` exit codes** (LED-1403): on tool error (e.g. no
|
|
42
|
+
> lockfile → npm audit unavailable) and unknown attestation kind, the CLI
|
|
43
|
+
> currently returns exit 1 instead of the expected exit 2.~~
|
|
44
|
+
>
|
|
45
|
+
> **Retraction:** the original report was a phantom test failure caused by a
|
|
46
|
+
> corrupted local git worktree (LED-1401), not a real CLI bug. On a clean
|
|
47
|
+
> clone, all 6 `attest-mcp` test suites pass and the CLI returns the correct
|
|
48
|
+
> exit codes (0 pass+skip / 1 fail / 2 error per STR-656). LED-1403 closed
|
|
49
|
+
> `not_reproducible`. No customer action required.
|
|
47
50
|
|
|
48
51
|
|
|
49
52
|
## [4.5.2] - 2026-05-02
|
package/bin/delimit-cli.js
CHANGED
|
@@ -4629,9 +4629,19 @@ program
|
|
|
4629
4629
|
const prePushPath = path.join(hooksDir, 'pre-push');
|
|
4630
4630
|
const marker = '# delimit-governance-hook';
|
|
4631
4631
|
|
|
4632
|
-
// Resolution order: local node_modules → global PATH →
|
|
4633
|
-
//
|
|
4634
|
-
//
|
|
4632
|
+
// Resolution order: local node_modules → global PATH →
|
|
4633
|
+
// global node_modules direct → npx fallback.
|
|
4634
|
+
//
|
|
4635
|
+
// npx is the LAST resort because on some npm-arborist environments
|
|
4636
|
+
// it crashes with "Cannot read properties of undefined (reading
|
|
4637
|
+
// 'extraneous')" before reaching the CLI (LED-1207, LED-1248). That
|
|
4638
|
+
// failure mode silently breaks the gate and forces --no-verify,
|
|
4639
|
+
// which violates the no-silent-no-verify rule.
|
|
4640
|
+
//
|
|
4641
|
+
// The third tier (`node $(npm root -g)/delimit-cli/bin/delimit-cli.js`)
|
|
4642
|
+
// catches the case where delimit-cli is globally installed but its bin
|
|
4643
|
+
// shim isn't on PATH (npm-installed-but-symlink-missing, fresh CI
|
|
4644
|
+
// containers, etc.) — bypassing npm/npx entirely.
|
|
4635
4645
|
const preCommitHook = `#!/bin/sh
|
|
4636
4646
|
${marker}
|
|
4637
4647
|
# Delimit API governance gate
|
|
@@ -4640,6 +4650,8 @@ if [ -x ./node_modules/.bin/delimit-cli ]; then
|
|
|
4640
4650
|
./node_modules/.bin/delimit-cli check --staged
|
|
4641
4651
|
elif command -v delimit-cli >/dev/null 2>&1; then
|
|
4642
4652
|
delimit-cli check --staged
|
|
4653
|
+
elif _delimit_global="$(npm root -g 2>/dev/null)/delimit-cli/bin/delimit-cli.js" && [ -f "$_delimit_global" ]; then
|
|
4654
|
+
node "$_delimit_global" check --staged
|
|
4643
4655
|
else
|
|
4644
4656
|
npx delimit-cli check --staged
|
|
4645
4657
|
fi
|
|
@@ -4653,6 +4665,8 @@ if [ -x ./node_modules/.bin/delimit-cli ]; then
|
|
|
4653
4665
|
./node_modules/.bin/delimit-cli check --base origin/main
|
|
4654
4666
|
elif command -v delimit-cli >/dev/null 2>&1; then
|
|
4655
4667
|
delimit-cli check --base origin/main
|
|
4668
|
+
elif _delimit_global="$(npm root -g 2>/dev/null)/delimit-cli/bin/delimit-cli.js" && [ -f "$_delimit_global" ]; then
|
|
4669
|
+
node "$_delimit_global" check --base origin/main
|
|
4656
4670
|
else
|
|
4657
4671
|
npx delimit-cli check --base origin/main
|
|
4658
4672
|
fi
|
|
@@ -36,6 +36,11 @@ DLQ_AUTO_PAUSE_THRESHOLD = 20
|
|
|
36
36
|
TASK_TYPE_ROUTER = {
|
|
37
37
|
# Outreach and social work — Gemini Flash is fast and cheap
|
|
38
38
|
"outreach": "gemini",
|
|
39
|
+
# LED-2214b: substantive github outreach gets the same default
|
|
40
|
+
# routing as generic outreach (cheap, fast drafter) but is named
|
|
41
|
+
# distinctly so a regression that resurrects the generic dispatch
|
|
42
|
+
# path does not silently land here.
|
|
43
|
+
"outreach_substantive": "gemini",
|
|
39
44
|
"social": "gemini",
|
|
40
45
|
"content": "gemini",
|
|
41
46
|
"sensor": "gemini",
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
"""Git worktree sanity checks (LED-1411).
|
|
2
|
+
|
|
3
|
+
Single source of truth for "is this directory a healthy git worktree?"
|
|
4
|
+
Used by delimit_test_smoke, delimit_deploy_plan, and delimit_evidence_collect
|
|
5
|
+
as a precheck before they trust ambient checkout state.
|
|
6
|
+
|
|
7
|
+
Background — LED-1403 / LED-1401 incident (2026-05-14):
|
|
8
|
+
`/home/delimit/npm-delimit/.git` was configured `bare = true` but had source
|
|
9
|
+
files alongside, AND a stranded sibling worktree at `/tmp/delimit-mcp-main`
|
|
10
|
+
where `git status` showed every file as both `D` and `??` (deleted from
|
|
11
|
+
index, untracked on disk). `delimit_test_smoke` ran against this corrupt
|
|
12
|
+
state and reported `attest-mcp Q2 3-tier exit codes` failures that did NOT
|
|
13
|
+
exist on real main. I almost shipped a "fix" for a non-bug (LED-1403,
|
|
14
|
+
closed `not_reproducible` after a fresh clone proved tests passed).
|
|
15
|
+
|
|
16
|
+
This module exists so the same class of phantom failure can't recur.
|
|
17
|
+
Precheck must:
|
|
18
|
+
- Add <100ms to caller startup (no network, no fetch)
|
|
19
|
+
- Emit a single actionable remediation line on failure
|
|
20
|
+
- Return a structured dict (callers may inline-handle or surface up)
|
|
21
|
+
|
|
22
|
+
Memory anchor: feedback_corrupted_worktree_phantom_failures.md
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import subprocess
|
|
28
|
+
from pathlib import Path
|
|
29
|
+
from typing import Any, Dict
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _run(cmd: list, cwd: str, timeout: float = 2.0) -> str:
    """Run a git command with a tight timeout and return its stripped stdout.

    Args:
        cmd: argv list handed to ``subprocess.check_output`` (no shell).
        cwd: directory the command is executed in.
        timeout: seconds to wait before giving up.

    Returns:
        The command's stdout with surrounding whitespace stripped, or an
        empty string on any failure: non-zero exit status, timeout, a
        missing binary / other OS error, or invalid arguments. This is
        intentional — the caller decides what constitutes a failure from
        the structured result, not from exceptions.
    """
    try:
        return subprocess.check_output(
            cmd,
            cwd=cwd,
            stderr=subprocess.DEVNULL,
            timeout=timeout,
            text=True,
            # git can print path bytes that are not valid in the locale
            # encoding. Strict decoding would raise UnicodeDecodeError and
            # break the "never raises" contract; surrogateescape mirrors how
            # Python itself decodes filesystem paths, so the text can still
            # be compared against str(Path(...)).
            errors="surrogateescape",
        ).strip()
    except (
        subprocess.CalledProcessError,
        subprocess.TimeoutExpired,
        OSError,  # includes FileNotFoundError (git binary or cwd missing)
        ValueError,  # e.g. embedded NUL byte in an argument or in cwd
    ):
        return ""
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _listed_worktree_paths(porcelain: str) -> list:
    """Return the path of every `worktree <path>` record in `git worktree list --porcelain` output."""
    prefix = "worktree "
    return [line[len(prefix):] for line in porcelain.splitlines() if line.startswith(prefix)]


def _is_registered_worktree(resolved: Path, listed: list) -> bool:
    """Return True when `resolved` is exactly one of the listed worktree paths."""
    for entry in listed:
        candidate = Path(entry)
        # Path equality (not substring search) so `/tmp/repo` is not
        # "found" inside `/tmp/repo-other`, and so separator differences
        # between git's output and the OS-native form do not matter.
        if candidate == resolved:
            return True
        try:
            if candidate.resolve() == resolved:
                return True
        except (OSError, RuntimeError):
            continue
    return False


def _deleted_untracked_overlap(status: str) -> set:
    """Return the paths that porcelain-v1 `status` text reports as both deleted and untracked."""
    deleted_paths = set()
    untracked_paths = set()
    for index, line in enumerate(status.splitlines()):
        # The text arrives whitespace-stripped, which eats the blank X
        # column of a leading record such as " D path". A porcelain-v1
        # record always has a space at index 2, so a first line with a
        # space at index 1 but not at index 2 lost that column: restore it.
        if index == 0 and len(line) >= 3 and line[1] == " " and line[2] != " ":
            line = " " + line
        if len(line) < 4:
            continue
        xy = line[:2]
        path = line[3:].lstrip()
        if "D" in xy:
            deleted_paths.add(path)
        if xy == "??":
            untracked_paths.add(path)
    return deleted_paths & untracked_paths


def check_worktree_sanity(repo_path: str) -> Dict[str, Any]:
    """Verify the directory at `repo_path` is a healthy git worktree.

    Checks (in the order they run):
      1. Path is an existing directory
      2. It contains a `.git` entry (directory, or file pointing to one)
      3. `git rev-parse --is-bare-repository` does not return `true`
      4. `git rev-parse --is-inside-work-tree` returns `true`
      5. `git worktree list --porcelain` registers the resolved path
      6. `git status --porcelain=v1` does NOT show >=10 paths as BOTH
         deleted AND untracked (the LED-1401 corruption signature)

    All git commands go through the module's `_run` helper. When the
    worktree list or the status output comes back empty, that check is
    skipped rather than failed.

    Args:
        repo_path: directory to inspect.

    Returns a dict with:
      - ok: bool — overall health
      - reason: str — short failure code (`not_a_directory`, `not_a_repo`,
        `bare_repo_with_files`, `not_a_worktree`, `stranded_worktree`,
        `corrupt_status`) when ok=False, else `healthy`
      - detail: str — actionable remediation message
      - path: str — the path that was checked
      - worktree_list: str — raw porcelain listing (`stranded_worktree` only)
      - overlap_count: int — deleted+untracked paths (`corrupt_status` only)

    Non-raising for the failure modes above: they return ok=False with a
    structured reason, so callers can decide whether to halt or warn.
    """
    p = Path(repo_path)
    if not p.is_dir():
        return {
            "ok": False,
            "reason": "not_a_directory",
            "detail": f"{repo_path} is not a directory.",
            "path": repo_path,
        }

    if not (p / ".git").exists():
        return {
            "ok": False,
            "reason": "not_a_repo",
            "detail": f"{repo_path} has no .git/ — not a git worktree.",
            "path": repo_path,
        }

    # Bare-repo check first (LED-1401 signature: bare=true + source files
    # alongside). Checked BEFORE is-inside-work-tree because a bare repo
    # answers "false" to that question — the more informative bare-repo
    # message should win when both conditions hold.
    is_bare = _run(["git", "rev-parse", "--is-bare-repository"], cwd=repo_path)
    if is_bare == "true":
        return {
            "ok": False,
            "reason": "bare_repo_with_files",
            "detail": (
                f"{repo_path}/.git/ has `core.bare = true` but the directory "
                f"holds source files. Tests against this state run stale "
                f"code. Re-clone fresh: `git clone <url> /tmp/<repo>-fresh "
                f"&& cd /tmp/<repo>-fresh`"
            ),
            "path": repo_path,
        }

    # Inside-work-tree check
    inside = _run(["git", "rev-parse", "--is-inside-work-tree"], cwd=repo_path)
    if inside != "true":
        return {
            "ok": False,
            "reason": "not_a_worktree",
            "detail": (
                f"{repo_path} is not inside a git work tree "
                f"(rev-parse --is-inside-work-tree returned {inside!r}). "
                f"Re-clone fresh: `git clone <url> /tmp/<repo>-fresh && cd /tmp/<repo>-fresh`"
            ),
            "path": repo_path,
        }

    # Worktree-list membership check (catches stranded sibling worktrees).
    # Each `worktree <path>` record is compared as a whole path; the check
    # is skipped when git returned nothing parseable.
    worktrees = _run(["git", "worktree", "list", "--porcelain"], cwd=repo_path)
    resolved_path = p.resolve()
    resolved = str(resolved_path)
    listed = _listed_worktree_paths(worktrees)
    if listed and not _is_registered_worktree(resolved_path, listed):
        # The current directory isn't a registered worktree of its own
        # .git/ — likely a stale checkout that was wiped+repopulated outside
        # git's awareness. This is the LED-1401 stranded-sibling signature.
        return {
            "ok": False,
            "reason": "stranded_worktree",
            "detail": (
                f"{resolved} is not a registered worktree of its own .git/. "
                f"Run `git worktree list` to inspect; re-clone fresh if "
                f"orphaned: `git clone <url> /tmp/<repo>-fresh && cd /tmp/<repo>-fresh`"
            ),
            "path": repo_path,
            "worktree_list": worktrees,
        }

    # LED-1401 corrupt-status signature: files appear as BOTH `D` and `??`
    # (deleted from index, untracked on disk). If >=10 distinct paths show
    # this pattern, it's pathological.
    #
    # The whole output is classified, not a leading sample: git prints
    # tracked changes before untracked entries, so truncating first would
    # drop every `??` line on a repo with many deleted files.
    # `--untracked-files=all` lists untracked files individually (instead
    # of collapsing a fully-untracked directory to `dir/`) and makes the
    # check independent of the user's status.showUntrackedFiles setting.
    status = _run(
        ["git", "status", "--porcelain=v1", "--untracked-files=all"],
        cwd=repo_path,
        timeout=3.0,
    )
    if status:
        overlap = _deleted_untracked_overlap(status)
        if len(overlap) >= 10:
            return {
                "ok": False,
                "reason": "corrupt_status",
                "detail": (
                    f"{repo_path} shows >={len(overlap)} files as both deleted-from-index "
                    f"AND untracked-on-disk — the worktree was wiped and repopulated "
                    f"outside git's awareness (LED-1401 signature). Re-clone fresh: "
                    f"`git clone <url> /tmp/<repo>-fresh && cd /tmp/<repo>-fresh`"
                ),
                "path": repo_path,
                "overlap_count": len(overlap),
            }

    return {
        "ok": True,
        "reason": "healthy",
        "detail": "git worktree is healthy",
        "path": repo_path,
    }
|
|
@@ -72,6 +72,19 @@ _CREDENTIAL_FALSE_POSITIVES = re.compile(
|
|
|
72
72
|
r"_data\[|_result\[|"
|
|
73
73
|
# LED-1278 (b): function-call RHS with leading underscore (e.g. _load_token())
|
|
74
74
|
r"=\s*_\w+\(|"
|
|
75
|
+
# LED-1278 (c) [2026-05-22]: naked function-call RHS without leading
|
|
76
|
+
# underscore. Matches the common shape `const token = readCurrentToken();`
|
|
77
|
+
# in bin/delimit-cli.js — the token is being READ from somewhere, not
|
|
78
|
+
# hardcoded. Tightened with `\s*;?\s*$` to require end-of-statement so
|
|
79
|
+
# we don't suppress `token = realLeak("AKIAIOSFODNN7EXAMPLE")` shapes
|
|
80
|
+
# where the call argument is itself a literal secret.
|
|
81
|
+
r"=\s*\w+\([^)]{0,40}\)\s*;?\s*$|"
|
|
82
|
+
# LED-1278 (c) [2026-05-22]: parenthesized property-access fallback chain
|
|
83
|
+
# like `const token = (options.token || process.env.TOKEN)`. Common shape
|
|
84
|
+
# for CLI option parsing where the RHS reads from a known input source,
|
|
85
|
+
# never a literal. Requires the open-paren to be followed by a word + dot
|
|
86
|
+
# (property access) so we don't match `token = ("AKIA..." || "")` shapes.
|
|
87
|
+
r"=\s*\(\s*\w+\.\w+|"
|
|
75
88
|
# LED-1278 (b): documentation/example placeholders in angle brackets
|
|
76
89
|
r"<[^>]*?(?:long|same|random|your|placeholder|example|secret|token|key)[^>]*?>|"
|
|
77
90
|
# Bare `if not <var>:` and similar control-flow lines that mention
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""LED-1415 — CLI subprocess contract.
|
|
2
|
+
|
|
3
|
+
The deliberation engine drives 4 model CLIs as subprocesses
|
|
4
|
+
(claude / codex / gemini / cursor) and treats their stdout as model
|
|
5
|
+
verdict text. Three classes of bug have surfaced in this pipeline:
|
|
6
|
+
|
|
7
|
+
1. Banner contamination — the Delimit governance shim leaks ASCII
|
|
8
|
+
art onto stdout instead of stderr (PR #154, fixed by LED-1428).
|
|
9
|
+
2. Empty/silent responses — CLI exits 0 but stdout is empty
|
|
10
|
+
(transient API issues, OOM, network blips). Caught by LED-1416's
|
|
11
|
+
retry state machine.
|
|
12
|
+
3. Schema drift — CLI changes its output shape between versions
|
|
13
|
+
(e.g., adds an auto-correction line at the top). Caught
|
|
14
|
+
reactively by failing deliberation panels.
|
|
15
|
+
|
|
16
|
+
This module holds the ONE contract that every CLI response must
|
|
17
|
+
satisfy + the ONE validator that enforces it. Both the per-CLI mock
|
|
18
|
+
tests (tests/test_cli_contract.py) AND the weekly real-CLI smoke
|
|
19
|
+
script (scripts/smoke_cli_contracts.py) call validate_cli_contract()
|
|
20
|
+
so the contract definition lives in exactly one place — extending
|
|
21
|
+
it doesn't require remembering to change two places.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import re
|
|
27
|
+
from dataclasses import dataclass, field
|
|
28
|
+
from typing import List, Optional
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# The 4 known CLIs the deliberation engine targets. cursor is included
|
|
32
|
+
# even though it's not yet installed in the dev environment — adding
|
|
33
|
+
# it to the contract surface now means the validator is ready when it
|
|
34
|
+
# lands; smoke skips when the binary isn't present.
|
|
35
|
+
KNOWN_CLI_NAMES = ("claude", "codex", "gemini", "cursor")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Minimum scrubbed-response length we'll accept as "looks like a real
|
|
39
|
+
# model verdict" rather than "leftover garbage after banner strip."
|
|
40
|
+
# Calibrated against historical scrub-debug.jsonl entries: every real
|
|
41
|
+
# round-1/round-2 verdict from past deliberations was >= 60 chars;
|
|
42
|
+
# every banner-only contamination was < 30 chars. 30 is the cutoff
|
|
43
|
+
# the production scrubber already uses; keeping that here means the
|
|
44
|
+
# validator + the scrubber agree.
|
|
45
|
+
MIN_VERDICT_LEN = 30
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# Patterns that signal "the response is contamination, not a verdict."
|
|
49
|
+
# Each gets the response REJECTED even if length and scrub passed.
|
|
50
|
+
_CONTAMINATION_MARKERS = (
|
|
51
|
+
re.compile(r"^\[scrub:\s*contaminated\b", re.IGNORECASE),
|
|
52
|
+
re.compile(r"^\[.+\bunavailable\b.+\bnot found in PATH\]", re.IGNORECASE),
|
|
53
|
+
re.compile(r"^\[.+\bskipped under INTERNAL_PYTEST_GUARD", re.IGNORECASE),
|
|
54
|
+
re.compile(r"^\[.+\btimed out after\b", re.IGNORECASE),
|
|
55
|
+
re.compile(r"^\[.+\breturned empty response\]", re.IGNORECASE),
|
|
56
|
+
re.compile(r"^\[.+\berror:.+\]\s*$", re.IGNORECASE),
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# A response should contain at least ONE of these markers to be
|
|
61
|
+
# recognizable as a panel verdict. The deliberation engine prompts all
|
|
62
|
+
# models to end with `VERDICT: ...` so we expect to see it. Falling
|
|
63
|
+
# back: "AGREE" / "DISAGREE" / "REMEDIATE" / "AGREE WITH MODIFICATIONS"
|
|
64
|
+
# all appear in real responses even when the trailing VERDICT line is
|
|
65
|
+
# omitted by a chatty model.
|
|
66
|
+
# The word boundary is applied after the bare keywords only. `VERDICT:`
# ends in a non-word character, so a trailing `\b` there would match only
# when a word character follows immediately ("VERDICT:AGREE") and would
# miss the prompted "VERDICT: <text>" shape. Group 1 is still the matched
# marker.
_VERDICT_HINT_RE = re.compile(
    r"\b(VERDICT:|(?:AGREE|DISAGREE|REMEDIATE|APPROVE|REJECT)\b)",
    re.IGNORECASE,
)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@dataclass
class CliContractResult:
    """Result of checking a single CLI response against the contract.

    `ok` is True only when no contract clause failed. `failures` names
    every clause that did fail; the smoke script ntfys with this list so
    the operator can see exactly what shape the regression took.
    """

    # Name of the CLI that produced the response.
    cli: str
    # Length of the raw stdout text.
    raw_len: int
    # Length of the text left after scrubbing.
    scrubbed_len: int
    # True iff `failures` is empty.
    ok: bool
    # Short reason strings, one per failed clause.
    failures: List[str] = field(default_factory=list)
    # First 200 chars of scrubbed text, for log readability
    preview: str = ""
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def validate_cli_contract(
    cli_name: str,
    raw_stdout: str,
    raw_stderr: str = "",
    expect_verdict_hint: bool = True,
) -> CliContractResult:
    """Check one subprocess response against the CLI contract.

    The stdout text is first passed through the production scrubber
    (`ai.deliberation._scrub_cli_output`), so the validator judges the
    same text that ai/deliberation.py's _call_cli sees. Each clause that
    fails adds a short reason string; no reasons means the response is
    contract-clean.

    Args:
        cli_name: which CLI produced this (claude/codex/gemini/cursor);
            passed to the scrubber as `source` and recorded in the result.
        raw_stdout: subprocess.stdout bytes decoded to str.
        raw_stderr: subprocess.stderr bytes decoded to str. Banner output
            is ALLOWED on stderr (intentional shim behavior). No clause in
            this function currently inspects the value.
        expect_verdict_hint: when True, fail the response if it doesn't
            contain at least one verdict marker. Mock tests and the smoke
            script set this; tests of low-content responses (e.g., a
            `--version` smoke) set False.

    Returns:
        CliContractResult with `ok`, `failures`, and a preview. If the
        scrubber cannot be imported or raises, the result carries a single
        `scrub_failed:<ExceptionType>:<message>` failure.
    """
    # The scrubber is imported at call time so this module stays importable
    # where ai.deliberation isn't available (e.g., the smoke script when the
    # gateway code path changes).
    try:
        from ai.deliberation import _scrub_cli_output

        cleaned = _scrub_cli_output(raw_stdout, source=cli_name).strip()
    except Exception as exc:
        scrub_error = f"scrub_failed:{type(exc).__name__}:{str(exc)[:80]}"
        return CliContractResult(
            cli=cli_name,
            raw_len=len(raw_stdout),
            scrubbed_len=0,
            ok=False,
            failures=[scrub_error],
            preview="",
        )

    problems: List[str] = []

    # Clause 1: contamination markers — only the first matching pattern is
    # reported.
    # Clause 2: minimum length — reported only when clause 1 did not fire;
    # below MIN_VERDICT_LEN is almost certainly garbage even if scrub
    # didn't tag it.
    marker = next((pat for pat in _CONTAMINATION_MARKERS if pat.search(cleaned)), None)
    if marker is not None:
        problems.append(f"contamination_marker:{marker.pattern[:40]}")
    elif len(cleaned) < MIN_VERDICT_LEN:
        problems.append(f"too_short:{len(cleaned)}<{MIN_VERDICT_LEN}")

    # Clause 3: verdict hint — at least one of VERDICT:/AGREE/DISAGREE/
    # REMEDIATE/APPROVE/REJECT must appear unless the caller opted out.
    if expect_verdict_hint and _VERDICT_HINT_RE.search(cleaned) is None:
        problems.append("no_verdict_hint")

    # Clause 4: defense-in-depth on top of scrub. A bracketed prefix is
    # almost always a tool-emitted status line, not a model verdict; one
    # that matches none of the known prefixes is surfaced for inspection.
    recognised_prefixes = (
        "[delimit", "[scrub:", "[claude", "[codex", "[gemini", "[cursor",
    )
    if cleaned.startswith("[") and not cleaned.lower().startswith(recognised_prefixes):
        problems.append(f"unknown_bracketed_prefix:{cleaned[:40]!r}")

    return CliContractResult(
        cli=cli_name,
        raw_len=len(raw_stdout),
        scrubbed_len=len(cleaned),
        ok=not problems,
        failures=problems,
        preview=cleaned[:200],
    )
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def format_contract_report(results: List[CliContractResult]) -> str:
|
|
173
|
+
"""Human-readable summary of N validation results for ntfy / logs."""
|
|
174
|
+
lines = []
|
|
175
|
+
n_ok = sum(1 for r in results if r.ok)
|
|
176
|
+
lines.append(f"CLI contract: {n_ok}/{len(results)} clean")
|
|
177
|
+
for r in results:
|
|
178
|
+
flag = "OK" if r.ok else "FAIL"
|
|
179
|
+
lines.append(f" [{flag}] {r.cli:8s} raw={r.raw_len}B scrubbed={r.scrubbed_len}B")
|
|
180
|
+
if not r.ok:
|
|
181
|
+
for f in r.failures:
|
|
182
|
+
lines.append(f" ↳ {f}")
|
|
183
|
+
if r.preview:
|
|
184
|
+
lines.append(f" preview: {r.preview[:100]!r}")
|
|
185
|
+
return "\n".join(lines)
|
package/gateway/ai/governance.py
CHANGED
|
@@ -13,7 +13,10 @@ This replaces _with_next_steps — governance IS the next step system.
|
|
|
13
13
|
import json
|
|
14
14
|
import logging
|
|
15
15
|
import os
|
|
16
|
+
import re
|
|
17
|
+
import subprocess
|
|
16
18
|
import time
|
|
19
|
+
from datetime import datetime, timezone
|
|
17
20
|
from pathlib import Path
|
|
18
21
|
from typing import Any, Dict, List, Optional
|
|
19
22
|
|
|
@@ -826,6 +829,184 @@ def govern(tool_name: str, result: Dict[str, Any], project_path: str = ".") -> D
|
|
|
826
829
|
return governed_result
|
|
827
830
|
|
|
828
831
|
|
|
832
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
833
|
+
# LED-2214b-followup — sensor_github_issue sync impl
|
|
834
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
835
|
+
#
|
|
836
|
+
# The outreach daemon's monitor_phase needs to call the same logic that
|
|
837
|
+
# delimit_sensor_github_issue (MCP tool) runs, but synchronously and
|
|
838
|
+
# without the _with_next_steps wrapping. Before this extraction the
|
|
839
|
+
# daemon tried to import the impl from two paths that don't exist —
|
|
840
|
+
# `ai.governance._sensor_github_issue_impl` and
|
|
841
|
+
# `backends.governance_bridge.sensor_github_issue` — and silently fell
|
|
842
|
+
# back to "monitor skipped" on every tick, leaving the entire reply-
|
|
843
|
+
# tracking cycle dead.
|
|
844
|
+
#
|
|
845
|
+
# Now both callers share this function. The MCP tool wraps the result
|
|
846
|
+
# with `_with_next_steps`; the daemon consumes the raw dict.
|
|
847
|
+
|
|
848
|
+
_NEGATIVE_KEYWORDS = (
|
|
849
|
+
"not interested", "won't be", "will not", "don't need", "do not need",
|
|
850
|
+
"no thanks", "pass on", "not a fit", "not for us", "closing",
|
|
851
|
+
"won't adopt", "will not adopt", "reject", "declined",
|
|
852
|
+
)
|
|
853
|
+
|
|
854
|
+
_REPO_FORMAT_RE = re.compile(r"^[\w.-]+/[\w.-]+$")
|
|
855
|
+
|
|
856
|
+
# Module-local guard so the warning fires at most once per process.
|
|
857
|
+
_REPO_ALLOWLIST_WARNED = False
|
|
858
|
+
|
|
859
|
+
|
|
860
|
+
def _check_repo_allowlist(repo: str) -> Optional[Dict[str, Any]]:
    """Gate a repo against the DELIMIT_ALLOWED_REPOS environment variable.

    The variable is a comma-separated list of repos; entries and the
    candidate repo are compared after trimming and lower-casing. When the
    variable is unset or blank, every repo passes and a warning is logged
    at most once per process.

    This intentionally duplicates ai.server._check_repo_allowlist:
    importing from ai.server would create a circular import (server.py
    imports from governance). Both copies must stay in sync until LED-216
    splits the allowlist into its own module.

    Returns:
        None when the repo may be used, otherwise a refusal dict with the
        keys `error`, `repo`, `allowed` and `hint`.
    """
    global _REPO_ALLOWLIST_WARNED
    configured = os.environ.get("DELIMIT_ALLOWED_REPOS", "").strip()

    if configured:
        permitted = set()
        for item in configured.split(","):
            name = item.strip()
            if name:
                permitted.add(name.lower())

        candidate = (repo or "").strip().lower()
        if candidate in permitted:
            return None
        return {
            "error": "repo_not_allowlisted",
            "repo": repo,
            "allowed": sorted(permitted),
            "hint": (
                "Repo not in DELIMIT_ALLOWED_REPOS. Add it or use a tool "
                "that does not reach external APIs."
            ),
        }

    # No allowlist configured: pass through, warning only the first time.
    if not _REPO_ALLOWLIST_WARNED:
        logger.warning(
            "DELIMIT_ALLOWED_REPOS unset — sensor_github_issue calls "
            "pass through to gh api using the caller's token."
        )
        _REPO_ALLOWLIST_WARNED = True
    return None
|
|
890
|
+
|
|
891
|
+
|
|
892
|
+
def _decode_json_stream(text: str) -> List[Any]:
    """Decode every JSON document in `text`; documents may be concatenated or whitespace-separated."""
    decoder = json.JSONDecoder()
    values: List[Any] = []
    pos, end = 0, len(text)
    while pos < end:
        if text[pos].isspace():
            pos += 1
            continue
        value, pos = decoder.raw_decode(text, pos)
        values.append(value)
    return values


def _sensor_github_issue_impl(
    repo: str,
    issue_number: int,
    since_comment_id: int = 0,
) -> Dict[str, Any]:
    """Sync implementation of the sensor_github_issue MCP tool.

    Fetches the issue's comments and state through the `gh` CLI and
    classifies the activity. Returns the RAW result dict (no
    _with_next_steps wrapping); callers that want the MCP wrapping apply
    it themselves. Returns ``{"error": ..., "has_new_activity": False}``
    on any failure mode rather than raising — the outreach daemon's
    monitor loop relies on fail-soft behavior so one bad LED doesn't kill
    the whole tick.

    Args:
        repo: GitHub repository as ``owner/repo``.
        issue_number: positive issue number (``bool`` is rejected).
        since_comment_id: only comments with a larger id are reported as
            new.

    Result schema (success path):
        {
          "repo": str, "issue_number": str,
          "signal": {id, venture, metric, source, timestamp, severity},
          "issue_state": "open" | "closed" | "unknown",
          "new_comments": [{id, author, created_at, body}, ...],
          "latest_comment_id": int,
          "total_comments": int,
          "has_new_activity": bool,
        }
    """
    # Validate inputs — defense-in-depth even though subprocess.run with
    # list argv (no shell=True) makes classic injection inert.
    if not _REPO_FORMAT_RE.match(repo or ""):
        return {"error": f"Invalid repo format: {repo!r}. Use owner/repo.",
                "has_new_activity": False}
    if ".." in repo:
        return {"error": "Invalid repo: path traversal sequences not allowed",
                "has_new_activity": False}
    # bool is a subclass of int; True would otherwise be accepted and
    # produce the URL segment ".../issues/True".
    if (
        isinstance(issue_number, bool)
        or not isinstance(issue_number, int)
        or issue_number <= 0
    ):
        return {"error": f"Invalid issue number: {issue_number}",
                "has_new_activity": False}

    refusal = _check_repo_allowlist(repo)
    if refusal is not None:
        refusal.setdefault("has_new_activity", False)
        return refusal

    try:
        # Fetch comments. The endpoint is paginated (30 per page unless
        # per_page is given), so without --paginate replies beyond the
        # first page would never be seen and total_comments /
        # latest_comment_id would be wrong. With --paginate the jq filter
        # runs per page, so stdout can hold several JSON arrays in a row;
        # they are decoded as a stream and flattened.
        comments_jq = (
            "[.[] | {id: .id, author: .user.login, "
            "created_at: .created_at, body: (.body | .[0:500])}]"
        )
        comments_proc = subprocess.run(
            ["gh", "api", "--paginate",
             f"repos/{repo}/issues/{issue_number}/comments?per_page=100",
             "--jq", comments_jq],
            capture_output=True, text=True, timeout=30,
        )
        if comments_proc.returncode != 0:
            return {
                "error": f"gh api comments failed: {(comments_proc.stderr or '').strip()[:200]}",
                "has_new_activity": False,
            }
        all_comments: List[Any] = []
        for page in _decode_json_stream(comments_proc.stdout or ""):
            if isinstance(page, list):
                all_comments.extend(page)
            else:
                all_comments.append(page)
        new_comments = [c for c in all_comments if c.get("id", 0) > since_comment_id]

        # Fetch issue state
        issue_jq = "{state: .state, labels: [.labels[].name], reactions: .reactions.total_count}"
        issue_proc = subprocess.run(
            ["gh", "api",
             f"repos/{repo}/issues/{issue_number}",
             "--jq", issue_jq],
            capture_output=True, text=True, timeout=30,
        )
        if issue_proc.returncode != 0:
            return {
                "error": f"gh api issue failed: {(issue_proc.stderr or '').strip()[:200]}",
                "has_new_activity": False,
            }
        issue_info = json.loads(issue_proc.stdout) if issue_proc.stdout.strip() else {}
        issue_state = issue_info.get("state", "unknown")

        # Severity classification — green default; amber on closed; red on
        # negative keyword in any new comment body.
        severity = "green"
        combined_body = " ".join(c.get("body", "") or "" for c in new_comments).lower()
        has_negative = any(kw in combined_body for kw in _NEGATIVE_KEYWORDS)
        if has_negative:
            severity = "red"
        elif issue_state == "closed":
            severity = "amber"

        latest_comment_id = max((c.get("id", 0) for c in all_comments), default=since_comment_id)
        repo_key = repo.replace("/", "_")

        return {
            "repo": repo,
            "issue_number": str(issue_number),
            "signal": {
                "id": f"sensor:github_issue:{repo_key}:{issue_number}",
                "venture": "delimit",
                "metric": "outreach_issue_activity",
                "source": f"https://github.com/{repo}/issues/{issue_number}",
                "timestamp": datetime.now(timezone.utc).isoformat(),
                "severity": severity,
            },
            "issue_state": issue_state,
            "new_comments": new_comments,
            "latest_comment_id": latest_comment_id,
            "total_comments": len(all_comments),
            "has_new_activity": len(new_comments) > 0,
        }
    except subprocess.TimeoutExpired:
        return {"error": "gh command timed out after 30s",
                "has_new_activity": False}
    except json.JSONDecodeError as exc:
        return {"error": f"Failed to parse gh output: {exc}",
                "has_new_activity": False}
    except Exception as exc:  # noqa: BLE001 — sensor must fail soft
        logger.error("sensor_github_issue impl error: %s", exc)
        return {"error": str(exc), "has_new_activity": False}
|
|
1008
|
+
|
|
1009
|
+
|
|
829
1010
|
def _deep_get(d: Dict, key: str) -> Any:
|
|
830
1011
|
"""Get a value from a dict, supporting nested keys with dots."""
|
|
831
1012
|
if "." in key:
|