delimit-cli 4.5.13 → 4.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +48 -0
- package/README.md +9 -8
- package/bin/delimit-cli.js +179 -4
- package/bin/delimit-setup.js +46 -6
- package/gateway/ai/_compile_status.py +154 -0
- package/gateway/ai/agent_dispatch.py +41 -0
- package/gateway/ai/backends/git_health.py +175 -0
- package/gateway/ai/backends/tools_infra.py +163 -10
- package/gateway/ai/cli_contract.py +185 -0
- package/gateway/ai/daemon.py +10 -0
- package/gateway/ai/daily_digest.py +1 -2
- package/gateway/ai/delimit_daemon.py +67 -0
- package/gateway/ai/dispatch_gate.py +399 -0
- package/gateway/ai/governance.py +181 -0
- package/gateway/ai/heartbeat.py +290 -0
- package/gateway/ai/hot_reload.py +1 -2
- package/gateway/ai/led193_daemon/executor.py +9 -0
- package/gateway/ai/ledger_manager.py +90 -4
- package/gateway/ai/ledger_proof.py +127 -0
- package/gateway/ai/license.py +132 -47
- package/gateway/ai/license_core.cpython-310-x86_64-linux-gnu.so +0 -0
- package/gateway/ai/license_core.pyi +1 -1
- package/gateway/ai/notify.py +39 -0
- package/gateway/ai/outreach_loop_daemon.py +349 -0
- package/gateway/ai/outreach_substantive.py +1437 -0
- package/gateway/ai/pro_tools.yaml +167 -0
- package/gateway/ai/reaper.py +70 -0
- package/gateway/ai/reddit_scanner.py +17 -6
- package/gateway/ai/sensing/schema.py +1 -1
- package/gateway/ai/sensing/signal_store.py +0 -1
- package/gateway/ai/server.py +5490 -1602
- package/gateway/ai/social_capability/fit_floor.py +114 -12
- package/gateway/ai/social_queue.py +166 -10
- package/gateway/ai/tdqs_lint.py +611 -0
- package/gateway/ai/tenant_auth.py +329 -0
- package/gateway/ai/tenant_data.py +339 -0
- package/gateway/ai/tenant_paths.py +150 -0
- package/gateway/ai/usage_allowlist.py +198 -0
- package/gateway/ai/workers/base.py +2 -2
- package/gateway/ai/workers/executor.py +32 -3
- package/gateway/ai/workers/outreach_drafter.py +0 -1
- package/gateway/ai/workers/pr_drafter.py +0 -1
- package/gateway/ai/x_ranker.py +12 -2
- package/gateway/core/json_schema_diff.py +25 -1
- package/lib/auth-signin.js +136 -0
- package/lib/auth-signout.js +169 -0
- package/lib/delimit-template.js +11 -0
- package/lib/migration-2092-banner.js +213 -0
- package/package.json +5 -2
- package/server.json +4 -4
- package/scripts/build-license-core.sh +0 -85
- package/scripts/security-check.sh +0 -66
- package/scripts/test-license-core-so.sh +0 -107
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
"""Git worktree sanity checks (LED-1411).
|
|
2
|
+
|
|
3
|
+
Single source of truth for "is this directory a healthy git worktree?"
|
|
4
|
+
Used by delimit_test_smoke, delimit_deploy_plan, and delimit_evidence_collect
|
|
5
|
+
as a precheck before they trust ambient checkout state.
|
|
6
|
+
|
|
7
|
+
Background — LED-1403 / LED-1401 incident (2026-05-14):
|
|
8
|
+
`/home/delimit/npm-delimit/.git` was configured `bare = true` but had source
|
|
9
|
+
files alongside, AND a stranded sibling worktree at `/tmp/delimit-mcp-main`
|
|
10
|
+
where `git status` showed every file as both `D` and `??` (deleted from
|
|
11
|
+
index, untracked on disk). `delimit_test_smoke` ran against this corrupt
|
|
12
|
+
state and reported `attest-mcp Q2 3-tier exit codes` failures that did NOT
|
|
13
|
+
exist on real main. I almost shipped a "fix" for a non-bug (LED-1403,
|
|
14
|
+
closed `not_reproducible` after a fresh clone proved tests passed).
|
|
15
|
+
|
|
16
|
+
This module exists so the same class of phantom failure can't recur.
|
|
17
|
+
Precheck must:
|
|
18
|
+
- Add <100ms to caller startup (no network, no fetch)
|
|
19
|
+
- Emit a single actionable remediation line on failure
|
|
20
|
+
- Return a structured dict (callers may inline-handle or surface up)
|
|
21
|
+
|
|
22
|
+
Memory anchor: feedback_corrupted_worktree_phantom_failures.md
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import subprocess
|
|
28
|
+
from pathlib import Path
|
|
29
|
+
from typing import Any, Dict
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _run(cmd: list, cwd: str, timeout: float = 2.0) -> str:
    """Run a git command with a tight timeout and return its stripped stdout.

    This helper is intentionally non-raising: a non-zero exit status, a
    timeout, a missing executable or any other OS-level error all yield the
    empty string. Callers decide what constitutes a failure from the
    structured result they build, not from exceptions.

    Args:
        cmd: Argument vector to execute (no shell is involved).
        cwd: Directory to run the command in.
        timeout: Seconds to wait before giving up.

    Returns:
        The command's stdout with surrounding whitespace removed, or ``""``
        when the command could not be run or did not succeed.
    """
    try:
        output = subprocess.check_output(
            cmd,
            cwd=cwd,
            stderr=subprocess.DEVNULL,
            timeout=timeout,
            text=True,
            # File names are arbitrary bytes and need not be valid text in
            # the locale encoding. Substitute undecodable bytes instead of
            # letting UnicodeDecodeError escape this non-raising helper.
            errors="replace",
        )
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError):
        # FileNotFoundError (git not installed, cwd missing) is an OSError.
        return ""
    return output.strip()
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def check_worktree_sanity(repo_path: str) -> Dict[str, Any]:
    """Verify the directory at `repo_path` is a healthy git worktree.

    Checks, in the order they run (cheapest first):
      1. Path exists and is a directory
      2. It contains a `.git` entry (directory, or file pointing to one)
      3. `git rev-parse --is-bare-repository` does not return `true`
      4. `git rev-parse --is-inside-work-tree` returns `true`
      5. `git worktree list --porcelain` registers the resolved path
         (only enforced when the listing is non-empty)
      6. `git status --porcelain=v1` does NOT show 10 or more paths as BOTH
         deleted AND untracked (the LED-1401 corruption signature)

    Returns a dict with:
      - ok: bool — overall health
      - reason: str — short failure code (`not_a_directory`, `not_a_repo`,
        `bare_repo_with_files`, `not_a_worktree`, `stranded_worktree`,
        `corrupt_status`) when ok=False, else `healthy`
      - detail: str — actionable remediation message
      - path: str — the path that was checked
      - worktree_list: str — raw listing, only for `stranded_worktree`
      - overlap_count: int — only for `corrupt_status`

    Failures of the git commands themselves do not raise: they surface as
    ok=False with a structured reason, so callers can decide whether to halt
    or warn.
    """
    p = Path(repo_path)
    # is_dir() is already False for a path that does not exist.
    if not p.is_dir():
        return {
            "ok": False,
            "reason": "not_a_directory",
            "detail": f"{repo_path} is not a directory.",
            "path": repo_path,
        }

    git_meta = p / ".git"
    if not git_meta.exists():
        return {
            "ok": False,
            "reason": "not_a_repo",
            "detail": f"{repo_path} has no .git/ — not a git worktree.",
            "path": repo_path,
        }

    # Bare-repo check first (LED-1401 signature: bare=true + source files
    # alongside). Checked BEFORE is-inside-work-tree because a bare repo
    # answers "false" to that question — we want the more informative
    # bare-repo message to win when both conditions hold.
    is_bare = _run(["git", "rev-parse", "--is-bare-repository"], cwd=repo_path)
    if is_bare == "true":
        return {
            "ok": False,
            "reason": "bare_repo_with_files",
            "detail": (
                f"{repo_path}/.git/ has `core.bare = true` but the directory "
                f"holds source files. Tests against this state run stale "
                f"code. Re-clone fresh: `git clone <url> /tmp/<repo>-fresh "
                f"&& cd /tmp/<repo>-fresh`"
            ),
            "path": repo_path,
        }

    # Inside-work-tree check
    inside = _run(["git", "rev-parse", "--is-inside-work-tree"], cwd=repo_path)
    if inside != "true":
        return {
            "ok": False,
            "reason": "not_a_worktree",
            "detail": (
                f"{repo_path} is not inside a git work tree "
                f"(rev-parse --is-inside-work-tree returned {inside!r}). "
                f"Re-clone fresh: `git clone <url> /tmp/<repo>-fresh && cd /tmp/<repo>-fresh`"
            ),
            "path": repo_path,
        }

    # Worktree-list membership check (catches stranded sibling worktrees)
    worktrees = _run(["git", "worktree", "list", "--porcelain"], cwd=repo_path)
    resolved_path = p.resolve()
    resolved = str(resolved_path)
    if worktrees:
        # Compare against the exact registered paths. A substring test over
        # the whole listing would accept /tmp/repo merely because
        # /tmp/repo-old is registered. Comparing resolved Path objects also
        # absorbs symlink and path-separator differences.
        prefix = "worktree "
        registered = {
            Path(entry[len(prefix):]).resolve()
            for entry in worktrees.splitlines()
            if entry.startswith(prefix)
        }
        if resolved_path not in registered:
            # The current directory isn't a registered worktree of its own
            # .git/ — likely a stale checkout that was wiped+repopulated outside
            # git's awareness. This is the LED-1401 stranded-sibling signature.
            return {
                "ok": False,
                "reason": "stranded_worktree",
                "detail": (
                    f"{resolved} is not a registered worktree of its own .git/. "
                    f"Run `git worktree list` to inspect; re-clone fresh if "
                    f"orphaned: `git clone <url> /tmp/<repo>-fresh && cd /tmp/<repo>-fresh`"
                ),
                "path": repo_path,
                "worktree_list": worktrees,
            }

    # LED-1401 corrupt-status signature: every file appears as BOTH `D` and `??`
    # (deleted from index, untracked on disk). Sample the first 200 status lines
    # — if >=10 distinct paths show this pattern, it's pathological.
    status = _run(["git", "status", "--porcelain=v1"], cwd=repo_path, timeout=3.0)
    if status:
        lines = status.split("\n")[:200]
        # _run() strips its output, which eats the leading blank of the first
        # entry when its index column is empty (" D path" -> "D path"). Every
        # intact porcelain-v1 entry has a space at offset 2 (paths containing
        # whitespace are quoted), so a missing one means the column was lost
        # and must be restored before slicing out the path.
        if len(lines[0]) > 2 and lines[0][2] != " ":
            lines[0] = " " + lines[0]
        deleted_paths = set()
        untracked_paths = set()
        for line in lines:
            if len(line) < 4:
                continue
            xy = line[:2]
            path = line[3:].lstrip()
            if "D" in xy:
                deleted_paths.add(path)
            if xy == "??":
                untracked_paths.add(path)
        overlap = deleted_paths & untracked_paths
        if len(overlap) >= 10:
            return {
                "ok": False,
                "reason": "corrupt_status",
                "detail": (
                    f"{repo_path} shows >={len(overlap)} files as both deleted-from-index "
                    f"AND untracked-on-disk — the worktree was wiped and repopulated "
                    f"outside git's awareness (LED-1401 signature). Re-clone fresh: "
                    f"`git clone <url> /tmp/<repo>-fresh && cd /tmp/<repo>-fresh`"
                ),
                "path": repo_path,
                "overlap_count": len(overlap),
            }

    return {
        "ok": True,
        "reason": "healthy",
        "detail": "git worktree is healthy",
        "path": repo_path,
    }
|
|
@@ -64,12 +64,29 @@ _CREDENTIAL_FALSE_POSITIVES = re.compile(
|
|
|
64
64
|
r"sk-ant-demo|sk-demo|AIza-demo|xai-demo|demo[_-]?(?:key|secret|token)|"
|
|
65
65
|
r"-demo['\"]|"
|
|
66
66
|
# Function-call RHS (reading from parsed JSON, env, getters, slicing strings)
|
|
67
|
-
r"json\.loads|\.read_text\(|\.slice\(|"
|
|
67
|
+
r"json\.loads|\.read_text\(|\.slice\(|\.split\(|"
|
|
68
68
|
r"\w+\.get\(|token\s*=\s*_make_token|"
|
|
69
69
|
# RHS that is a parameter reference like token=tokens.get("access_token"...
|
|
70
70
|
r"=\s*\w+\.get\(|"
|
|
71
71
|
# Dict index dereference: token_data["token"], result["secret"], etc.
|
|
72
72
|
r"_data\[|_result\[|"
|
|
73
|
+
# LED-1278 (b): function-call RHS with leading underscore (e.g. _load_token())
|
|
74
|
+
r"=\s*_\w+\(|"
|
|
75
|
+
# LED-1278 (c) [2026-05-22]: naked function-call RHS without leading
|
|
76
|
+
# underscore. Matches the common shape `const token = readCurrentToken();`
|
|
77
|
+
# in bin/delimit-cli.js — the token is being READ from somewhere, not
|
|
78
|
+
# hardcoded. Tightened with `\s*;?\s*$` to require end-of-statement so
|
|
79
|
+
# we don't suppress `token = realLeak("AKIAIOSFODNN7EXAMPLE")` shapes
|
|
80
|
+
# where the call argument is itself a literal secret.
|
|
81
|
+
r"=\s*\w+\([^)]{0,40}\)\s*;?\s*$|"
|
|
82
|
+
# LED-1278 (c) [2026-05-22]: parenthesized property-access fallback chain
|
|
83
|
+
# like `const token = (options.token || process.env.TOKEN)`. Common shape
|
|
84
|
+
# for CLI option parsing where the RHS reads from a known input source,
|
|
85
|
+
# never a literal. Requires the open-paren to be followed by a word + dot
|
|
86
|
+
# (property access) so we don't match `token = ("AKIA..." || "")` shapes.
|
|
87
|
+
r"=\s*\(\s*\w+\.\w+|"
|
|
88
|
+
# LED-1278 (b): documentation/example placeholders in angle brackets
|
|
89
|
+
r"<[^>]*?(?:long|same|random|your|placeholder|example|secret|token|key)[^>]*?>|"
|
|
73
90
|
# Bare `if not <var>:` and similar control-flow lines that mention
|
|
74
91
|
# the credential variable name but contain no value.
|
|
75
92
|
r"if\s+not\s+\w+:|"
|
|
@@ -98,6 +115,82 @@ SCAN_EXTENSIONS = {".py", ".js", ".ts", ".jsx", ".tsx", ".go", ".rb", ".java", "
|
|
|
98
115
|
# Skip directories
|
|
99
116
|
SKIP_DIRS = {"node_modules", ".git", "__pycache__", ".venv", "venv", ".tox", "dist", "build", ".next", ".nuxt", "vendor"}
|
|
100
117
|
|
|
118
|
+
# LED-1278 (a): test-tree path patterns excluded by default. The scanner walks # nosec
|
|
119
|
+
# test directories with prod rules, so test fixtures (placeholder tokens, # nosec
|
|
120
|
+
# trivial JWT bodies, code-injection demos) get surfaced as critical findings # nosec
|
|
121
|
+
# on every audit. Default behavior now skips these; callers can pass # nosec
|
|
122
|
+
# include_tests=True to scan everything. # nosec
|
|
123
|
+
TEST_PATH_PATTERNS = tuple(
    re.compile(source)
    for source in (
        r"(?:^|[\\/])tests?[\\/]",  # tests/ or test/ as a path component
        r"(?:^|[\\/])__tests__[\\/]",  # JS __tests__/
        r"(?:^|[\\/])spec[\\/]",  # spec/
        r"(?:^|[\\/])fixtures?[\\/]",  # fixtures/ or fixture/
        r"(?:^|[\\/])test_[^\\/]+\.py$",  # test_*.py
        r"_test\.(?:py|go|rb|java)$",  # *_test.py / *_test.go
        r"\.(?:test|spec)\.(?:js|jsx|ts|tsx|mjs|cjs)$",  # *.test.js, *.spec.tsx
    )
)


def _is_test_path(path: str) -> bool:
    """Tell whether ``path`` names a test file or lives in a test directory.

    The path is converted to a string and searched with each entry of
    TEST_PATH_PATTERNS; the first hit is enough.
    """
    candidate = str(path)
    for test_re in TEST_PATH_PATTERNS:
        if test_re.search(candidate):
            return True
    return False
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
# LED-1278 (b): well-known dummy / fixture values. Even when include_tests=True
|
|
141
|
+
# (or when production code intentionally embeds canonical placeholders in
|
|
142
|
+
# docs/examples), these specific shapes should be suppressed as `info` log
|
|
143
|
+
# lines, not raised as critical findings.
|
|
144
|
+
#
|
|
145
|
+
# Each entry: (regex applied to the matched secret text, human label).
|
|
146
|
+
KNOWN_DUMMY_PATTERNS = [
    (re.compile(source, flags), label)
    for source, flags, label in (
        # AWS canonical dummy from official AWS documentation.
        (r"AKIAIOSFODNN7EXAMPLE", 0, "aws_doc_dummy"),
        # GitHub token placeholders that use the printable-alphabet pattern.
        (r"^gh[pousr]_ABCDEFGHIJKLMNOPQRSTUVWXYZ", re.IGNORECASE, "github_alphabet_dummy"),
        # Slack tokens with the leading 1234567890 sequence.
        (r"^xox[baprs]-1234567890-", 0, "slack_seq_dummy"),
        # JWT whose header is the literal eyJhbGciOiJIUzI1NiJ9 (alg HS256).
        # The header alone is not enough; _looks_like_known_dummy also
        # inspects the rest of the token before suppressing it.
        (r"^eyJhbGciOiJIUzI1NiJ9\.", 0, "jwt_hs256_trivial"),
        # Generic dict-credential placeholder values: fake/test/dummy/example/etc.
        (
            r"['\"](?:fake|test|dummy|example|placeholder|stale|from-)[A-Za-z0-9_\-]*['\"]\s*$",
            re.IGNORECASE,
            "generic_placeholder_value",
        ),
        # Provider test-key shapes: xai-key-123, google-key-7, claude-key-2 etc.
        (
            r"['\"](?:xai|google|claude|gem|grok|codex|ollama)[-_]?key[-_]?\d+['\"]\s*$",
            re.IGNORECASE,
            "provider_test_key",
        ),
    )
]


def _looks_like_known_dummy(secret_name: str, matched_text: str) -> Optional[str]:
    """Classify ``matched_text`` as a known dummy/fixture value.

    Walks KNOWN_DUMMY_PATTERNS in order and returns the label of the first
    entry that applies, or None when nothing does. ``secret_name`` is accepted
    for the caller's convenience but is not consulted here.

    The JWT entry is stricter than the others: an HS256 header by itself
    proves nothing, so the token is only treated as a dummy when it carries
    the demo payload (base64 of {"sub":"1234567890"}), e.g.
        eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.abc123def456ghi789
    or when its final segment contains ``abc123``. Otherwise the search moves
    on to the remaining patterns.
    """
    for dummy_re, tag in KNOWN_DUMMY_PATTERNS:
        if dummy_re.search(matched_text) is None:
            continue
        if tag != "jwt_hs256_trivial":
            return tag
        has_demo_payload = "eyJzdWIiOiIxMjM0NTY3ODkwIn0" in matched_text
        has_demo_tail = (
            re.search(r"\.[A-Za-z0-9_-]*abc123[A-Za-z0-9_-]*$", matched_text) is not None
        )
        if has_demo_payload or has_demo_tail:
            return tag
    return None
|
|
193
|
+
|
|
101
194
|
|
|
102
195
|
def _run_cmd(cmd: List[str], timeout: int = 30, cwd: Optional[str] = None) -> Dict[str, Any]:
|
|
103
196
|
"""Run a command and return stdout, stderr, returncode.
|
|
@@ -144,8 +237,13 @@ def _bump_semver(version: str, bump: str) -> str:
|
|
|
144
237
|
return f"{major}.{minor}.{patch}"
|
|
145
238
|
|
|
146
239
|
|
|
147
|
-
def _scan_files(target: str) -> List[Path]:
|
|
148
|
-
"""Collect scannable source files under target.
|
|
240
|
+
def _scan_files(target: str, include_tests: bool = False) -> List[Path]:
|
|
241
|
+
"""Collect scannable source files under target.
|
|
242
|
+
|
|
243
|
+
LED-1278 (a): when include_tests=False (the new default), skip files that
|
|
244
|
+
match TEST_PATH_PATTERNS so test fixtures do not surface as findings.
|
|
245
|
+
Single-file targets are always scanned regardless (caller asked explicitly).
|
|
246
|
+
"""
|
|
149
247
|
root = Path(target).resolve()
|
|
150
248
|
files = []
|
|
151
249
|
if root.is_file():
|
|
@@ -154,10 +252,25 @@ def _scan_files(target: str) -> List[Path]:
|
|
|
154
252
|
return []
|
|
155
253
|
for dirpath, dirnames, filenames in os.walk(root, onerror=lambda _err: None):
|
|
156
254
|
dirnames[:] = [d for d in dirnames if d not in SKIP_DIRS]
|
|
255
|
+
if not include_tests:
|
|
256
|
+
# Prune obvious test directory names before recursing so we don't
|
|
257
|
+
# walk huge __tests__/ trees just to discard them later.
|
|
258
|
+
dirnames[:] = [
|
|
259
|
+
d for d in dirnames
|
|
260
|
+
if d not in ("tests", "test", "__tests__", "spec", "fixtures", "fixture")
|
|
261
|
+
]
|
|
157
262
|
for filename in filenames:
|
|
158
263
|
p = Path(dirpath) / filename
|
|
159
|
-
if p.suffix in SCAN_EXTENSIONS:
|
|
160
|
-
|
|
264
|
+
if p.suffix not in SCAN_EXTENSIONS:
|
|
265
|
+
continue
|
|
266
|
+
if not include_tests:
|
|
267
|
+
try:
|
|
268
|
+
rel = str(p.relative_to(root))
|
|
269
|
+
except ValueError:
|
|
270
|
+
rel = str(p)
|
|
271
|
+
if _is_test_path(rel):
|
|
272
|
+
continue
|
|
273
|
+
files.append(p)
|
|
161
274
|
# Cap to avoid scanning massive repos
|
|
162
275
|
if len(files) >= 5000:
|
|
163
276
|
return files
|
|
@@ -166,11 +279,26 @@ def _scan_files(target: str) -> List[Path]:
|
|
|
166
279
|
|
|
167
280
|
# ─── 5. security_audit ──────────────────────────────────────────────────
|
|
168
281
|
|
|
169
|
-
def security_audit(target: str = ".") -> Dict[str, Any]:
|
|
282
|
+
def security_audit(target: str = ".", include_tests: bool = False) -> Dict[str, Any]:
|
|
170
283
|
"""Audit security: dependency vulnerabilities + anti-patterns + secret detection.
|
|
171
284
|
|
|
172
285
|
Default: runs pip-audit/npm-audit, regex scans for secrets and dangerous patterns.
|
|
173
286
|
Optional upgrade: set SNYK_TOKEN or TRIVY_PATH for enhanced scanning.
|
|
287
|
+
|
|
288
|
+
LED-1278 fixes:
|
|
289
|
+
(a) include_tests defaults to False — test directories (tests/, __tests__/,
|
|
290
|
+
spec/, fixtures/, *_test.py, *.test.tsx, etc.) are skipped so
|
|
291
|
+
test fixtures don't get raised as critical production findings.
|
|
292
|
+
Pass include_tests=True to scan everything (legacy behavior).
|
|
293
|
+
(b) Well-known dummy/placeholder values (AWS canonical example,
|
|
294
|
+
alphabet-pattern GitHub tokens, leading-1234567890 Slack tokens,
|
|
295
|
+
trivial JWT, fake/test/dummy/placeholder dict values, provider
|
|
296
|
+
test-key shapes) are suppressed and recorded as `info`-severity
|
|
297
|
+
allowlist hits in `suppressed_findings` for audit visibility.
|
|
298
|
+
|
|
299
|
+
Args:
|
|
300
|
+
target: Repository or file path to audit.
|
|
301
|
+
include_tests: When True, scan test directories (default False).
|
|
174
302
|
"""
|
|
175
303
|
target_path = Path(target).resolve()
|
|
176
304
|
if not target_path.exists():
|
|
@@ -179,6 +307,7 @@ def security_audit(target: str = ".") -> Dict[str, Any]:
|
|
|
179
307
|
vulnerabilities = []
|
|
180
308
|
anti_patterns_found = []
|
|
181
309
|
secrets_found = []
|
|
310
|
+
suppressed_findings: List[Dict[str, Any]] = [] # LED-1278 (b): allowlist log
|
|
182
311
|
tools_used = []
|
|
183
312
|
severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
|
|
184
313
|
|
|
@@ -284,8 +413,10 @@ def security_audit(target: str = ".") -> Dict[str, Any]:
|
|
|
284
413
|
pass
|
|
285
414
|
|
|
286
415
|
# --- 2. Anti-pattern scan ---
|
|
287
|
-
files = _scan_files(target)
|
|
288
|
-
|
|
416
|
+
files = _scan_files(target, include_tests=include_tests)
|
|
417
|
+
scan_label = f"pattern-scanner ({len(files)} files"
|
|
418
|
+
scan_label += ", include_tests=True" if include_tests else ", tests excluded"
|
|
419
|
+
tools_used.append(scan_label + ")")
|
|
289
420
|
|
|
290
421
|
for fpath in files:
|
|
291
422
|
try:
|
|
@@ -305,6 +436,25 @@ def security_audit(target: str = ".") -> Dict[str, Any]:
|
|
|
305
436
|
if secret_name in _FP_FILTERED and _CREDENTIAL_FALSE_POSITIVES.search(matched_text):
|
|
306
437
|
continue
|
|
307
438
|
line_num = content[:match.start()].count("\n") + 1
|
|
439
|
+
# LED-1278 (b): well-known dummy/placeholder values get
|
|
440
|
+
# suppressed to info-level rather than raised as critical.
|
|
441
|
+
# Logged in suppressed_findings so a future regression in the
|
|
442
|
+
# allowlist (e.g. real key matching by accident) is auditable.
|
|
443
|
+
dummy_label = _looks_like_known_dummy(secret_name, matched_text)
|
|
444
|
+
if dummy_label:
|
|
445
|
+
suppressed_findings.append({
|
|
446
|
+
"file": rel,
|
|
447
|
+
"line": line_num,
|
|
448
|
+
"type": secret_name,
|
|
449
|
+
"reason": dummy_label,
|
|
450
|
+
"severity": "info",
|
|
451
|
+
})
|
|
452
|
+
severity_counts["info"] += 1
|
|
453
|
+
logger.info(
|
|
454
|
+
"security_audit: suppressed known-dummy %s (%s) in %s:%d",
|
|
455
|
+
secret_name, dummy_label, rel, line_num,
|
|
456
|
+
)
|
|
457
|
+
continue
|
|
308
458
|
# Redact actual secret values in snippet output
|
|
309
459
|
snippet_raw = content[max(0, match.start() - 10):match.end() + 10].strip()[:80]
|
|
310
460
|
secrets_found.append({
|
|
@@ -358,6 +508,9 @@ def security_audit(target: str = ".") -> Dict[str, Any]:
|
|
|
358
508
|
"anti_patterns": anti_patterns_found,
|
|
359
509
|
"secrets_detected": len(secrets_found),
|
|
360
510
|
"secrets": secrets_found[:20], # Cap output to avoid huge responses
|
|
511
|
+
"suppressed_findings": suppressed_findings[:20], # LED-1278 (b): allowlist audit log
|
|
512
|
+
"suppressed_count": len(suppressed_findings),
|
|
513
|
+
"include_tests": include_tests, # LED-1278 (a): expose scan scope
|
|
361
514
|
"env_in_git": env_in_git,
|
|
362
515
|
"severity_summary": severity_counts,
|
|
363
516
|
"tools_used": tools_used,
|
|
@@ -765,9 +918,9 @@ def release_plan(environment: str = "production", version: str = "", repository:
|
|
|
765
918
|
|
|
766
919
|
# Commits since last tag
|
|
767
920
|
if last_tag:
|
|
768
|
-
r = _run_cmd(["git", "log", f"{last_tag}..HEAD", "--
|
|
921
|
+
r = _run_cmd(["git", "log", f"{last_tag}..HEAD", "--format=%s"], cwd=cwd)
|
|
769
922
|
else:
|
|
770
|
-
r = _run_cmd(["git", "log", "--
|
|
923
|
+
r = _run_cmd(["git", "log", "--format=%s", "-50"], cwd=cwd)
|
|
771
924
|
commits = [line.strip() for line in r["stdout"].strip().split("\n") if line.strip()] if r["stdout"].strip() else []
|
|
772
925
|
result["commits_since_last_tag"] = len(commits)
|
|
773
926
|
result["commits"] = commits[:30] # Cap
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""LED-1415 — CLI subprocess contract.
|
|
2
|
+
|
|
3
|
+
The deliberation engine drives 4 model CLIs as subprocesses
|
|
4
|
+
(claude / codex / gemini / cursor) and treats their stdout as model
|
|
5
|
+
verdict text. Three classes of bug have surfaced in this pipeline:
|
|
6
|
+
|
|
7
|
+
1. Banner contamination — the Delimit governance shim leaks ASCII
|
|
8
|
+
art onto stdout instead of stderr (PR #154, fixed by LED-1428).
|
|
9
|
+
2. Empty/silent responses — CLI exits 0 but stdout is empty
|
|
10
|
+
(transient API issues, OOM, network blips). Caught by LED-1416's
|
|
11
|
+
retry state machine.
|
|
12
|
+
3. Schema drift — CLI changes its output shape between versions
|
|
13
|
+
(e.g., adds an auto-correction line at the top). Caught
|
|
14
|
+
reactively by failing deliberation panels.
|
|
15
|
+
|
|
16
|
+
This module holds the ONE contract that every CLI response must
|
|
17
|
+
satisfy + the ONE validator that enforces it. Both the per-CLI mock
|
|
18
|
+
tests (tests/test_cli_contract.py) AND the weekly real-CLI smoke
|
|
19
|
+
script (scripts/smoke_cli_contracts.py) call validate_cli_contract()
|
|
20
|
+
so the contract definition lives in exactly one place — extending
|
|
21
|
+
it never means remembering to change a second copy.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import re
|
|
27
|
+
from dataclasses import dataclass, field
|
|
28
|
+
from typing import List, Optional
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# The 4 known CLIs the deliberation engine targets. cursor is included
|
|
32
|
+
# even though it's not yet installed in the dev environment — adding
|
|
33
|
+
# it to the contract surface now means the validator is ready when it
|
|
34
|
+
# lands; smoke skips when the binary isn't present.
|
|
35
|
+
KNOWN_CLI_NAMES = ("claude", "codex", "gemini", "cursor")


# Minimum scrubbed-response length we'll accept as "looks like a real
# model verdict" rather than "leftover garbage after banner strip."
# Calibrated against historical scrub-debug.jsonl entries: every real
# round-1/round-2 verdict from past deliberations was >= 60 chars;
# every banner-only contamination was < 30 chars. 30 is the cutoff
# the production scrubber already uses; keeping that here means the
# validator + the scrubber agree.
MIN_VERDICT_LEN = 30


# Patterns that signal "the response is contamination, not a verdict."
# Each gets the response REJECTED even if length and scrub passed.
_CONTAMINATION_MARKERS = (
    re.compile(r"^\[scrub:\s*contaminated\b", re.IGNORECASE),
    re.compile(r"^\[.+\bunavailable\b.+\bnot found in PATH\]", re.IGNORECASE),
    re.compile(r"^\[.+\bskipped under INTERNAL_PYTEST_GUARD", re.IGNORECASE),
    re.compile(r"^\[.+\btimed out after\b", re.IGNORECASE),
    re.compile(r"^\[.+\breturned empty response\]", re.IGNORECASE),
    re.compile(r"^\[.+\berror:.+\]\s*$", re.IGNORECASE),
)


# A response should contain at least ONE of these markers to be
# recognizable as a panel verdict. The deliberation engine prompts all
# models to end with `VERDICT: ...` so we expect to see it. Falling
# back: "AGREE" / "DISAGREE" / "REMEDIATE" / "AGREE WITH MODIFICATIONS"
# all appear in real responses even when the trailing VERDICT line is
# omitted by a chatty model.
#
# The trailing word boundary applies to the bare keywords only. After
# `VERDICT:` a `\b` would demand a word character right behind the colon,
# so the usual `VERDICT: <text>` (colon, then a space) would not match.
# Group 1 still captures whichever marker was found.
_VERDICT_HINT_RE = re.compile(
    r"\b(VERDICT:|(?:AGREE|DISAGREE|REMEDIATE|APPROVE|REJECT)\b)",
    re.IGNORECASE,
)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@dataclass
class CliContractResult:
    """Result record for the contract check of a single CLI response.

    ``ok`` is True exactly when no contract clause fired; ``failures`` names
    the clauses that did, so whoever reads the report can see what shape a
    regression took.
    """

    # Name of the CLI that produced the response.
    cli: str
    # Length of the raw stdout text.
    raw_len: int
    # Length of the text after scrubbing.
    scrubbed_len: int
    # True when the response satisfied the whole contract.
    ok: bool
    # One short reason string per clause that fired; empty when ok.
    failures: List[str] = field(default_factory=list)
    # First 200 chars of scrubbed text, for log readability
    preview: str = ""
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def validate_cli_contract(
    cli_name: str,
    raw_stdout: str,
    raw_stderr: str = "",
    expect_verdict_hint: bool = True,
) -> CliContractResult:
    """Check one subprocess response against the CLI contract.

    The raw stdout is first passed through ``_scrub_cli_output`` from
    ai.deliberation, then the scrubbed text is tested clause by clause. Each
    clause that fires contributes a short reason string; a response with no
    reasons is contract-clean.

    Args:
        cli_name: which CLI produced this (claude/codex/gemini/cursor);
            forwarded to the scrubber as ``source`` and stored on the result.
        raw_stdout: the subprocess's stdout as text.
        raw_stderr: the subprocess's stderr as text. Accepted but not
            inspected by any clause in this function.
        expect_verdict_hint: when True, a response without any verdict
            marker fails. Pass False for low-content responses such as a
            `--version` smoke.

    Returns:
        CliContractResult carrying `ok`, `failures`, and a preview. If the
        scrubber cannot be imported or raises, the result has a single
        `scrub_failed:` reason and an empty preview.
    """
    # The scrubber is imported lazily so this module stays importable in a
    # context where ai.deliberation isn't available (e.g., the smoke script
    # when the gateway code path changes).
    try:
        from ai.deliberation import _scrub_cli_output
        scrubbed = _scrub_cli_output(raw_stdout, source=cli_name).strip()
    except Exception as exc:
        reason = f"scrub_failed:{type(exc).__name__}:{str(exc)[:80]}"
        return CliContractResult(
            cli=cli_name,
            raw_len=len(raw_stdout),
            scrubbed_len=0,
            ok=False,
            failures=[reason],
            preview="",
        )

    problems: List[str] = []

    # Clause 1: a contamination marker left behind by the scrubber. Only the
    # first matching marker is reported.
    marker = next((m for m in _CONTAMINATION_MARKERS if m.search(scrubbed)), None)
    if marker is not None:
        problems.append(f"contamination_marker:{marker.pattern[:40]}")
    elif len(scrubbed) < MIN_VERDICT_LEN:
        # Clause 2: too short to be a verdict. Not reported on top of a
        # contamination marker.
        problems.append(f"too_short:{len(scrubbed)}<{MIN_VERDICT_LEN}")

    # Clause 3: at least one of VERDICT:/AGREE/DISAGREE/REMEDIATE/APPROVE/
    # REJECT must appear, unless the caller opted out.
    if expect_verdict_hint and not _VERDICT_HINT_RE.search(scrubbed):
        problems.append("no_verdict_hint")

    # Clause 4: defense-in-depth on top of the scrubber. A bracketed opening
    # is almost always a tool-emitted status line rather than a model
    # verdict; one that matches none of the known prefixes is surfaced for
    # inspection.
    known_prefixes = ("[delimit", "[scrub:", "[claude", "[codex", "[gemini", "[cursor")
    if scrubbed.startswith("[") and not scrubbed.lower().startswith(known_prefixes):
        problems.append(f"unknown_bracketed_prefix:{scrubbed[:40]!r}")

    return CliContractResult(
        cli=cli_name,
        raw_len=len(raw_stdout),
        scrubbed_len=len(scrubbed),
        ok=not problems,
        failures=problems,
        preview=scrubbed[:200],
    )
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def format_contract_report(results: List[CliContractResult]) -> str:
    """Render validation results as a plain-text summary for ntfy / logs.

    The first line counts the clean results. Each result then gets a status
    line; a failing result is followed by one line per failure and, when it
    has one, the first 100 characters of its preview.
    """
    clean = len([res for res in results if res.ok])
    report = [f"CLI contract: {clean}/{len(results)} clean"]
    for res in results:
        status = "FAIL" if not res.ok else "OK"
        report.append(f" [{status}] {res.cli:8s} raw={res.raw_len}B scrubbed={res.scrubbed_len}B")
        if res.ok:
            continue
        report.extend(f" ↳ {reason}" for reason in res.failures)
        if res.preview:
            report.append(f" preview: {res.preview[:100]!r}")
    return "\n".join(report)
|
package/gateway/ai/daemon.py
CHANGED
|
@@ -75,6 +75,7 @@ AUTO_PATTERNS = {
|
|
|
75
75
|
"test": ["test", "coverage", "smoke"],
|
|
76
76
|
"docs": ["docs", "documentation", "readme"],
|
|
77
77
|
"governance": ["governance", "policy", "compliance"],
|
|
78
|
+
"build": ["feat", "fix", "task", "implementation"],
|
|
78
79
|
}
|
|
79
80
|
|
|
80
81
|
|
|
@@ -263,6 +264,14 @@ def get_next_automatable_item(
|
|
|
263
264
|
return None
|
|
264
265
|
|
|
265
266
|
|
|
267
|
+
|
|
268
|
+
def _run_build(item_id: str, venture: str = "") -> dict:
    """Run the governed build loop for a specific item (LED-1146).

    NOTE(review): neither ``item_id`` nor ``venture`` is forwarded to
    ``run_governed_iteration`` — only the fixed session id is passed.
    Presumably the loop engine picks the item to work on itself; confirm
    against ai.loop_engine before relying on per-item behavior.

    Returns:
        Whatever ``run_governed_iteration`` returns for the daemon session.
    """
    # Imported at call time rather than when this module is loaded.
    from ai.loop_engine import run_governed_iteration
    # Use a persistent session for the daemon
    session_id = "daemon-build-loop"
    return run_governed_iteration(session_id=session_id)
|
|
274
|
+
|
|
266
275
|
def process_item(item: dict, log_path: Optional[Path] = None) -> dict:
|
|
267
276
|
"""Process a single ledger item by running the suggested tool.
|
|
268
277
|
|
|
@@ -293,6 +302,7 @@ def process_item(item: dict, log_path: Optional[Path] = None) -> dict:
|
|
|
293
302
|
"test": _run_test,
|
|
294
303
|
"governance": _run_governance,
|
|
295
304
|
"docs": _run_docs,
|
|
305
|
+
"build": _run_build,
|
|
296
306
|
}
|
|
297
307
|
|
|
298
308
|
runner = tool_map.get(tool)
|
|
@@ -20,11 +20,10 @@ Call via MCP: delimit_digest(action="run") or scheduled cron.
|
|
|
20
20
|
from __future__ import annotations
|
|
21
21
|
|
|
22
22
|
import json
|
|
23
|
-
import time
|
|
24
23
|
from collections import Counter
|
|
25
24
|
from datetime import datetime, timedelta, timezone
|
|
26
25
|
from pathlib import Path
|
|
27
|
-
from typing import Any, Dict
|
|
26
|
+
from typing import Any, Dict
|
|
28
27
|
|
|
29
28
|
DIGEST_DIR = Path.home() / ".delimit" / "digest"
|
|
30
29
|
LEDGER_DIR = Path.home() / ".delimit" / "ledger"
|