delimit-cli 4.5.13 → 4.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/CHANGELOG.md +48 -0
  2. package/README.md +9 -8
  3. package/bin/delimit-cli.js +179 -4
  4. package/bin/delimit-setup.js +46 -6
  5. package/gateway/ai/_compile_status.py +154 -0
  6. package/gateway/ai/agent_dispatch.py +41 -0
  7. package/gateway/ai/backends/git_health.py +175 -0
  8. package/gateway/ai/backends/tools_infra.py +163 -10
  9. package/gateway/ai/cli_contract.py +185 -0
  10. package/gateway/ai/daemon.py +10 -0
  11. package/gateway/ai/daily_digest.py +1 -2
  12. package/gateway/ai/delimit_daemon.py +67 -0
  13. package/gateway/ai/dispatch_gate.py +399 -0
  14. package/gateway/ai/governance.py +181 -0
  15. package/gateway/ai/heartbeat.py +290 -0
  16. package/gateway/ai/hot_reload.py +1 -2
  17. package/gateway/ai/led193_daemon/executor.py +9 -0
  18. package/gateway/ai/ledger_manager.py +90 -4
  19. package/gateway/ai/ledger_proof.py +127 -0
  20. package/gateway/ai/license.py +132 -47
  21. package/gateway/ai/license_core.cpython-310-x86_64-linux-gnu.so +0 -0
  22. package/gateway/ai/license_core.pyi +1 -1
  23. package/gateway/ai/notify.py +39 -0
  24. package/gateway/ai/outreach_loop_daemon.py +349 -0
  25. package/gateway/ai/outreach_substantive.py +1437 -0
  26. package/gateway/ai/pro_tools.yaml +167 -0
  27. package/gateway/ai/reaper.py +70 -0
  28. package/gateway/ai/reddit_scanner.py +17 -6
  29. package/gateway/ai/sensing/schema.py +1 -1
  30. package/gateway/ai/sensing/signal_store.py +0 -1
  31. package/gateway/ai/server.py +5490 -1602
  32. package/gateway/ai/social_capability/fit_floor.py +114 -12
  33. package/gateway/ai/social_queue.py +166 -10
  34. package/gateway/ai/tdqs_lint.py +611 -0
  35. package/gateway/ai/tenant_auth.py +329 -0
  36. package/gateway/ai/tenant_data.py +339 -0
  37. package/gateway/ai/tenant_paths.py +150 -0
  38. package/gateway/ai/usage_allowlist.py +198 -0
  39. package/gateway/ai/workers/base.py +2 -2
  40. package/gateway/ai/workers/executor.py +32 -3
  41. package/gateway/ai/workers/outreach_drafter.py +0 -1
  42. package/gateway/ai/workers/pr_drafter.py +0 -1
  43. package/gateway/ai/x_ranker.py +12 -2
  44. package/gateway/core/json_schema_diff.py +25 -1
  45. package/lib/auth-signin.js +136 -0
  46. package/lib/auth-signout.js +169 -0
  47. package/lib/delimit-template.js +11 -0
  48. package/lib/migration-2092-banner.js +213 -0
  49. package/package.json +5 -2
  50. package/server.json +4 -4
  51. package/scripts/build-license-core.sh +0 -85
  52. package/scripts/security-check.sh +0 -66
  53. package/scripts/test-license-core-so.sh +0 -107
@@ -0,0 +1,175 @@
1
+ """Git worktree sanity checks (LED-1411).
2
+
3
+ Single source of truth for "is this directory a healthy git worktree?"
4
+ Used by delimit_test_smoke, delimit_deploy_plan, and delimit_evidence_collect
5
+ as a precheck before they trust ambient checkout state.
6
+
7
+ Background — LED-1403 / LED-1401 incident (2026-05-14):
8
+ `/home/delimit/npm-delimit/.git` was configured `bare = true` but had source
9
+ files alongside, AND a stranded sibling worktree at `/tmp/delimit-mcp-main`
10
+ where `git status` showed every file as both `D` and `??` (deleted from
11
+ index, untracked on disk). `delimit_test_smoke` ran against this corrupt
12
+ state and reported `attest-mcp Q2 3-tier exit codes` failures that did NOT
13
+ exist on real main. I almost shipped a "fix" for a non-bug (LED-1403,
14
+ closed `not_reproducible` after a fresh clone proved tests passed).
15
+
16
+ This module exists so the same class of phantom failure can't recur.
17
+ Precheck must:
18
+ - Add <100ms to caller startup (no network, no fetch)
19
+ - Emit a single actionable remediation line on failure
20
+ - Return a structured dict (callers may inline-handle or surface up)
21
+
22
+ Memory anchor: feedback_corrupted_worktree_phantom_failures.md
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import subprocess
28
+ from pathlib import Path
29
+ from typing import Any, Dict
30
+
31
+
32
def _run(cmd: list, cwd: str, timeout: float = 2.0) -> str:
    """Run a command (git, in this module) with a tight timeout.

    Args:
        cmd: argv list handed to ``subprocess.check_output`` (no shell).
        cwd: directory the command is executed in.
        timeout: seconds to wait before giving up on the child process.

    Returns:
        The command's stdout with surrounding whitespace stripped, or an
        empty string on any failure (non-zero exit, timeout, missing binary
        or working directory, invalid argument). This is intentional — the
        caller decides what constitutes a failure based on the structured
        result, not on exceptions.
    """
    try:
        return subprocess.check_output(
            cmd,
            cwd=cwd,
            stderr=subprocess.DEVNULL,
            timeout=timeout,
            text=True,
            # Output that is not valid in the locale encoding (e.g. an
            # unusual path name) must not surface as UnicodeDecodeError from
            # a helper that promises never to raise.
            errors="replace",
        ).strip()
    except (subprocess.SubprocessError, OSError, ValueError):
        # SubprocessError covers CalledProcessError and TimeoutExpired;
        # OSError covers a missing binary/cwd; ValueError covers invalid
        # arguments such as an embedded NUL byte.
        return ""
46
+
47
+
48
# Remediation appended to every failure that calls for a fresh checkout.
_RECLONE_HINT = "`git clone <url> /tmp/<repo>-fresh && cd /tmp/<repo>-fresh`"


def _unhealthy(path: str, reason: str, detail: str, **extra: Any) -> Dict[str, Any]:
    """Build the structured ok=False result; `extra` adds diagnostic keys."""
    return {"ok": False, "reason": reason, "detail": detail, "path": path, **extra}


def _is_registered_worktree(porcelain: str, target: Path) -> bool:
    """Return True if `target` is one of the `worktree <path>` records.

    `porcelain` is the output of `git worktree list --porcelain`; `target`
    is expected to be already resolved. Paths are compared exactly rather
    than by substring, so `/tmp/foo` is not considered registered merely
    because `/tmp/foo-bar` is listed.
    """
    prefix = "worktree "
    for record in porcelain.splitlines():
        if not record.startswith(prefix):
            continue
        entry = record[len(prefix):]
        if entry == str(target):
            return True
        try:
            if Path(entry).resolve() == target:
                return True
        except (OSError, RuntimeError):
            # An unresolvable entry simply cannot be the target.
            continue
    return False


def _deleted_and_untracked_overlap(status: str, max_lines: int = 200) -> int:
    """Count paths reported both as deleted (`D`) and as untracked (`??`).

    `status` is `git status --porcelain=v1` output; only the first
    `max_lines` lines are sampled.
    """
    deleted_paths = set()
    untracked_paths = set()
    for index, line in enumerate(status.split("\n")[:max_lines]):
        # `_run` strips its output, so a first entry such as " D path"
        # arrives as "D path". Column 2 is always a space in porcelain v1;
        # when it is not, the leading status column was stripped — restore it.
        if index == 0 and len(line) >= 3 and line[1] == " " and line[2] != " ":
            line = " " + line
        if len(line) < 4:
            continue
        xy = line[:2]
        path = line[3:]
        if "D" in xy:
            deleted_paths.add(path)
        if xy == "??":
            untracked_paths.add(path)
    return len(deleted_paths & untracked_paths)


def check_worktree_sanity(repo_path: str) -> Dict[str, Any]:
    """Verify the directory at `repo_path` is a healthy git worktree.

    Checks, in the order they run:
      1. Path is an existing directory.
      2. It contains a `.git` entry (directory, or file pointing to one).
      3. `git rev-parse --is-bare-repository` does not return `true`.
      4. `git rev-parse --is-inside-work-tree` returns `true`.
      5. `git worktree list --porcelain` (when it produces output) lists the
         resolved path as a worktree.
      6. `git status --porcelain=v1` does NOT show many files as BOTH
         deleted AND untracked (the LED-1401 corruption signature).

    Returns a dict with:
      - ok: bool — overall health
      - reason: str — `healthy` when ok=True, otherwise one of
        `not_a_directory`, `not_a_repo`, `bare_repo_with_files`,
        `not_a_worktree`, `stranded_worktree`, `corrupt_status`
      - detail: str — actionable remediation message
      - path: str — the path that was checked
      - worktree_list: str — only for `stranded_worktree`
      - overlap_count: int — only for `corrupt_status`

    Non-raising: errors return ok=False with a structured reason, so callers
    can decide whether to halt or warn.
    """
    p = Path(repo_path)
    if not p.is_dir():
        return _unhealthy(repo_path, "not_a_directory", f"{repo_path} is not a directory.")

    if not (p / ".git").exists():
        return _unhealthy(
            repo_path,
            "not_a_repo",
            f"{repo_path} has no .git/ — not a git worktree.",
        )

    # Bare-repo check first (LED-1401 signature: bare=true + source files
    # alongside). Checked BEFORE is-inside-work-tree because a bare repo
    # answers "false" to that question — the more informative bare-repo
    # message should win when both conditions hold.
    is_bare = _run(["git", "rev-parse", "--is-bare-repository"], cwd=repo_path)
    if is_bare == "true":
        return _unhealthy(
            repo_path,
            "bare_repo_with_files",
            f"{repo_path}/.git/ has `core.bare = true` but the directory "
            f"holds source files. Tests against this state run stale "
            f"code. Re-clone fresh: {_RECLONE_HINT}",
        )

    inside = _run(["git", "rev-parse", "--is-inside-work-tree"], cwd=repo_path)
    if inside != "true":
        return _unhealthy(
            repo_path,
            "not_a_worktree",
            f"{repo_path} is not inside a git work tree "
            f"(rev-parse --is-inside-work-tree returned {inside!r}). "
            f"Re-clone fresh: {_RECLONE_HINT}",
        )

    # Worktree-list membership check (catches stranded sibling worktrees).
    # Empty output means the command failed; that is not treated as proof
    # of a stranded worktree, so the check is skipped.
    worktrees = _run(["git", "worktree", "list", "--porcelain"], cwd=repo_path)
    resolved_path = p.resolve()
    resolved = str(resolved_path)
    if worktrees and not _is_registered_worktree(worktrees, resolved_path):
        # The directory isn't a registered worktree of its own .git/ —
        # likely a stale checkout that was wiped+repopulated outside git's
        # awareness. This is the LED-1401 stranded-sibling signature.
        return _unhealthy(
            repo_path,
            "stranded_worktree",
            f"{resolved} is not a registered worktree of its own .git/. "
            f"Run `git worktree list` to inspect; re-clone fresh if "
            f"orphaned: {_RECLONE_HINT}",
            worktree_list=worktrees,
        )

    # LED-1401 corrupt-status signature: files appear as BOTH `D` and `??`.
    # The first 200 status lines are sampled; 10 or more distinct paths
    # showing the pattern is treated as pathological.
    status = _run(["git", "status", "--porcelain=v1"], cwd=repo_path, timeout=3.0)
    if status:
        overlap_count = _deleted_and_untracked_overlap(status)
        if overlap_count >= 10:
            return _unhealthy(
                repo_path,
                "corrupt_status",
                f"{repo_path} shows >={overlap_count} files as both deleted-from-index "
                f"AND untracked-on-disk — the worktree was wiped and repopulated "
                f"outside git's awareness (LED-1401 signature). Re-clone fresh: "
                f"{_RECLONE_HINT}",
                overlap_count=overlap_count,
            )

    return {
        "ok": True,
        "reason": "healthy",
        "detail": "git worktree is healthy",
        "path": repo_path,
    }
@@ -64,12 +64,29 @@ _CREDENTIAL_FALSE_POSITIVES = re.compile(
64
64
  r"sk-ant-demo|sk-demo|AIza-demo|xai-demo|demo[_-]?(?:key|secret|token)|"
65
65
  r"-demo['\"]|"
66
66
  # Function-call RHS (reading from parsed JSON, env, getters, slicing strings)
67
- r"json\.loads|\.read_text\(|\.slice\(|"
67
+ r"json\.loads|\.read_text\(|\.slice\(|\.split\(|"
68
68
  r"\w+\.get\(|token\s*=\s*_make_token|"
69
69
  # RHS that is a parameter reference like token=tokens.get("access_token"...
70
70
  r"=\s*\w+\.get\(|"
71
71
  # Dict index dereference: token_data["token"], result["secret"], etc.
72
72
  r"_data\[|_result\[|"
73
+ # LED-1278 (b): function-call RHS with leading underscore (e.g. _load_token())
74
+ r"=\s*_\w+\(|"
75
+ # LED-1278 (c) [2026-05-22]: naked function-call RHS without leading
76
+ # underscore. Matches the common shape `const token = readCurrentToken();`
77
+ # in bin/delimit-cli.js — the token is being READ from somewhere, not
78
+ # hardcoded. Tightened with `\s*;?\s*$` to require end-of-statement so
79
+ # we don't suppress `token = realLeak("AKIAIOSFODNN7EXAMPLE")` shapes
80
+ # where the call argument is itself a literal secret.
81
+ r"=\s*\w+\([^)]{0,40}\)\s*;?\s*$|"
82
+ # LED-1278 (c) [2026-05-22]: parenthesized property-access fallback chain
83
+ # like `const token = (options.token || process.env.TOKEN)`. Common shape
84
+ # for CLI option parsing where the RHS reads from a known input source,
85
+ # never a literal. Requires the open-paren to be followed by a word + dot
86
+ # (property access) so we don't match `token = ("AKIA..." || "")` shapes.
87
+ r"=\s*\(\s*\w+\.\w+|"
88
+ # LED-1278 (b): documentation/example placeholders in angle brackets
89
+ r"<[^>]*?(?:long|same|random|your|placeholder|example|secret|token|key)[^>]*?>|"
73
90
  # Bare `if not <var>:` and similar control-flow lines that mention
74
91
  # the credential variable name but contain no value.
75
92
  r"if\s+not\s+\w+:|"
@@ -98,6 +115,82 @@ SCAN_EXTENSIONS = {".py", ".js", ".ts", ".jsx", ".tsx", ".go", ".rb", ".java", "
98
115
  # Skip directories
99
116
  SKIP_DIRS = {"node_modules", ".git", "__pycache__", ".venv", "venv", ".tox", "dist", "build", ".next", ".nuxt", "vendor"}
100
117
 
118
+ # LED-1278 (a): test-tree path patterns excluded by default. The scanner walks # nosec
119
+ # test directories with prod rules, so test fixtures (placeholder tokens, # nosec
120
+ # trivial JWT bodies, code-injection demos) get surfaced as critical findings # nosec
121
+ # on every audit. Default behavior now skips these; callers can pass # nosec
122
+ # include_tests=True to scan everything. # nosec
123
+ TEST_PATH_PATTERNS = (
124
+ re.compile(r"(?:^|[\\/])tests?[\\/]"), # tests/ or test/ as a path component
125
+ re.compile(r"(?:^|[\\/])__tests__[\\/]"), # JS __tests__/
126
+ re.compile(r"(?:^|[\\/])spec[\\/]"), # spec/
127
+ re.compile(r"(?:^|[\\/])fixtures?[\\/]"), # fixtures/ or fixture/
128
+ re.compile(r"(?:^|[\\/])test_[^\\/]+\.py$"), # test_*.py
129
+ re.compile(r"_test\.(?:py|go|rb|java)$"), # *_test.py / *_test.go
130
+ re.compile(r"\.(?:test|spec)\.(?:js|jsx|ts|tsx|mjs|cjs)$"), # *.test.js, *.spec.tsx
131
+ )
132
+
133
+
134
def _is_test_path(path: str) -> bool:
    """Report whether *path* matches at least one of TEST_PATH_PATTERNS.

    The argument is coerced with ``str()`` first, so path-like objects are
    accepted as well as plain strings.
    """
    candidate = str(path)
    for test_pattern in TEST_PATH_PATTERNS:
        if test_pattern.search(candidate):
            return True
    return False
138
+
139
+
140
+ # LED-1278 (b): well-known dummy / fixture values. Even when include_tests=True
141
+ # (or when production code intentionally embeds canonical placeholders in
142
+ # docs/examples), these specific shapes should be suppressed as `info` log
143
+ # lines, not raised as critical findings.
144
+ #
145
+ # Each entry: (regex applied to the matched secret text, human label).
146
+ KNOWN_DUMMY_PATTERNS = [
147
+ # AWS canonical dummy from official AWS documentation.
148
+ (re.compile(r"AKIAIOSFODNN7EXAMPLE"), "aws_doc_dummy"),
149
+ # GitHub token placeholders that use the printable-alphabet pattern.
150
+ (re.compile(r"^gh[pousr]_ABCDEFGHIJKLMNOPQRSTUVWXYZ", re.IGNORECASE), "github_alphabet_dummy"),
151
+ # Slack tokens with the leading 1234567890 sequence.
152
+ (re.compile(r"^xox[baprs]-1234567890-"), "slack_seq_dummy"),
153
+ # JWT with the unsigned-HS256 header + trivial body. We match the literal
154
+ # eyJhbGciOiJIUzI1NiJ9 header and check the payload separately below.
155
+ (re.compile(r"^eyJhbGciOiJIUzI1NiJ9\."), "jwt_hs256_trivial"),
156
+ # Generic dict-credential placeholder values: fake/test/dummy/example/etc.
157
+ (re.compile(r"['\"](?:fake|test|dummy|example|placeholder|stale|from-)[A-Za-z0-9_\-]*['\"]\s*$", re.IGNORECASE),
158
+ "generic_placeholder_value"),
159
+ # Provider test-key shapes: xai-key-123, google-key-7, claude-key-2 etc.
160
+ (re.compile(r"['\"](?:xai|google|claude|gem|grok|codex|ollama)[-_]?key[-_]?\d+['\"]\s*$", re.IGNORECASE),
161
+ "provider_test_key"),
162
+ ]
163
+
164
+
165
def _looks_like_known_dummy(secret_name: str, matched_text: str) -> Optional[str]:
    """Classify `matched_text` against KNOWN_DUMMY_PATTERNS.

    Returns the label of the first pattern that identifies the text as a
    known dummy/fixture value, or None when no pattern applies. The secret
    scanner uses a non-None label to record the hit as a suppressed
    `info`-level entry rather than a critical finding.

    The `jwt_hs256_trivial` pattern carries an extra condition: matching the
    HS256 header is not enough, the token must also contain the canonical
    demo payload or an `abc123` final segment. This keeps real signed JWTs
    that merely use HS256 from being suppressed. A JWT that fails the extra
    condition is still tested against the remaining patterns.

    `secret_name` is accepted for the caller's convenience and is not
    consulted here.
    """
    # Base64url of {"sub":"1234567890"} — the canonical demo JWT body, e.g.
    #   eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.abc123def456ghi789
    demo_payload = "eyJzdWIiOiIxMjM0NTY3ODkwIn0"

    for dummy_re, dummy_label in KNOWN_DUMMY_PATTERNS:
        if not dummy_re.search(matched_text):
            continue
        if dummy_label != "jwt_hs256_trivial":
            return dummy_label
        has_demo_payload = demo_payload in matched_text
        if has_demo_payload or re.search(
            r"\.[A-Za-z0-9_-]*abc123[A-Za-z0-9_-]*$", matched_text
        ):
            return dummy_label
    return None
193
+
101
194
 
102
195
  def _run_cmd(cmd: List[str], timeout: int = 30, cwd: Optional[str] = None) -> Dict[str, Any]:
103
196
  """Run a command and return stdout, stderr, returncode.
@@ -144,8 +237,13 @@ def _bump_semver(version: str, bump: str) -> str:
144
237
  return f"{major}.{minor}.{patch}"
145
238
 
146
239
 
147
- def _scan_files(target: str) -> List[Path]:
148
- """Collect scannable source files under target."""
240
+ def _scan_files(target: str, include_tests: bool = False) -> List[Path]:
241
+ """Collect scannable source files under target.
242
+
243
+ LED-1278 (a): when include_tests=False (the new default), skip files that
244
+ match TEST_PATH_PATTERNS so test fixtures do not surface as findings.
245
+ Single-file targets are always scanned regardless (caller asked explicitly).
246
+ """
149
247
  root = Path(target).resolve()
150
248
  files = []
151
249
  if root.is_file():
@@ -154,10 +252,25 @@ def _scan_files(target: str) -> List[Path]:
154
252
  return []
155
253
  for dirpath, dirnames, filenames in os.walk(root, onerror=lambda _err: None):
156
254
  dirnames[:] = [d for d in dirnames if d not in SKIP_DIRS]
255
+ if not include_tests:
256
+ # Prune obvious test directory names before recursing so we don't
257
+ # walk huge __tests__/ trees just to discard them later.
258
+ dirnames[:] = [
259
+ d for d in dirnames
260
+ if d not in ("tests", "test", "__tests__", "spec", "fixtures", "fixture")
261
+ ]
157
262
  for filename in filenames:
158
263
  p = Path(dirpath) / filename
159
- if p.suffix in SCAN_EXTENSIONS:
160
- files.append(p)
264
+ if p.suffix not in SCAN_EXTENSIONS:
265
+ continue
266
+ if not include_tests:
267
+ try:
268
+ rel = str(p.relative_to(root))
269
+ except ValueError:
270
+ rel = str(p)
271
+ if _is_test_path(rel):
272
+ continue
273
+ files.append(p)
161
274
  # Cap to avoid scanning massive repos
162
275
  if len(files) >= 5000:
163
276
  return files
@@ -166,11 +279,26 @@ def _scan_files(target: str) -> List[Path]:
166
279
 
167
280
  # ─── 5. security_audit ──────────────────────────────────────────────────
168
281
 
169
- def security_audit(target: str = ".") -> Dict[str, Any]:
282
+ def security_audit(target: str = ".", include_tests: bool = False) -> Dict[str, Any]:
170
283
  """Audit security: dependency vulnerabilities + anti-patterns + secret detection.
171
284
 
172
285
  Default: runs pip-audit/npm-audit, regex scans for secrets and dangerous patterns.
173
286
  Optional upgrade: set SNYK_TOKEN or TRIVY_PATH for enhanced scanning.
287
+
288
+ LED-1278 fixes:
289
+ (a) include_tests defaults to False — test directories (tests/, __tests__/,
290
+ spec/, fixtures/, *_test.py, *.test.tsx, etc.) are skipped so
291
+ test fixtures don't get raised as critical production findings.
292
+ Pass include_tests=True to scan everything (legacy behavior).
293
+ (b) Well-known dummy/placeholder values (AWS canonical example,
294
+ alphabet-pattern GitHub tokens, leading-1234567890 Slack tokens,
295
+ trivial JWT, fake/test/dummy/placeholder dict values, provider
296
+ test-key shapes) are suppressed and recorded as `info`-severity
297
+ allowlist hits in `suppressed_findings` for audit visibility.
298
+
299
+ Args:
300
+ target: Repository or file path to audit.
301
+ include_tests: When True, scan test directories (default False).
174
302
  """
175
303
  target_path = Path(target).resolve()
176
304
  if not target_path.exists():
@@ -179,6 +307,7 @@ def security_audit(target: str = ".") -> Dict[str, Any]:
179
307
  vulnerabilities = []
180
308
  anti_patterns_found = []
181
309
  secrets_found = []
310
+ suppressed_findings: List[Dict[str, Any]] = [] # LED-1278 (b): allowlist log
182
311
  tools_used = []
183
312
  severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
184
313
 
@@ -284,8 +413,10 @@ def security_audit(target: str = ".") -> Dict[str, Any]:
284
413
  pass
285
414
 
286
415
  # --- 2. Anti-pattern scan ---
287
- files = _scan_files(target)
288
- tools_used.append(f"pattern-scanner ({len(files)} files)")
416
+ files = _scan_files(target, include_tests=include_tests)
417
+ scan_label = f"pattern-scanner ({len(files)} files"
418
+ scan_label += ", include_tests=True" if include_tests else ", tests excluded"
419
+ tools_used.append(scan_label + ")")
289
420
 
290
421
  for fpath in files:
291
422
  try:
@@ -305,6 +436,25 @@ def security_audit(target: str = ".") -> Dict[str, Any]:
305
436
  if secret_name in _FP_FILTERED and _CREDENTIAL_FALSE_POSITIVES.search(matched_text):
306
437
  continue
307
438
  line_num = content[:match.start()].count("\n") + 1
439
+ # LED-1278 (b): well-known dummy/placeholder values get
440
+ # suppressed to info-level rather than raised as critical.
441
+ # Logged in suppressed_findings so a future regression in the
442
+ # allowlist (e.g. real key matching by accident) is auditable.
443
+ dummy_label = _looks_like_known_dummy(secret_name, matched_text)
444
+ if dummy_label:
445
+ suppressed_findings.append({
446
+ "file": rel,
447
+ "line": line_num,
448
+ "type": secret_name,
449
+ "reason": dummy_label,
450
+ "severity": "info",
451
+ })
452
+ severity_counts["info"] += 1
453
+ logger.info(
454
+ "security_audit: suppressed known-dummy %s (%s) in %s:%d",
455
+ secret_name, dummy_label, rel, line_num,
456
+ )
457
+ continue
308
458
  # Redact actual secret values in snippet output
309
459
  snippet_raw = content[max(0, match.start() - 10):match.end() + 10].strip()[:80]
310
460
  secrets_found.append({
@@ -358,6 +508,9 @@ def security_audit(target: str = ".") -> Dict[str, Any]:
358
508
  "anti_patterns": anti_patterns_found,
359
509
  "secrets_detected": len(secrets_found),
360
510
  "secrets": secrets_found[:20], # Cap output to avoid huge responses
511
+ "suppressed_findings": suppressed_findings[:20], # LED-1278 (b): allowlist audit log
512
+ "suppressed_count": len(suppressed_findings),
513
+ "include_tests": include_tests, # LED-1278 (a): expose scan scope
361
514
  "env_in_git": env_in_git,
362
515
  "severity_summary": severity_counts,
363
516
  "tools_used": tools_used,
@@ -765,9 +918,9 @@ def release_plan(environment: str = "production", version: str = "", repository:
765
918
 
766
919
  # Commits since last tag
767
920
  if last_tag:
768
- r = _run_cmd(["git", "log", f"{last_tag}..HEAD", "--oneline", "--no-decorate"], cwd=cwd)
921
+ r = _run_cmd(["git", "log", f"{last_tag}..HEAD", "--format=%s"], cwd=cwd)
769
922
  else:
770
- r = _run_cmd(["git", "log", "--oneline", "--no-decorate", "-50"], cwd=cwd)
923
+ r = _run_cmd(["git", "log", "--format=%s", "-50"], cwd=cwd)
771
924
  commits = [line.strip() for line in r["stdout"].strip().split("\n") if line.strip()] if r["stdout"].strip() else []
772
925
  result["commits_since_last_tag"] = len(commits)
773
926
  result["commits"] = commits[:30] # Cap
@@ -0,0 +1,185 @@
1
+ """LED-1415 — CLI subprocess contract.
2
+
3
+ The deliberation engine drives 4 model CLIs as subprocesses
4
+ (claude / codex / gemini / cursor) and treats their stdout as model
5
+ verdict text. Three classes of bug have surfaced in this pipeline:
6
+
7
+ 1. Banner contamination — the Delimit governance shim leaks ASCII
8
+ art onto stdout instead of stderr (PR #154, fixed by LED-1428).
9
+ 2. Empty/silent responses — CLI exits 0 but stdout is empty
10
+ (transient API issues, OOM, network blips). Caught by LED-1416's
11
+ retry state machine.
12
+ 3. Schema drift — CLI changes its output shape between versions
13
+ (e.g., adds an auto-correction line at the top). Caught
14
+ reactively by failing deliberation panels.
15
+
16
+ This module holds the ONE contract that every CLI response must
17
+ satisfy + the ONE validator that enforces it. Both the per-CLI mock
18
+ tests (tests/test_cli_contract.py) AND the weekly real-CLI smoke
19
+ script (scripts/smoke_cli_contracts.py) call validate_cli_contract()
20
+ so the contract definition lives in exactly one place — extending
21
+ it doesn't require remembering to change two places.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import re
27
+ from dataclasses import dataclass, field
28
+ from typing import List, Optional
29
+
30
+
31
+ # The 4 known CLIs the deliberation engine targets. cursor is included
32
+ # even though it's not yet installed in the dev environment — adding
33
+ # it to the contract surface now means the validator is ready when it
34
+ # lands; smoke skips when the binary isn't present.
35
+ KNOWN_CLI_NAMES = ("claude", "codex", "gemini", "cursor")
36
+
37
+
38
+ # Minimum scrubbed-response length we'll accept as "looks like a real
39
+ # model verdict" rather than "leftover garbage after banner strip."
40
+ # Calibrated against historical scrub-debug.jsonl entries: every real
41
+ # round-1/round-2 verdict from past deliberations was >= 60 chars;
42
+ # every banner-only contamination was < 30 chars. 30 is the cutoff
43
+ # the production scrubber already uses; keeping that here means the
44
+ # validator + the scrubber agree.
45
+ MIN_VERDICT_LEN = 30
46
+
47
+
48
+ # Patterns that signal "the response is contamination, not a verdict."
49
+ # Each gets the response REJECTED even if length and scrub passed.
50
+ _CONTAMINATION_MARKERS = (
51
+ re.compile(r"^\[scrub:\s*contaminated\b", re.IGNORECASE),
52
+ re.compile(r"^\[.+\bunavailable\b.+\bnot found in PATH\]", re.IGNORECASE),
53
+ re.compile(r"^\[.+\bskipped under INTERNAL_PYTEST_GUARD", re.IGNORECASE),
54
+ re.compile(r"^\[.+\btimed out after\b", re.IGNORECASE),
55
+ re.compile(r"^\[.+\breturned empty response\]", re.IGNORECASE),
56
+ re.compile(r"^\[.+\berror:.+\]\s*$", re.IGNORECASE),
57
+ )
58
+
59
+
60
# A response should contain at least ONE of these markers to be
# recognizable as a panel verdict. The deliberation engine prompts all
# models to end with `VERDICT: ...` so we expect to see it. Falling
# back: "AGREE" / "DISAGREE" / "REMEDIATE" / "AGREE WITH MODIFICATIONS"
# all appear in real responses even when the trailing VERDICT line is
# omitted by a chatty model.
#
# The trailing `\b` applies to the bare keywords only. `VERDICT:` ends in a
# non-word character, so a `\b` after it would match only when a word
# character follows the colon directly and would miss `VERDICT: <text>`.
# Group 1 is still the marker that matched.
_VERDICT_HINT_RE = re.compile(
    r"\b(VERDICT:|(?:AGREE|DISAGREE|REMEDIATE|APPROVE|REJECT)\b)",
    re.IGNORECASE,
)
70
+
71
+
72
@dataclass
class CliContractResult:
    """Result of checking a single CLI response against the contract.

    A response is contract-clean exactly when `ok` is True, meaning no
    clause fired. When clauses did fire, `failures` names each of them so
    whoever reads the report (the smoke script forwards this list via ntfy)
    can see what shape the regression took.
    """

    # Name of the CLI that produced the response.
    cli: str
    # Length of the raw stdout, before scrubbing.
    raw_len: int
    # Length of the text that remained after scrubbing.
    scrubbed_len: int
    # True iff every contract clause passed.
    ok: bool
    # One short reason string per clause that fired; empty when clean.
    failures: List[str] = field(default_factory=list)
    # First 200 chars of scrubbed text, for log readability.
    preview: str = ""
86
+
87
+
88
def validate_cli_contract(
    cli_name: str,
    raw_stdout: str,
    raw_stderr: str = "",
    expect_verdict_hint: bool = True,
) -> CliContractResult:
    """Apply the per-CLI contract to one subprocess response.

    The raw stdout is passed through `ai.deliberation._scrub_cli_output`
    (the scrubber the deliberation engine uses) and the scrubbed text is
    then checked clause by clause. Each clause that fires appends a short
    reason string; an empty failures list means the response is
    contract-clean.

    Args:
        cli_name: which CLI produced this (claude/codex/gemini/cursor);
            passed to the scrubber as `source` and echoed in the result.
        raw_stdout: subprocess stdout decoded to str. `None` (stdout was
            not captured) is treated as an empty string.
        raw_stderr: subprocess stderr decoded to str. Accepted for
            interface completeness; no clause currently inspects it, so
            banner output on stderr never fails the contract.
        expect_verdict_hint: when True, fail the response if it doesn't
            contain at least one verdict marker. Pass False for
            low-content responses (e.g., a `--version` smoke).

    Returns:
        CliContractResult with `ok`, `failures`, and a preview. If the
        scrubber cannot be imported or raises, the result has ok=False and
        a single `scrub_failed:<ExceptionType>:<message>` failure.
    """
    # Normalize up front so the length bookkeeping below can never raise,
    # including inside the scrub-failure handler.
    if raw_stdout is None:
        raw_stdout = ""

    # Imported lazily so this module stays importable in contexts where
    # ai.deliberation isn't available (e.g., the smoke script).
    try:
        from ai.deliberation import _scrub_cli_output
        scrubbed = _scrub_cli_output(raw_stdout, source=cli_name).strip()
    except Exception as exc:
        return CliContractResult(
            cli=cli_name,
            raw_len=len(raw_stdout),
            scrubbed_len=0,
            ok=False,
            failures=[f"scrub_failed:{type(exc).__name__}:{str(exc)[:80]}"],
            preview="",
        )

    failures: List[str] = []

    # 1. Contamination markers — report the first one that matches.
    marker = next(
        (pat for pat in _CONTAMINATION_MARKERS if pat.search(scrubbed)),
        None,
    )
    if marker is not None:
        failures.append(f"contamination_marker:{marker.pattern[:40]}")

    # 2. Minimum length. Below MIN_VERDICT_LEN is almost certainly garbage
    #    even if scrub didn't tag it. Skipped when a contamination marker
    #    already explains the response, to avoid a redundant failure.
    if marker is None and len(scrubbed) < MIN_VERDICT_LEN:
        failures.append(f"too_short:{len(scrubbed)}<{MIN_VERDICT_LEN}")

    # 3. Verdict hint — at least one of VERDICT:/AGREE/DISAGREE/REMEDIATE/
    #    APPROVE/REJECT must appear. Skipped when expect_verdict_hint=False.
    if expect_verdict_hint and not _VERDICT_HINT_RE.search(scrubbed):
        failures.append("no_verdict_hint")

    # 4. Defense-in-depth on top of scrub: a bracketed prefix is almost
    #    always a tool-emitted status line, not a model verdict. Only
    #    prefixes outside the known set are flagged here.
    #    NOTE(review): known prefixes such as "[delimit" are not failed by
    #    this clause — confirm the markers in clause 1 cover them as intended.
    known_prefixes = (
        "[delimit", "[scrub:", "[claude", "[codex", "[gemini", "[cursor",
    )
    if scrubbed.startswith("[") and not scrubbed.lower().startswith(known_prefixes):
        # Unknown bracketed prefix — surface for inspection
        failures.append(f"unknown_bracketed_prefix:{scrubbed[:40]!r}")

    return CliContractResult(
        cli=cli_name,
        raw_len=len(raw_stdout),
        scrubbed_len=len(scrubbed),
        ok=not failures,
        failures=failures,
        preview=scrubbed[:200],
    )
170
+
171
+
172
def format_contract_report(results: List["CliContractResult"]) -> str:
    """Render validation results as a multi-line summary for ntfy / logs.

    The first line counts the clean results. Each result then gets a status
    line; a failing result is followed by one line per failure and, when a
    preview is present, the first 100 characters of it.
    """
    # NOTE(review): the preview line is emitted for failing results only —
    # confirm this nesting against the original file's indentation.
    clean_total = len([res for res in results if res.ok])
    report = [f"CLI contract: {clean_total}/{len(results)} clean"]
    for res in results:
        status = "FAIL" if not res.ok else "OK"
        report.append(
            f" [{status}] {res.cli:8s} raw={res.raw_len}B scrubbed={res.scrubbed_len}B"
        )
        if res.ok:
            continue
        report.extend(f" ↳ {reason}" for reason in res.failures)
        if res.preview:
            report.append(f" preview: {res.preview[:100]!r}")
    return "\n".join(report)
@@ -75,6 +75,7 @@ AUTO_PATTERNS = {
75
75
  "test": ["test", "coverage", "smoke"],
76
76
  "docs": ["docs", "documentation", "readme"],
77
77
  "governance": ["governance", "policy", "compliance"],
78
+ "build": ["feat", "fix", "task", "implementation"],
78
79
  }
79
80
 
80
81
 
@@ -263,6 +264,14 @@ def get_next_automatable_item(
263
264
  return None
264
265
 
265
266
 
267
+
268
def _run_build(item_id: str, venture: str = "") -> dict:
    """Run one governed build-loop iteration on behalf of the daemon (LED-1146).

    Returns whatever `ai.loop_engine.run_governed_iteration` returns.

    NOTE(review): `item_id` and `venture` are accepted but not forwarded —
    every call runs against the same fixed session. Confirm this is intended.
    """
    from ai.loop_engine import run_governed_iteration

    # The daemon reuses a single persistent session id across invocations.
    return run_governed_iteration(session_id="daemon-build-loop")
274
+
266
275
  def process_item(item: dict, log_path: Optional[Path] = None) -> dict:
267
276
  """Process a single ledger item by running the suggested tool.
268
277
 
@@ -293,6 +302,7 @@ def process_item(item: dict, log_path: Optional[Path] = None) -> dict:
293
302
  "test": _run_test,
294
303
  "governance": _run_governance,
295
304
  "docs": _run_docs,
305
+ "build": _run_build,
296
306
  }
297
307
 
298
308
  runner = tool_map.get(tool)
@@ -20,11 +20,10 @@ Call via MCP: delimit_digest(action="run") or scheduled cron.
20
20
  from __future__ import annotations
21
21
 
22
22
  import json
23
- import time
24
23
  from collections import Counter
25
24
  from datetime import datetime, timedelta, timezone
26
25
  from pathlib import Path
27
- from typing import Any, Dict, List, Optional
26
+ from typing import Any, Dict
28
27
 
29
28
  DIGEST_DIR = Path.home() / ".delimit" / "digest"
30
29
  LEDGER_DIR = Path.home() / ".delimit" / "ledger"