@intentsolutions/audit-harness 0.1.0 → 1.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,481 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ audit-harness conform — read-only deterministic conformance gate-runner.
4
+
5
+ For every conformance-dimension gate in a repo's audit-profile/v1, conform locates
6
+ the relevant artifact(s) and validates them against a CONTENT-ADDRESSED schema
7
+ BUNDLED in this harness version (never live-fetched). Each artifact yields one
8
+ `gate-result/v1` Evidence Bundle row (the predicate body in
9
+ schemas/.../gate-result.schema.json / @intentsolutions/core), emitted as a JSON
10
+ array on stdout. NEVER writes to the repo.
11
+
12
+ Design rules (PP-PLAN-040 Phase 2):
13
+ - Deterministic + pure-local. Same commit + same harness version => identical
14
+ verdict. The bundled JSON-Schema is validated by an EMBEDDED subset validator
15
+ (not ajv) precisely BECAUSE ajv's availability/version varies per box and would
16
+ make signed evidence non-reproducible. The embedded validator is complete for
17
+ the closed bundled schemas (which use only the keyword subset it supports).
18
+ - Genuinely-external formats shell out: OpenAPI -> spectral, Action YAML ->
19
+ yamllint. Missing tool => INDETERMINATE (advisory), never a false FAIL.
20
+ - Advisory-first. A conformance violation on an `enforcement: advisory` gate is
21
+ ADVISORY (severity error), exit 0 — the finding is logged, the build is not
22
+ reddened. Only an engineer-promoted `enforcement: blocking` gate (or --strict)
23
+ turns a violation into FAIL (exit 1).
24
+ - conform records the bundled schema's sha256 in the gate-result `policy_hash`,
25
+ so a row re-verifies against the exact schema version that produced it.
26
+ - Stdlib only (PyYAML used for frontmatter when present; absent => indeterminate
27
+ rather than a guessed verdict). No network. No filesystem mutation.
28
+
29
+ Usage:
30
+ python3 scripts/conform.py [REPO_PATH] [--json] [--strict] [--profile PATH|-]
31
+ AUDIT_HARNESS_DISABLE=1 python3 scripts/conform.py # kill-switch (no-op, exit 0)
32
+ """
33
+ import argparse
34
+ import hashlib
35
+ import json
36
+ import os
37
+ import re
38
+ import shutil
39
+ import subprocess
40
+ import sys
41
+ from datetime import datetime, timezone
42
+
43
+ HERE = os.path.dirname(os.path.abspath(__file__))
44
+ if HERE not in sys.path:
45
+ sys.path.insert(0, HERE)
46
+ import classify as C # noqa: E402 (sibling module; reused for the single-source profile)
47
+
48
+ try:
49
+ import yaml # type: ignore
50
+ except Exception: # pragma: no cover - exercised only on boxes without PyYAML
51
+ yaml = None
52
+
53
# Directory that holds the schemas bundled with this harness version,
# expressed relative to this script's own directory.
SCHEMA_DIR = os.path.join(HERE, "..", "schemas", "conform", "v1")

# "sha256:"-prefixed digest of zero bytes; used as a placeholder hash.
EMPTY_SHA = f"sha256:{hashlib.sha256(b'').hexdigest()}"

# kind -> bundled schema filename (content-addressed in this harness version)
BUNDLED = dict(
    skillmd="skillmd-frontmatter.schema.json",
    agent="agent-frontmatter.schema.json",
    mcp="mcp-config.schema.json",
    plugin="plugin-manifest.schema.json",
)

# Kinds whose payload is YAML frontmatter inside a .md file.
FRONTMATTER_KINDS = {"agent", "skillmd"}

# Kinds whose payload is the whole file, parsed as JSON.
JSON_KINDS = {"hook", "marketplace", "mcp", "plugin"}

# kind -> external linter executable (genuinely-external formats)
SHELLOUT = dict(
    openapi="spectral",
    action="yamllint",
)
69
+
70
+
71
+ # --------------------------------------------------------------------------- #
72
+ # Embedded JSON-Schema subset validator (complete for the closed bundled schemas)
73
+ # --------------------------------------------------------------------------- #
74
+ def _type_ok(value, t):
75
+ if t == "boolean":
76
+ return isinstance(value, bool)
77
+ if t == "integer":
78
+ return isinstance(value, int) and not isinstance(value, bool)
79
+ if t == "number":
80
+ return isinstance(value, (int, float)) and not isinstance(value, bool)
81
+ if t == "null":
82
+ return value is None
83
+ if t == "object":
84
+ return isinstance(value, dict)
85
+ if t == "array":
86
+ return isinstance(value, list)
87
+ if t == "string":
88
+ return isinstance(value, str)
89
+ return True
90
+
91
+
92
def _json_equal(a, b):
    """Compare two parsed-JSON values using JSON-Schema equality.

    Identical to ``==`` except that a boolean never equals a number: Python
    treats ``True == 1`` and ``False == 0`` as true, which would let a
    boolean satisfy a numeric ``enum``/``const`` (and vice versa). Lists and
    dicts are compared element-wise with the same rule.
    """
    if isinstance(a, bool) or isinstance(b, bool):
        return isinstance(a, bool) and isinstance(b, bool) and a == b
    if isinstance(a, list) and isinstance(b, list):
        return len(a) == len(b) and all(_json_equal(x, y) for x, y in zip(a, b))
    if isinstance(a, dict) and isinstance(b, dict):
        return a.keys() == b.keys() and all(_json_equal(a[k], b[k]) for k in a)
    return a == b


def validate_instance(inst, schema, path="$"):
    """Return a list of human-readable violation strings ([] == valid).

    Supported keywords: type, enum, const, minLength, maxLength, pattern,
    format (uri/url only), minItems, maxItems, items (single-schema form),
    required, properties, additionalProperties, allOf, anyOf, oneOf.
    Any other keyword is ignored.

    Args:
        inst: The parsed instance (dict/list/str/number/bool/None).
        schema: The schema node; a non-dict schema accepts everything.
        path: JSON-path-like prefix used in violation messages.
    """
    errs = []
    if not isinstance(schema, dict):
        return errs

    t = schema.get("type")
    if t is not None:
        types = t if isinstance(t, list) else [t]
        if not any(_type_ok(inst, x) for x in types):
            errs.append(f"{path}: expected type {t}, got {type(inst).__name__}")
            return errs  # downstream keyword checks are meaningless on a type mismatch

    # enum/const use JSON equality so that True/False never match 1/0.
    if "enum" in schema and not any(_json_equal(inst, c) for c in schema["enum"]):
        errs.append(f"{path}: {inst!r} not in enum {schema['enum']}")
    if "const" in schema and not _json_equal(inst, schema["const"]):
        errs.append(f"{path}: {inst!r} != const {schema['const']!r}")

    if isinstance(inst, str):
        if "minLength" in schema and len(inst) < schema["minLength"]:
            errs.append(f"{path}: shorter than minLength {schema['minLength']}")
        if "maxLength" in schema and len(inst) > schema["maxLength"]:
            errs.append(f"{path}: longer than maxLength {schema['maxLength']}")
        if "pattern" in schema and re.search(schema["pattern"], inst) is None:
            errs.append(f"{path}: does not match pattern {schema['pattern']!r}")
        # Only the presence of a leading "scheme:" is checked for uri/url.
        if schema.get("format") in ("uri", "url") and not re.match(r"^[a-zA-Z][a-zA-Z0-9+.\-]*:", inst):
            errs.append(f"{path}: not a {schema['format']}")

    if isinstance(inst, list):
        if "minItems" in schema and len(inst) < schema["minItems"]:
            errs.append(f"{path}: fewer than minItems {schema['minItems']}")
        if "maxItems" in schema and len(inst) > schema["maxItems"]:
            errs.append(f"{path}: more than maxItems {schema['maxItems']}")
        item_schema = schema.get("items")
        if isinstance(item_schema, dict):
            for i, el in enumerate(inst):
                errs += validate_instance(el, item_schema, f"{path}[{i}]")

    if isinstance(inst, dict):
        for req in schema.get("required", []):
            if req not in inst:
                errs.append(f"{path}: missing required property '{req}'")
        props = schema.get("properties", {})
        for k, sub in props.items():
            if k in inst:
                errs += validate_instance(inst[k], sub, f"{path}.{k}")
        ap = schema.get("additionalProperties", True)
        if ap is False:
            for k in inst:
                if k not in props:
                    errs.append(f"{path}: additional property '{k}' not allowed")
        elif isinstance(ap, dict):
            # A schema-valued additionalProperties validates every undeclared key.
            for k, v in inst.items():
                if k not in props:
                    errs += validate_instance(v, ap, f"{path}.{k}")

    for sub in schema.get("allOf", []):
        errs += validate_instance(inst, sub, path)
    if "anyOf" in schema:
        if not any(not validate_instance(inst, sub, path) for sub in schema["anyOf"]):
            errs.append(f"{path}: matches none of anyOf")
    if "oneOf" in schema:
        matches = sum(1 for sub in schema["oneOf"] if not validate_instance(inst, sub, path))
        if matches != 1:
            errs.append(f"{path}: matched {matches} of oneOf branches (need exactly 1)")
    return errs
158
+
159
+
160
+ # --------------------------------------------------------------------------- #
161
+ # Artifact location + parsing
162
+ # --------------------------------------------------------------------------- #
163
# Directory names that find_files() prunes from its walk.
SKIP_DIRS = (
    "node_modules",
    ".git",
    ".venv",
    "dist",
    "build",
    "fixtures",
    "tests",
    "test",
    "__tests__",
    "examples",
)
165
+
166
+
167
def find_files(repo, name, max_depth=3):
    """Return the sorted paths of every file called ``name`` under ``repo``.

    Directories listed in SKIP_DIRS are pruned from the walk. A directory
    whose depth below ``repo`` (counted in path separators) exceeds
    ``max_depth`` is neither searched nor descended into.
    """
    base = os.path.abspath(repo)
    hits = []
    for current, subdirs, filenames in os.walk(base):
        # In-place slice assignment is what makes os.walk honour the pruning.
        subdirs[:] = [d for d in subdirs if d not in SKIP_DIRS]
        too_deep = current[len(base):].count(os.sep) > max_depth
        if too_deep:
            subdirs[:] = []
            continue
        if name in filenames:
            hits.append(os.path.join(current, name))
    hits.sort()
    return hits
179
+
180
+
181
def locate(kind, repo):
    """Return the list of artifact file paths for a conformance kind.

    Unknown kinds yield an empty list. Paths are built by joining onto the
    absolute form of ``repo``.
    """
    root = os.path.abspath(repo)

    def existing(*rels):
        # Every listed relative path that is a regular file, in the given order.
        joined = (os.path.join(root, rel) for rel in rels)
        return [p for p in joined if os.path.isfile(p)]

    def listing(directory, suffix):
        # Sorted entries of `directory` ending in `suffix`; [] if it is not a directory.
        if not os.path.isdir(directory):
            return []
        return [os.path.join(directory, entry)
                for entry in sorted(os.listdir(directory))
                if entry.endswith(suffix)]

    if kind == "skillmd":
        return find_files(root, "SKILL.md")

    if kind == "agent":
        found = []
        for agent_dir in (os.path.join(root, "agents"),
                          os.path.join(root, ".claude", "agents")):
            found += listing(agent_dir, ".md")
        return found

    if kind == "mcp":
        found = existing(".mcp.json")
        for pkg_dir in C.list_pkg_subdirs(root):
            candidate = os.path.join(pkg_dir, ".mcp.json")
            if os.path.isfile(candidate):
                found.append(candidate)
        return found

    if kind == "plugin":
        return existing(".claude-plugin/plugin.json", "plugin.json")

    if kind == "marketplace":
        return existing(".claude-plugin/marketplace.json", "marketplace.json")

    if kind == "hook":
        found = existing("hooks/hooks.json")
        found += listing(os.path.join(root, ".claude", "hooks"), ".json")
        return found

    if kind == "openapi":
        return existing("openapi.yaml", "openapi.yml", "openapi.json",
                        "swagger.yaml", "swagger.json")

    if kind == "action":
        return existing("action.yml", "action.yaml")

    return []
219
+
220
+
221
def parse_json_file(path):
    """Load ``path`` as UTF-8 JSON and return ``(value, None)`` or ``(None, reason)``.

    ``reason`` starts with ``json-parse-error:`` when the content is not
    valid JSON and with ``read-error:`` for any other failure.
    """
    try:
        with open(path, "r", encoding="utf-8") as handle:
            parsed = json.load(handle)
    except json.JSONDecodeError as exc:
        return None, f"json-parse-error: {exc}"
    except Exception as exc:
        return None, f"read-error: {exc}"
    return parsed, None
229
+
230
+
231
def extract_frontmatter(path):
    """Return (dict, None) or (None, reason). Requires PyYAML for a reliable verdict.

    The file is read as UTF-8 and must open with a ``---`` fence line; the
    text up to the next ``---`` fence line is parsed with ``yaml.safe_load``.
    A leading byte-order mark is tolerated.

    Reasons returned on failure:
        read-error: ...                 the file could not be read/decoded
        no-frontmatter-block            the content does not begin with ``---``
        unterminated-frontmatter-block  no opening/closing fence pair matched
        pyyaml-unavailable              PyYAML is not importable on this box
        yaml-parse-error: ...           the block is not valid YAML
        frontmatter-not-a-mapping       the block parsed to a non-dict value
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            text = f.read()
    except Exception as e:
        return None, f"read-error: {e}"
    # Plain utf-8 decoding keeps a BOM as U+FEFF, and str.lstrip() does not
    # treat it as whitespace, so drop it before looking for the fence.
    if text.startswith("\ufeff"):
        text = text[1:]
    if not text.lstrip().startswith("---"):
        return None, "no-frontmatter-block"
    # The fence must be the very first line; the body is matched lazily up to
    # the first closing fence line.
    m = re.match(r"^---[ \t]*\r?\n(.*?)\r?\n---[ \t]*(\r?\n|$)", text, re.DOTALL)
    if not m:
        return None, "unterminated-frontmatter-block"
    block = m.group(1)
    if yaml is None:
        return None, "pyyaml-unavailable"  # honest indeterminate, not a guessed parse
    try:
        data = yaml.safe_load(block)
    except Exception as e:
        return None, f"yaml-parse-error: {e}"
    if data is None:
        # A block that is empty as far as YAML is concerned counts as an empty mapping.
        data = {}
    if not isinstance(data, dict):
        return None, "frontmatter-not-a-mapping"
    return data, None
255
+
256
+
257
+ # --------------------------------------------------------------------------- #
258
+ # gate-result/v1 row construction
259
+ # --------------------------------------------------------------------------- #
260
def sha256_path(path):
    """Hash ``path`` via ``C.sha256_file``; fall back to EMPTY_SHA on any error."""
    try:
        digest = C.sha256_file(path)
    except Exception:
        digest = EMPTY_SHA
    return digest
265
+
266
+
267
def sha256_str(s):
    """Return the ``sha256:``-prefixed hex digest of ``s`` encoded as UTF-8."""
    hexdigest = hashlib.sha256(s.encode("utf-8")).hexdigest()
    return f"sha256:{hexdigest}"
269
+
270
+
271
def make_row(gate_id, result, *, policy_hash, input_hash, commit_sha, runner,
             metadata=None, failure_mode=None, advisory_severity=None):
    """Build one result row as a plain dict.

    The row always carries gate_id, result, policy_hash, input_hash,
    timestamp (current UTC time, second resolution, ``Z`` suffix), runner
    and commit_sha. ``metadata`` is attached only when truthy;
    ``failure_mode`` and ``advisory_severity`` only when not None.
    """
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    row = dict(
        gate_id=gate_id,
        result=result,
        policy_hash=policy_hash,
        input_hash=input_hash,
        timestamp=stamp,
        runner=runner,
        commit_sha=commit_sha,
    )
    if metadata:
        row["metadata"] = metadata
    optional = (
        ("failure_mode", failure_mode),
        ("advisory_severity", advisory_severity),
    )
    row.update((key, value) for key, value in optional if value is not None)
    return row
289
+
290
+
291
def verdict_for(errs, parse_err, enforcement, strict):
    """Map (violations, parse error, enforcement) -> (result, failure_mode, severity).

    No violations and no parse error gives PASS. Otherwise the outcome is
    FAIL when ``strict`` is truthy or ``enforcement`` is "blocking", and
    ADVISORY with severity "error" in every other case.
    """
    has_parse_error = parse_err is not None
    if not errs and not has_parse_error:
        return "PASS", None, None
    if not (strict or enforcement == "blocking"):
        return "ADVISORY", None, "error"
    if has_parse_error:
        return "FAIL", "conform:parse-error", None
    return "FAIL", "conform:schema-violation", None
301
+
302
+
303
+ # --------------------------------------------------------------------------- #
304
def compute_profile(repo, registry_path, profile_arg):
    """Obtain the profile as parsed JSON.

    ``profile_arg == "-"`` reads JSON from stdin; any other truthy value is
    opened as a UTF-8 JSON file. Otherwise the sibling ``classify.py`` is run
    with the current interpreter and its stdout is parsed as JSON.

    Raises:
        SystemExit: with code 2 when classify.py exits non-zero (its stderr
            is forwarded to this process's stderr first).
    """
    if profile_arg == "-":
        return json.load(sys.stdin)
    if profile_arg:
        with open(profile_arg, "r", encoding="utf-8") as handle:
            return json.load(handle)

    classify_cmd = [
        sys.executable,
        os.path.join(HERE, "classify.py"),
        repo,
        "--registry",
        registry_path,
    ]
    proc = subprocess.run(classify_cmd, capture_output=True, text=True)
    if proc.returncode == 0:
        return json.loads(proc.stdout)
    sys.stderr.write(proc.stderr)
    raise SystemExit(2)
318
+
319
+
320
def kind_of(gate_id):
    """audit-harness:local:conform-skillmd -> 'skillmd'.

    Takes the text after the last ":" (the whole id when there is none) and
    drops a leading "conform-" if present.
    """
    prefix = "conform-"
    _, _, tail = gate_id.rpartition(":")
    if tail.startswith(prefix):
        return tail[len(prefix):]
    return tail
324
+
325
+
326
def run_shellout(kind, gate, files, commit_sha, runner, repo, strict):
    """Lint each artifact with the external tool mapped to ``kind``; return rows.

    When the tool is not on PATH a single indeterminate ADVISORY row (severity
    "warn") is returned for the whole gate. Otherwise each file is linted with
    a 120 s timeout: a zero exit code means no violations, a non-zero exit
    records the first 2000 characters of the tool's output as the single
    violation, and an exception while running the tool produces an
    indeterminate ADVISORY row for that file.
    """
    tool = SHELLOUT[kind]
    enforcement = gate.get("enforcement", "advisory")
    policy_hash = sha256_str(f"{tool}:default")

    def emit(result, input_hash, metadata, **extra):
        # Every row of this gate shares gate id, policy hash, commit and runner.
        return make_row(
            gate["gate_id"], result,
            policy_hash=policy_hash, input_hash=input_hash,
            commit_sha=commit_sha, runner=runner, metadata=metadata, **extra
        )

    if shutil.which(tool) is None:
        first_hash = sha256_path(files[0]) if files else EMPTY_SHA
        missing_meta = {"kind": kind, "validator": tool, "indeterminate": True,
                        "reason": f"{tool} not on PATH — conformance unmeasured"}
        return [emit("ADVISORY", first_hash, missing_meta, advisory_severity="warn")]

    rows = []
    for art in files:
        argv = [tool, art]
        if tool == "spectral":
            argv.insert(1, "lint")  # spectral takes a "lint" subcommand
        try:
            proc = subprocess.run(argv, capture_output=True, text=True, timeout=120)
            if proc.returncode == 0:
                errs = []
            else:
                errs = [(proc.stdout or proc.stderr).strip()[:2000]]
        except Exception as exc:
            crash_meta = {"kind": kind, "validator": tool, "indeterminate": True,
                          "artifact_path": os.path.relpath(art, repo), "reason": str(exc)}
            rows.append(emit("ADVISORY", sha256_path(art), crash_meta,
                             advisory_severity="warn"))
            continue
        result, fm, sev = verdict_for(errs, None, enforcement, strict)
        lint_meta = {"kind": kind, "validator": tool,
                     "artifact_path": os.path.relpath(art, repo),
                     "errors": errs[:20]}
        rows.append(emit(result, sha256_path(art), lint_meta,
                         failure_mode=fm, advisory_severity=sev))
    return rows
366
+
367
+
368
def run_bundled(kind, gate, files, commit_sha, runner, repo, strict):
    """Validate each artifact against the bundled schema for ``kind``; return rows.

    A schema that ``C.read_json`` reports as None yields one indeterminate
    ADVISORY row for the gate. Otherwise each file is parsed (frontmatter for
    FRONTMATTER_KINDS, whole-file JSON for everything else) and checked with
    the embedded validator; the schema file's hash is recorded as
    ``policy_hash``. A "pyyaml-unavailable" parse result is reported as an
    indeterminate ADVISORY row rather than a violation.
    """
    enforcement = gate.get("enforcement", "advisory")
    schema_file = BUNDLED[kind]
    schema_path = os.path.join(SCHEMA_DIR, schema_file)
    schema = C.read_json(schema_path)
    if schema is None:
        return [make_row(
            gate["gate_id"], "ADVISORY",
            policy_hash=EMPTY_SHA, input_hash=EMPTY_SHA, commit_sha=commit_sha,
            runner=runner, advisory_severity="warn",
            metadata={"kind": kind, "indeterminate": True,
                      "reason": f"bundled schema missing at {schema_file}"},
        )]

    policy_hash = C.sha256_file(schema_path)
    schema_id = schema.get("$id", "")
    parse = extract_frontmatter if kind in FRONTMATTER_KINDS else parse_json_file

    rows = []
    for art in files:
        data, parse_err = parse(art)
        meta = {"kind": kind, "validator": "audit-harness-embedded-subset",
                "schema_id": schema_id}

        if parse_err == "pyyaml-unavailable":
            meta.update(
                indeterminate=True,
                artifact_path=os.path.relpath(art, repo),
                reason="PyYAML unavailable — frontmatter conformance unmeasured",
            )
            rows.append(make_row(
                gate["gate_id"], "ADVISORY",
                policy_hash=policy_hash, input_hash=sha256_path(art),
                commit_sha=commit_sha, runner=runner, advisory_severity="warn",
                metadata=meta,
            ))
            continue

        # Schema validation only makes sense when the artifact parsed.
        errs = [] if parse_err is not None else validate_instance(data, schema)
        result, fm, sev = verdict_for(errs, parse_err, enforcement, strict)
        meta["artifact_path"] = os.path.relpath(art, repo)
        if parse_err is not None:
            meta["errors"] = [parse_err]
        elif errs:
            meta["errors"] = errs[:20]
        rows.append(make_row(
            gate["gate_id"], result,
            policy_hash=policy_hash, input_hash=sha256_path(art),
            commit_sha=commit_sha, runner=runner,
            failure_mode=fm, advisory_severity=sev, metadata=meta,
        ))
    return rows
416
+
417
+
418
def main():
    """CLI entry point: run every active conformance gate and print the rows.

    Prints the rows as a JSON array on stdout and a one-line tally on stderr,
    then exits 1 if any row is FAIL and 0 otherwise. When the kill-switch is
    active (``disable`` in the repo's .audit-harness.yml, or
    AUDIT_HARNESS_DISABLE=1) it prints ``[]`` and exits 0 without running
    any gate.
    """
    parser = argparse.ArgumentParser(
        description="Read-only conformance gate-runner -> gate-result/v1 rows")
    parser.add_argument("repo", nargs="?", default=".", help="Repo path (default: cwd)")
    parser.add_argument("--json", action="store_true",
                        help="Emit JSON (default; flag is for CLI symmetry)")
    parser.add_argument("--strict", action="store_true",
                        help="Treat every conformance violation as FAIL (exit 1), ignoring advisory default")
    parser.add_argument("--registry", default=C.DEFAULT_REGISTRY,
                        help="Path to the dimension-to-gate registry")
    parser.add_argument("--profile", default=None,
                        help="Use a pinned audit-profile/v1 (PATH or '-' for stdin) instead of classifying")
    args = parser.parse_args()

    repo = os.path.abspath(args.repo)
    registry_path = os.path.abspath(args.registry)
    runner = f"audit-harness@{C.harness_version()}"

    # Kill-switch: repo-level override file or environment variable.
    override_path = os.path.join(repo, ".audit-harness.yml")
    if os.path.isfile(override_path):
        override = C.parse_override(override_path)
    else:
        override = {"disable": False}
    if override.get("disable") or os.environ.get("AUDIT_HARNESS_DISABLE") == "1":
        sys.stderr.write("audit-harness: KILL-SWITCH active — conform skipped (no rows emitted)\n")
        print("[]")
        sys.exit(0)

    profile = compute_profile(repo, registry_path, args.profile)
    commit_sha = profile.get("subject", {}).get("commit_sha") or C.git_short_sha(repo)

    def is_active_conformance(g):
        return g.get("dimension") == "conformance" and g.get("enforcement") != "disabled"

    conf_gates = list(filter(is_active_conformance, profile.get("gates", [])))

    rows = []
    for gate in conf_gates:
        gate_id = gate["gate_id"]
        kind = kind_of(gate_id)
        files = locate(kind, repo)
        if not files:
            rows.append(make_row(
                gate_id, "NOT_APPLICABLE",
                policy_hash=EMPTY_SHA, input_hash=EMPTY_SHA, commit_sha=commit_sha,
                runner=runner,
                metadata={"kind": kind, "reason": "no matching artifact found in repo"},
            ))
            continue

        if kind in BUNDLED:
            handler = run_bundled
        elif kind in SHELLOUT:
            handler = run_shellout
        else:
            handler = None

        if handler is not None:
            rows += handler(kind, gate, files, commit_sha, runner, repo, args.strict)
            continue

        # Artifacts exist but this harness version has no validator for the kind.
        rows.append(make_row(
            gate_id, "ADVISORY",
            policy_hash=EMPTY_SHA, input_hash=sha256_path(files[0]), commit_sha=commit_sha,
            runner=runner, advisory_severity="warn",
            metadata={"kind": kind, "indeterminate": True,
                      "reason": f"no bundled conform schema for kind '{kind}' in this harness version"},
        ))

    print(json.dumps(rows, indent=2))

    tally = {"PASS": 0, "ADVISORY": 0, "FAIL": 0}
    for row in rows:
        if row["result"] in tally:
            tally[row["result"]] += 1
    n_pass, n_adv, n_fail = tally["PASS"], tally["ADVISORY"], tally["FAIL"]
    sys.stderr.write(f"audit-harness conform: {n_pass} PASS, {n_adv} ADVISORY, {n_fail} FAIL "
                     f"across {len(rows)} row(s)\n")
    sys.exit(1 if n_fail else 0)


if __name__ == "__main__":
    main()
@@ -16,6 +16,7 @@ from __future__ import annotations
16
16
 
17
17
  import argparse
18
18
  import csv
19
+ import hashlib
19
20
  import json
20
21
  import os
21
22
  import shutil
@@ -36,6 +37,19 @@ class MethodScore:
36
37
  kind: str # "src" or "test"
37
38
 
38
39
 
40
+ # Directories to skip during candidate discovery AND the --json input-hash
41
+ # walk. Single source of truth — both call sites MUST use this set so a repo
42
+ # with `reports/` (or `.next/`, `.nuxt/`, `.cache/`) gets identical treatment
43
+ # in both the candidate scan and the input-hash computation. Adding a dir
44
+ # here removes it from BOTH passes; that's the invariant this constant exists
45
+ # to preserve.
46
+ EXCLUDED_DIRS = {
47
+ ".git", ".venv", "venv", "node_modules", "__pycache__",
48
+ "dist", "build", "target", ".tox", ".mypy_cache", ".pytest_cache",
49
+ ".next", ".nuxt", ".cache", "reports",
50
+ }
51
+
52
+
39
53
  def crap(complexity: int, coverage_pct: float) -> float:
40
54
  cov = max(0.0, min(100.0, coverage_pct)) / 100.0
41
55
  return (complexity ** 2) * ((1.0 - cov) ** 3) + complexity
@@ -81,12 +95,11 @@ def score_python(root: Path, kind: str) -> list[MethodScore]:
81
95
  scanned = [t for t in candidates if (root / t).is_dir()]
82
96
  if not scanned:
83
97
  test_dirs = {"tests", "test", "spec", "specs", "features", "__tests__"}
84
- ignore = {".git", ".venv", "venv", "node_modules", "dist", "build", "target", ".tox", ".mypy_cache", ".pytest_cache", "reports", "__pycache__"}
85
98
  scanned = [
86
99
  p.name for p in root.iterdir()
87
100
  if p.is_dir()
88
101
  and not p.name.startswith(".")
89
- and p.name not in ignore
102
+ and p.name not in EXCLUDED_DIRS
90
103
  and p.name not in test_dirs
91
104
  and any(p.rglob("*.py"))
92
105
  ]
@@ -171,7 +184,7 @@ def score_go(root: Path, kind: str) -> list[MethodScore]:
171
184
 
172
185
  coverage: dict[str, float] = {}
173
186
  cov_out = root / "coverage.out"
174
- if not cov_out.is_file():
187
+ if not cov_out.is_file() and which_or_none("go"):
175
188
  run(["go", "test", "-coverprofile=coverage.out", "-covermode=atomic", "./..."], root)
176
189
  if cov_out.is_file() and which_or_none("go"):
177
190
  rc, out, _ = run(["go", "tool", "cover", "-func=coverage.out"], root)
@@ -263,7 +276,6 @@ def score_rust(root: Path, kind: str) -> list[MethodScore]:
263
276
  except json.JSONDecodeError:
264
277
  continue
265
278
  fpath = rec.get("name", "")
266
- metrics = rec.get("metrics", {}).get("cyclomatic", {})
267
279
  for func in rec.get("spaces", []):
268
280
  c = int(func.get("metrics", {}).get("cyclomatic", {}).get("sum", 1))
269
281
  complexity.append((fpath, func.get("name", "<anon>"), c))
@@ -302,6 +314,10 @@ def main() -> int:
302
314
  help="Test CRAP max (default 15)")
303
315
  ap.add_argument("--threshold-avg", type=float, default=10.0,
304
316
  help="Project average max (default 10)")
317
+ ap.add_argument("--json", action="store_true",
318
+ help="Emit gate-result envelope JSON on stdout (suitable for piping "
319
+ "to `audit-harness emit-evidence`). Preserves existing CSV/JSON "
320
+ "files written under --out.")
305
321
  args = ap.parse_args()
306
322
 
307
323
  root = Path(args.root).resolve()
@@ -377,7 +393,51 @@ def main() -> int:
377
393
  if args.format in ("json", "both"):
378
394
  (out_dir / "summary.json").write_text(json.dumps(summary, indent=2))
379
395
 
380
- print(json.dumps({"pass": pass_, "summary_path": str(out_dir / "summary.json")}))
396
+ if args.json:
397
+ side = os.environ.get("AUDIT_HARNESS_SIDE", "ci")
398
+ # input_hash: SHA256 over all production+test source-file contents under root, sorted.
399
+ # Use os.walk with directory pruning instead of rglob — large vendored trees
400
+ # (node_modules, .venv, .git, build outputs) would otherwise dominate the walk
401
+ # cost on big repos and waste IO on files we already filter out by extension.
402
+ digest = hashlib.sha256()
403
+ exts = (".py", ".ts", ".tsx", ".js", ".jsx", ".go", ".rs", ".java", ".kt", ".cs", ".php", ".rb")
404
+ collected: list[Path] = []
405
+ for dirpath, dirs, files in os.walk(root):
406
+ dirs[:] = [d for d in dirs if d not in EXCLUDED_DIRS]
407
+ for fn in files:
408
+ if fn.endswith(exts):
409
+ collected.append(Path(dirpath) / fn)
410
+ for fp in sorted(collected):
411
+ digest.update(fp.read_bytes())
412
+ input_hash = f"sha256:{digest.hexdigest()}"
413
+ # policy_hash: SHA256 over the threshold tuple (stable, deterministic)
414
+ policy_repr = f"prod={args.threshold_prod}|test={args.threshold_test}|avg={args.threshold_avg}".encode()
415
+ policy_hash = f"sha256:{hashlib.sha256(policy_repr).hexdigest()}"
416
+ result = "PASS" if pass_ else "FAIL"
417
+ envelope = {
418
+ "gate_id": f"audit-harness:{side}:crap-score",
419
+ "result": result,
420
+ "input_hash": input_hash,
421
+ "policy_hash": policy_hash,
422
+ "metadata": {
423
+ "language": lang,
424
+ "thresholds": summary["thresholds"],
425
+ "production_max_crap": summary["production"]["max_crap"],
426
+ "production_avg_crap": summary["production"]["avg_crap"],
427
+ "production_methods_scored": summary["production"]["methods_scored"],
428
+ "production_blockers_count": len(prod_blockers),
429
+ "test_max_crap": summary["test"]["max_crap"],
430
+ "test_methods_scored": summary["test"]["methods_scored"],
431
+ "test_blockers_count": len(test_blockers),
432
+ "avg_fail": avg_fail,
433
+ "summary_path": str(out_dir / "summary.json"),
434
+ },
435
+ }
436
+ if not pass_:
437
+ envelope["failure_mode"] = "crap-threshold-exceeded"
438
+ print(json.dumps(envelope))
439
+ else:
440
+ print(json.dumps({"pass": pass_, "summary_path": str(out_dir / "summary.json")}))
381
441
  return 0 if pass_ else 1
382
442
 
383
443