@intentsolutions/audit-harness 1.1.5 → 1.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,403 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ audit-harness classify — read-only deterministic repository classifier.
4
+
5
+ Emits an `audit-profile/v1` value (the data-first value specified in
6
+ schemas/audit-profile/v1.schema.json) to stdout as JSON. NEVER writes to the repo.
7
+
8
+ A profile is a deterministic function of:
9
+ (repo contents at commit_sha, the canonical dimension-to-gate registry pinned by
10
+ registry_hash, and any engineer .audit-harness.yml overrides).
11
+
12
+ Design rules (PP-PLAN-040):
13
+ - Classifications are a UNION, not a winner. A repo that is a monorepo AND ships a
14
+ SKILL.md AND an MCP server carries all three. Dropping any is a false-negative.
15
+ - unresolved[] is the only surface a Claude inspector may later refine.
16
+ - Stdlib only (argparse, datetime, hashlib, json, os, re, subprocess, sys). No third-party deps.
17
+ - No network. No filesystem mutation.
18
+
19
+ Usage:
20
+ python3 scripts/classify.py [REPO_PATH] [--json] [--registry PATH]
21
+ AUDIT_HARNESS_DISABLE=1 python3 scripts/classify.py # kill-switch
22
+ AUDIT_HARNESS_ADVISORY=gate-id,gate-id ... # force-advisory specific gates
23
+ """
24
+ import argparse
25
+ import hashlib
26
+ import json
27
+ import os
28
+ import re
29
+ import subprocess
30
+ import sys
31
+ from datetime import datetime, timezone
32
+
33
# Directory holding this script; bundled data files are located relative to it.
HERE = os.path.dirname(os.path.abspath(__file__))
# Registry used when --registry is not given on the command line.
DEFAULT_REGISTRY = os.path.join(HERE, "..", "schemas", "audit-profile", "registry.v1.json")

# Precedence of applicability values when one gate is contributed more than once:
# the entry with the higher rank is kept.
APPLICABILITY_RANK = {
    "required": 3,
    "recommended": 2,
    "conditional": 1,
    "waived": 0,
}
# package.json dependency names treated as a front-end framework signal.
FRONTEND_DEPS = (
    "react", "vue", "svelte", "solid-js",
    "next", "nuxt", "@angular/core", "preact",
)
# package.json dependency names treated as a server framework signal.
SERVER_DEPS = (
    "express", "fastify", "koa",
    "@hapi/hapi", "@nestjs/core", "restify",
)
# Substrings looked for (lower-cased) in Python dependency manifests.
PY_SERVER = ("fastapi", "flask", "django")
# Case-sensitive substrings that mark a repository as regulated.
REGULATED_MARKERS = ("HIPAA", "SOX", "PCI-DSS", "SOC2", "GDPR", "FedRAMP")
41
+
42
+
43
def sha256_file(path):
    """Return the SHA-256 digest of the file at `path` as "sha256:<hex>".

    The file is opened in binary mode and consumed in 64 KiB reads, so the
    whole content is never held in memory at once.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        while True:
            block = stream.read(65536)
            if not block:
                break
            digest.update(block)
    return f"sha256:{digest.hexdigest()}"
49
+
50
+
51
def read_json(path):
    """Parse the UTF-8 JSON document at `path` and return the decoded value.

    Any failure (missing file, unreadable file, malformed JSON, ...) is
    reported by returning None; no exception reaches the caller.
    """
    parsed = None
    try:
        with open(path, "r", encoding="utf-8") as source:
            parsed = json.loads(source.read())
    except Exception:
        parsed = None
    return parsed
57
+
58
+
59
def git_short_sha(repo):
    """Return the abbreviated HEAD commit hash of `repo`, or "0000000".

    Runs `git -C <repo> rev-parse --short HEAD` with a 5 second timeout. The
    placeholder "0000000" is returned when the command cannot be run, raises,
    or prints anything other than 7-40 lowercase hex characters.
    """
    placeholder = "0000000"
    command = ["git", "-C", repo, "rev-parse", "--short", "HEAD"]
    try:
        completed = subprocess.run(command, capture_output=True, text=True, timeout=5)
        candidate = completed.stdout.strip()
        is_sha = re.fullmatch(r"[a-f0-9]{7,40}", candidate) is not None
    except Exception:
        return placeholder
    return candidate if is_sha else placeholder
71
+
72
+
73
def harness_version():
    """Return the harness version string.

    Lookup order:
      1. the stripped content of `version.txt` one directory above this script,
         when that file is readable and non-empty;
      2. the "version" field of the sibling `package.json`;
      3. the literal "0.0.0".
    """
    vt = os.path.join(HERE, "..", "version.txt")
    try:
        with open(vt, "r", encoding="utf-8") as f:
            v = f.read().strip()
        if v:
            return v
    except Exception:
        # Best effort: a missing or unreadable version.txt falls through to package.json.
        pass
    pkg = read_json(os.path.join(HERE, "..", "package.json"))
    # read_json returns whatever the file decodes to; a JSON array or string has
    # no .get(), so anything that is not an object counts as "no version".
    if not isinstance(pkg, dict):
        pkg = {}
    return pkg.get("version", "0.0.0")
84
+
85
+
86
def _strip_inline_comment(text):
    """Remove a trailing YAML comment (`#` at line start or after whitespace, outside quotes)."""
    quote = None
    for i, ch in enumerate(text):
        if quote is not None:
            if ch == quote:
                quote = None
        elif ch in "\"'" and (i == 0 or text[i - 1].isspace() or text[i - 1] in "[,"):
            # Only treat a quote as opening a string where a scalar can start, so
            # an apostrophe inside a bare word does not hide a later comment.
            quote = ch
        elif ch == "#" and (i == 0 or text[i - 1].isspace()):
            return text[:i].rstrip()
    return text.rstrip()


def _unquote(text):
    """Trim surrounding whitespace, then surrounding single/double quote characters."""
    return text.strip().strip("\"'")


def parse_override(path):
    """Minimal, well-defined subset parser for .audit-harness.yml.

    Supported keys ONLY (full YAML is NOT parsed):
      disable: true|false        # kill-switch for this repo
      classify_pins:             # engineer-declared classification kinds
        - skill
      advisory:                  # force these gate_ids to enforcement=advisory
        - audit-harness:ci:crap-score
      disable_gates:             # force these gate_ids to enforcement=disabled
        - audit-harness:ci:a11y
    List keys also accept the inline form `advisory: [a, b]`.
    Unknown lines are ignored.

    Trailing `# ...` comments are stripped before a line is interpreted, and
    list items may be written with or without leading indentation.

    Args:
        path: location of the override file.

    Returns:
        dict with keys "disable" (bool) and "classify_pins", "advisory",
        "disable_gates" (lists of str). If the file cannot be read, the
        defaults (False and empty lists) are returned.
    """
    list_keys = ("classify_pins", "advisory", "disable_gates")
    ov = {"disable": False, "classify_pins": [], "advisory": [], "disable_gates": []}
    try:
        with open(path, "r", encoding="utf-8") as f:
            lines = f.readlines()
    except Exception:
        # Best effort: an unreadable override file means "no overrides".
        return ov

    current = None  # list key whose "- item" lines are currently being collected
    for raw in lines:
        # Whole-line comments collapse to "" here and are skipped with blank lines.
        line = _strip_inline_comment(raw.rstrip("\r\n"))
        if not line.strip():
            continue

        # `\s*` (not `\s+`): YAML allows sequence items at the parent key's indentation.
        m_item = re.match(r"^\s*-\s+(.+?)\s*$", line)
        if m_item and current in list_keys:
            ov[current].append(_unquote(m_item.group(1)))
            continue

        m_kv = re.match(r"^([A-Za-z_]+)\s*:\s*(.*)$", line)
        if not m_kv:
            continue
        key, val = m_kv.group(1), m_kv.group(2).strip()
        if key == "disable":
            ov["disable"] = _unquote(val).lower() in ("true", "yes", "1")
            current = None
        elif key in list_keys:
            current = key
            if val:  # inline list form: advisory: [a, b]
                inner = val.strip("[]")
                ov[key].extend(_unquote(x) for x in inner.split(",") if x.strip())
        else:
            # Any other top-level key ends the list being collected.
            current = None
    return ov
130
+
131
+
132
def list_pkg_subdirs(repo):
    """Return the `packages/<name>` paths under `repo` that hold a package.json.

    Entries are visited in sorted name order, so the result is sorted too. An
    empty list is returned when `repo` has no `packages` directory.
    """
    packages_root = os.path.join(repo, "packages")
    if not os.path.isdir(packages_root):
        return []
    candidates = (
        os.path.join(packages_root, entry)
        for entry in sorted(os.listdir(packages_root))
    )
    return [
        candidate
        for candidate in candidates
        if os.path.isfile(os.path.join(candidate, "package.json"))
    ]
141
+
142
+
143
def shallow_glob(repo, filename, max_depth=3):
    """Return True if a file named `filename` sits in `repo` or at most `max_depth` directory levels below it.

    Vendor/build directories and test/fixture/example directories are never
    entered, so a repository is not classified by its own fixtures (e.g. a
    harness whose fixtures contain SKILL.md files).
    """
    base = os.path.abspath(repo)
    ignored = {
        "node_modules", ".git", ".venv", "dist", "build",
        "fixtures", "tests", "test", "__tests__", "examples",
    }
    prefix_len = len(base)
    for current, subdirs, names in os.walk(base):
        # In-place edits of `subdirs` control which directories os.walk enters.
        subdirs[:] = [entry for entry in subdirs if entry not in ignored]
        too_deep = current[prefix_len:].count(os.sep) > max_depth
        if too_deep:
            del subdirs[:]
            continue
        if filename in names:
            return True
    return False
159
+
160
+
161
def repo_type_signals(pkg, root):
    """Derive repo-type classifications from a decoded package.json.

    Args:
        pkg: decoded package.json content; anything that is not a dict yields [].
        root: directory the package.json came from (not consulted here).

    Returns:
        list of (kind, signal) tuples, in the order frontend, service, cli,
        library, mcp, for whichever of those match.
    """
    if not isinstance(pkg, dict):
        return []

    # Merge every dependency table that is actually an object.
    declared = {}
    for section in ("dependencies", "devDependencies", "peerDependencies"):
        table = pkg.get(section)
        if isinstance(table, dict):
            declared.update(table)

    signals = []
    if not declared.keys().isdisjoint(FRONTEND_DEPS):
        signals.append(("frontend", "package.json:frontend-framework"))
    if not declared.keys().isdisjoint(SERVER_DEPS):
        signals.append(("service", "package.json:server-framework"))
    if pkg.get("bin"):
        signals.append(("cli", "package.json:bin"))

    # Library = a publishable package: not explicitly private AND declaring an
    # entry surface. Packages that omit "private" altogether count as publishable.
    not_private = pkg.get("private") is not True
    if not_private and any(pkg.get(field) for field in ("main", "exports", "module", "types")):
        signals.append(("library", "package.json:publishable(main/exports/types)"))

    if isinstance(pkg.get("mcpServers"), dict):
        signals.append(("mcp", "package.json:mcpServers"))
    return signals
184
+
185
+
186
def classify(repo):
    """Collect every classification signal found in the repository at `repo`.

    Classifications are a union: each matching kind is recorded, none wins
    over another. Only reads the filesystem; nothing is written.

    Args:
        repo: path to the repository root.

    Returns:
        dict mapping classification kind (str) to the set of signal strings
        that triggered it. Empty when nothing matched.
    """
    repo = os.path.abspath(repo)
    found = {}  # kind -> set(signals)

    def add(kind, signal):
        found.setdefault(kind, set()).add(signal)

    def has(*rel):
        return any(os.path.exists(os.path.join(repo, r)) for r in rel)

    def is_dir(*parts):
        return os.path.isdir(os.path.join(repo, *parts))

    def read_text(name):
        """Return the UTF-8 text of repo/<name>, or None when absent or unreadable."""
        p = os.path.join(repo, name)
        if not os.path.isfile(p):
            return None
        try:
            # `with` closes the handle deterministically.
            with open(p, "r", encoding="utf-8") as f:
                return f.read()
        except (OSError, ValueError):
            # Best effort: unreadable or undecodable files contribute no signal.
            return None

    # Listed once and reused for both the monorepo signal and the per-package scan.
    pkg_subdirs = list_pkg_subdirs(repo)

    # --- monorepo ---
    if has("pnpm-workspace.yaml", "turbo.json", "nx.json", "lerna.json", "rush.json") or pkg_subdirs:
        add("monorepo", "workspace-config")

    # --- Claude-ecosystem artifact kinds ---
    if shallow_glob(repo, "SKILL.md"):
        add("skill", "SKILL.md present")
    if is_dir("agents") or is_dir(".claude", "agents"):
        add("agent", "agents/ dir")
    if has("hooks/hooks.json") or is_dir(".claude", "hooks"):
        add("hook", "hooks config")
    if has(".mcp.json"):
        add("mcp", ".mcp.json present")
    if is_dir(".claude-plugin") or has("plugin.json", ".claude-plugin/plugin.json"):
        add("plugin", ".claude-plugin/")
    if has(".claude-plugin/marketplace.json", "marketplace.json"):
        add("marketplace", "marketplace.json")
    if has("action.yml", "action.yaml"):
        add("action", "action.yml")

    # --- API (spec presence) ---
    if has("openapi.yaml", "openapi.json", "openapi.yml", "swagger.yaml", "swagger.json"):
        add("api", "openapi/swagger spec")

    # --- root package.json repo-types ---
    root_pkg = read_json(os.path.join(repo, "package.json"))
    for kind, sig in repo_type_signals(root_pkg, repo):
        add(kind, sig)

    # --- python server frameworks ---
    for pyfile in ("requirements.txt", "pyproject.toml", "Pipfile"):
        txt = read_text(pyfile)
        if txt is not None and any(f in txt.lower() for f in PY_SERVER):
            add("service", "python:server-framework")

    # --- monorepo package-level repo-types (one level deep) ---
    for sub in pkg_subdirs:
        sub_pkg = read_json(os.path.join(sub, "package.json"))
        for kind, sig in repo_type_signals(sub_pkg, sub):
            add(kind, "packages/*:" + sig.split(":", 1)[-1])
        if has_mcp(sub):
            add("mcp", "packages/*:.mcp.json")

    # --- embedded (C/C++) ---
    # Cheap checks first: the source scan (a full tree walk in _has_ext) only
    # runs when a build file exists and there is no usable root package.json.
    if not root_pkg and has("Makefile", "CMakeLists.txt") and (
        shallow_glob(repo, "main.c") or _has_ext(repo, (".c", ".cpp", ".cc"))
    ):
        add("embedded", "C/C++ build + sources")

    # --- regulated overlay ---
    for marker_file in ("README.md", "SECURITY.md"):
        txt = read_text(marker_file)
        if txt is not None and any(m in txt for m in REGULATED_MARKERS):
            add("regulated", "compliance marker")
            break

    return found
262
+
263
+
264
def has_mcp(sub):
    """Report whether the directory `sub` directly contains a `.mcp.json` entry."""
    marker = os.path.join(sub, ".mcp.json")
    return os.path.exists(marker)
266
+
267
+
268
def _has_ext(repo, exts):
    """Return True if any file under `repo` has a name ending with one of `exts`.

    `exts` is passed straight to `str.endswith`. The walk has no depth limit
    but never enters `node_modules`, `.git` or `.venv`.
    """
    pruned = ("node_modules", ".git", ".venv")
    for _here, subdirs, names in os.walk(repo):
        # In-place edit so os.walk skips the pruned directories.
        subdirs[:] = [entry for entry in subdirs if entry not in pruned]
        for name in names:
            if name.endswith(exts):
                return True
    return False
274
+
275
+
276
def resolve_gates(kinds, registry, regulated, override):
    """Build the final gate list for a set of classification kinds.

    Steps, in order:
      1. Union the registry's "base" gates with the gates listed for each kind,
         de-duplicating by gate_id and keeping the entry with the highest
         applicability rank (the first one seen wins ties).
      2. If `regulated`, promote "recommended" gates to "required" for the
         dimensions named by the registry's regulated overlay.
      3. Apply engineer overrides: "advisory" ids first, then "disable_gates".
      4. Force enforcement to "disabled" on every gate whose applicability is "waived".

    Returns:
        list of gate dicts (copies of the registry entries) sorted by gate_id.
    """
    merged = {}

    def rank_of(gate):
        return APPLICABILITY_RANK.get(gate["applicability"], 0)

    candidates = list(registry.get("base", []))
    per_kind = registry.get("classifications", {})
    for kind in kinds:
        candidates.extend(per_kind.get(kind, []))

    for gate in candidates:
        gate_id = gate["gate_id"]
        incoming = rank_of(gate)
        current = merged.get(gate_id)
        if current is None or incoming > rank_of(current):
            merged[gate_id] = dict(gate)

    # regulated overlay: promote recommended -> required for listed dimensions
    if regulated:
        overlay = registry.get("overlays", {}).get("regulated", {})
        promoted = set(overlay.get("promote_to_required", []))
        for gate in merged.values():
            if gate.get("dimension") in promoted and gate["applicability"] == "recommended":
                gate["applicability"] = "required"

    # engineer overrides: advisory is applied before disabled, so disabled wins
    for list_name, enforcement in (("advisory", "advisory"), ("disable_gates", "disabled")):
        for gate_id in override.get(list_name, []):
            if gate_id in merged:
                merged[gate_id]["enforcement"] = enforcement

    # invariant: waived -> disabled
    for gate in merged.values():
        if gate["applicability"] == "waived":
            gate["enforcement"] = "disabled"

    return [gate for _gate_id, gate in sorted(merged.items(), key=lambda item: item[0])]
316
+
317
+
318
def main():
    """CLI entry point: classify a repository and print its audit-profile/v1 as JSON.

    Reads the registry, the optional `.audit-harness.yml` override file and the
    AUDIT_HARNESS_DISABLE / AUDIT_HARNESS_ADVISORY environment variables, runs
    the classifier, resolves gates and writes the profile to stdout.

    Exits with status 2 (message on stderr) when the registry is missing or is
    not a JSON object, and with status 0 after printing the profile.
    """
    ap = argparse.ArgumentParser(description="Read-only repository classifier -> audit-profile/v1")
    ap.add_argument("repo", nargs="?", default=".", help="Repo path (default: cwd)")
    ap.add_argument("--json", action="store_true", help="Emit JSON (default; flag is for symmetry with other gates)")
    ap.add_argument("--registry", default=DEFAULT_REGISTRY, help="Path to the dimension-to-gate registry datum")
    args = ap.parse_args()

    repo = os.path.abspath(args.repo)
    registry_path = os.path.abspath(args.registry)
    registry = read_json(registry_path)
    # read_json returns None for a missing file AND for malformed JSON, and may
    # return a non-object value; resolve_gates needs a mapping, so reject
    # everything else here with an accurate message.
    if not isinstance(registry, dict):
        if os.path.isfile(registry_path):
            print(f"classify: registry at {registry_path} is not a valid JSON object", file=sys.stderr)
        else:
            print(f"classify: registry not found at {registry_path}", file=sys.stderr)
        sys.exit(2)
    registry_hash = sha256_file(registry_path)

    # engineer override file
    override_path = os.path.join(repo, ".audit-harness.yml")
    has_override_file = os.path.isfile(override_path)
    override = parse_override(override_path) if has_override_file else {
        "disable": False, "classify_pins": [], "advisory": [], "disable_gates": []
    }
    kill = override.get("disable") or os.environ.get("AUDIT_HARNESS_DISABLE") == "1"
    for gid in os.environ.get("AUDIT_HARNESS_ADVISORY", "").split(","):
        if gid.strip():
            override["advisory"].append(gid.strip())

    found = classify(repo)

    # engineer classification pins (declared)
    for k in override.get("classify_pins", []):
        found.setdefault(k, set()).add("override:classify_pin")

    unresolved = []
    classifications = [
        {
            "kind": kind,
            "confidence": "declared" if "override:classify_pin" in found[kind] else "detected",
            "signals": sorted(found[kind]),
        }
        for kind in sorted(found)
    ]

    if not classifications:
        classifications.append({"kind": "unknown", "confidence": "unresolved", "signals": []})
        unresolved.append({
            "kind": "repo-type",
            "reason": ("no deterministic repo-type or artifact signal matched; "
                       "a human (or /audit-tests) must declare the classification"),
        })

    regulated = "regulated" in found
    gates = resolve_gates([c["kind"] for c in classifications], registry, regulated, override)

    overrides_block = None
    if has_override_file or kill:
        overrides_block = {"source": ".audit-harness.yml", "kill_switch": bool(kill)}
        if has_override_file:
            overrides_block["override_hash"] = sha256_file(override_path)

    if kill:
        # The kill-switch overrides every per-gate enforcement decision.
        for g in gates:
            g["enforcement"] = "disabled"
        print("audit-harness: KILL-SWITCH active — all gates disabled "
              "(AUDIT_HARNESS_DISABLE / .audit-harness.yml)", file=sys.stderr)

    profile = {
        "schema_version": "audit-profile/v1",
        "subject": {"name": os.path.basename(repo), "commit_sha": git_short_sha(repo), "root": "."},
        "classifier": f"audit-harness@{harness_version()}",
        "registry_hash": registry_hash,
        "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "classifications": classifications,
        "dimensions": sorted({g["dimension"] for g in gates}),
        "gates": gates,
        "unresolved": unresolved,
    }
    if overrides_block is not None:
        profile["overrides"] = overrides_block

    # subject.name from root package.json if present. The decoded value is only
    # trusted when it is an object: a package.json holding e.g. a JSON array
    # must not crash the run.
    root_pkg = read_json(os.path.join(repo, "package.json"))
    root_pkg_name = root_pkg.get("name") if isinstance(root_pkg, dict) else None
    if isinstance(root_pkg_name, str):
        profile["subject"]["name"] = root_pkg_name

    print(json.dumps(profile, indent=2))
    sys.exit(0)


if __name__ == "__main__":
    main()