@intentsolutions/audit-harness 0.1.0 → 1.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +435 -0
- package/LICENSE +202 -21
- package/NOTICE +15 -0
- package/README.md +36 -4
- package/bin/audit-harness.js +108 -8
- package/docs/gate-promotion.md +45 -0
- package/package.json +13 -9
- package/schemas/audit-profile/layer-applicability.md +146 -0
- package/schemas/audit-profile/registry.v1.json +87 -0
- package/schemas/audit-profile/v1.schema.json +294 -0
- package/schemas/conform/v1/agent-frontmatter.schema.json +24 -0
- package/schemas/conform/v1/mcp-config.schema.json +31 -0
- package/schemas/conform/v1/plugin-manifest.schema.json +26 -0
- package/schemas/conform/v1/skillmd-frontmatter.schema.json +40 -0
- package/schemas/currency/pins.v1.json +55 -0
- package/scripts/arch-check.sh +25 -1
- package/scripts/audit.py +386 -0
- package/scripts/bias-count.sh +50 -4
- package/scripts/classify.py +403 -0
- package/scripts/conform.py +481 -0
- package/scripts/crap-score.py +65 -5
- package/scripts/currency.py +118 -0
- package/scripts/emit-evidence.sh +256 -0
- package/scripts/escape-scan.sh +58 -4
- package/scripts/fp-rate.py +145 -0
- package/scripts/gen-layer-applicability.py +157 -0
- package/scripts/gherkin-lint.sh +53 -9
- package/scripts/harness-hash.sh +78 -5
- package/scripts/scan.py +228 -0
|
@@ -0,0 +1,481 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
audit-harness conform — read-only deterministic conformance gate-runner.
|
|
4
|
+
|
|
5
|
+
For every conformance-dimension gate in a repo's audit-profile/v1, conform locates
|
|
6
|
+
the relevant artifact(s) and validates them against a CONTENT-ADDRESSED schema
|
|
7
|
+
BUNDLED in this harness version (never live-fetched). Each artifact yields one
|
|
8
|
+
`gate-result/v1` Evidence Bundle row (the predicate body in
|
|
9
|
+
schemas/.../gate-result.schema.json / @intentsolutions/core), emitted as a JSON
|
|
10
|
+
array on stdout. NEVER writes to the repo.
|
|
11
|
+
|
|
12
|
+
Design rules (PP-PLAN-040 Phase 2):
|
|
13
|
+
- Deterministic + pure-local. Same commit + same harness version => identical
|
|
14
|
+
verdict. The bundled JSON-Schema is validated by an EMBEDDED subset validator
|
|
15
|
+
(not ajv) precisely BECAUSE ajv's availability/version varies per box and would
|
|
16
|
+
make signed evidence non-reproducible. The embedded validator is complete for
|
|
17
|
+
the closed bundled schemas (which use only the keyword subset it supports).
|
|
18
|
+
- Genuinely-external formats shell out: OpenAPI -> spectral, Action YAML ->
|
|
19
|
+
yamllint. Missing tool => INDETERMINATE (advisory), never a false FAIL.
|
|
20
|
+
- Advisory-first. A conformance violation on an `enforcement: advisory` gate is
|
|
21
|
+
ADVISORY (severity error), exit 0 — the finding is logged, the build is not
|
|
22
|
+
reddened. Only an engineer-promoted `enforcement: blocking` gate (or --strict)
|
|
23
|
+
turns a violation into FAIL (exit 1).
|
|
24
|
+
- conform records the bundled schema's sha256 in the gate-result `policy_hash`,
|
|
25
|
+
so a row re-verifies against the exact schema version that produced it.
|
|
26
|
+
- Stdlib only (PyYAML used for frontmatter when present; absent => indeterminate
|
|
27
|
+
rather than a guessed verdict). No network. No filesystem mutation.
|
|
28
|
+
|
|
29
|
+
Usage:
|
|
30
|
+
python3 scripts/conform.py [REPO_PATH] [--json] [--strict] [--profile PATH|-]
|
|
31
|
+
AUDIT_HARNESS_DISABLE=1 python3 scripts/conform.py # kill-switch (no-op, exit 0)
|
|
32
|
+
"""
|
|
33
|
+
import argparse
|
|
34
|
+
import hashlib
|
|
35
|
+
import json
|
|
36
|
+
import os
|
|
37
|
+
import re
|
|
38
|
+
import shutil
|
|
39
|
+
import subprocess
|
|
40
|
+
import sys
|
|
41
|
+
from datetime import datetime, timezone
|
|
42
|
+
|
|
43
|
+
HERE = os.path.dirname(os.path.abspath(__file__))
|
|
44
|
+
if HERE not in sys.path:
|
|
45
|
+
sys.path.insert(0, HERE)
|
|
46
|
+
import classify as C # noqa: E402 (sibling module; reused for the single-source profile)
|
|
47
|
+
|
|
48
|
+
try:
|
|
49
|
+
import yaml # type: ignore
|
|
50
|
+
except Exception: # pragma: no cover - exercised only on boxes without PyYAML
|
|
51
|
+
yaml = None
|
|
52
|
+
|
|
53
|
+
# Directory holding the conformance schemas that ship with this harness version.
SCHEMA_DIR = os.path.join(HERE, "..", "schemas", "conform", "v1")
# Placeholder hash (sha256 of zero bytes) used when there is nothing to hash.
EMPTY_SHA = f"sha256:{hashlib.sha256(b'').hexdigest()}"

# kind -> bundled schema filename (content-addressed in this harness version)
BUNDLED = dict(
    skillmd="skillmd-frontmatter.schema.json",
    agent="agent-frontmatter.schema.json",
    mcp="mcp-config.schema.json",
    plugin="plugin-manifest.schema.json",
)
# Kinds whose artifact is YAML frontmatter in a .md file.
FRONTMATTER_KINDS = {"skillmd", "agent"}
# Kinds whose artifact is a whole-file JSON document.
JSON_KINDS = {"mcp", "plugin", "marketplace", "hook"}
# Kinds validated by a genuinely-external linter: kind -> executable name.
SHELLOUT = dict(
    openapi="spectral",
    action="yamllint",
)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# --------------------------------------------------------------------------- #
|
|
72
|
+
# Embedded JSON-Schema subset validator (complete for the closed bundled schemas)
|
|
73
|
+
# --------------------------------------------------------------------------- #
|
|
74
|
+
def _type_ok(value, t):
|
|
75
|
+
if t == "boolean":
|
|
76
|
+
return isinstance(value, bool)
|
|
77
|
+
if t == "integer":
|
|
78
|
+
return isinstance(value, int) and not isinstance(value, bool)
|
|
79
|
+
if t == "number":
|
|
80
|
+
return isinstance(value, (int, float)) and not isinstance(value, bool)
|
|
81
|
+
if t == "null":
|
|
82
|
+
return value is None
|
|
83
|
+
if t == "object":
|
|
84
|
+
return isinstance(value, dict)
|
|
85
|
+
if t == "array":
|
|
86
|
+
return isinstance(value, list)
|
|
87
|
+
if t == "string":
|
|
88
|
+
return isinstance(value, str)
|
|
89
|
+
return True
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _json_equal(a, b):
    """Compare two JSON values the way JSON Schema `enum`/`const` require.

    Python evaluates ``True == 1`` and ``False == 0`` as equal because bool is
    a subclass of int; in JSON a boolean is never equal to a number. Lists and
    dicts are compared element-wise so the same rule holds for nested values.
    """
    if isinstance(a, bool) or isinstance(b, bool):
        return isinstance(a, bool) and isinstance(b, bool) and a == b
    if isinstance(a, dict) and isinstance(b, dict):
        return a.keys() == b.keys() and all(_json_equal(v, b[k]) for k, v in a.items())
    if isinstance(a, list) and isinstance(b, list):
        return len(a) == len(b) and all(_json_equal(x, y) for x, y in zip(a, b))
    return a == b


def validate_instance(inst, schema, path="$"):
    """Return a list of human-readable violation strings ([] == valid).

    Supported keywords: type, enum, const, minLength, maxLength, pattern,
    format (uri/url only), minItems, maxItems, items (single schema),
    required, properties, additionalProperties (False or a schema), allOf,
    anyOf, oneOf. Any other keyword is ignored.

    Args:
        inst: The parsed JSON/YAML value to check.
        schema: The schema node; anything that is not a dict validates everything.
        path: JSON-path-like prefix used in the violation messages.
    """
    errs = []
    if not isinstance(schema, dict):
        return errs

    t = schema.get("type")
    if t is not None:
        types = t if isinstance(t, list) else [t]
        if not any(_type_ok(inst, x) for x in types):
            errs.append(f"{path}: expected type {t}, got {type(inst).__name__}")
            return errs  # downstream keyword checks are meaningless on a type mismatch

    # enum/const use JSON equality, not Python equality, so that a boolean
    # instance cannot satisfy a numeric option (and vice versa).
    if "enum" in schema and not any(_json_equal(inst, option) for option in schema["enum"]):
        errs.append(f"{path}: {inst!r} not in enum {schema['enum']}")
    if "const" in schema and not _json_equal(inst, schema["const"]):
        errs.append(f"{path}: {inst!r} != const {schema['const']!r}")

    if isinstance(inst, str):
        if "minLength" in schema and len(inst) < schema["minLength"]:
            errs.append(f"{path}: shorter than minLength {schema['minLength']}")
        if "maxLength" in schema and len(inst) > schema["maxLength"]:
            errs.append(f"{path}: longer than maxLength {schema['maxLength']}")
        # re.search: the pattern may match anywhere unless it anchors itself.
        if "pattern" in schema and re.search(schema["pattern"], inst) is None:
            errs.append(f"{path}: does not match pattern {schema['pattern']!r}")
        # uri/url is only checked for a leading "scheme:" prefix.
        if schema.get("format") in ("uri", "url") and not re.match(r"^[a-zA-Z][a-zA-Z0-9+.\-]*:", inst):
            errs.append(f"{path}: not a {schema['format']}")

    if isinstance(inst, list):
        if "minItems" in schema and len(inst) < schema["minItems"]:
            errs.append(f"{path}: fewer than minItems {schema['minItems']}")
        if "maxItems" in schema and len(inst) > schema["maxItems"]:
            errs.append(f"{path}: more than maxItems {schema['maxItems']}")
        item_schema = schema.get("items")
        if isinstance(item_schema, dict):
            for i, el in enumerate(inst):
                errs += validate_instance(el, item_schema, f"{path}[{i}]")

    if isinstance(inst, dict):
        for req in schema.get("required", []):
            if req not in inst:
                errs.append(f"{path}: missing required property '{req}'")
        props = schema.get("properties", {})
        for k, sub in props.items():
            if k in inst:
                errs += validate_instance(inst[k], sub, f"{path}.{k}")
        ap = schema.get("additionalProperties", True)
        if ap is False:
            for k in inst:
                if k not in props:
                    errs.append(f"{path}: additional property '{k}' not allowed")
        elif isinstance(ap, dict):
            # Keys not named in `properties` must satisfy the additionalProperties schema.
            for k, v in inst.items():
                if k not in props:
                    errs += validate_instance(v, ap, f"{path}.{k}")

    for sub in schema.get("allOf", []):
        errs += validate_instance(inst, sub, path)
    if "anyOf" in schema:
        if not any(not validate_instance(inst, sub, path) for sub in schema["anyOf"]):
            errs.append(f"{path}: matches none of anyOf")
    if "oneOf" in schema:
        matches = sum(1 for sub in schema["oneOf"] if not validate_instance(inst, sub, path))
        if matches != 1:
            errs.append(f"{path}: matched {matches} of oneOf branches (need exactly 1)")
    return errs
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
# --------------------------------------------------------------------------- #
|
|
161
|
+
# Artifact location + parsing
|
|
162
|
+
# --------------------------------------------------------------------------- #
|
|
163
|
+
# Directory names never descended into while searching for artifacts.
SKIP_DIRS = ("node_modules", ".git", ".venv", "dist", "build",
             "fixtures", "tests", "test", "__tests__", "examples")


def find_files(repo, name, max_depth=3):
    """Return the sorted absolute paths of every file called `name` under `repo`.

    Directories listed in SKIP_DIRS are not entered. The repo root is depth 0
    and a file is reported only when its containing directory is at depth
    `max_depth` or shallower.

    Args:
        repo: Root directory to search (made absolute).
        name: Exact file name to look for.
        max_depth: Deepest directory level whose files are considered.
    """
    repo = os.path.abspath(repo)
    out = []
    for root, dirs, files in os.walk(repo):
        depth = root[len(repo):].count(os.sep)
        if depth > max_depth:
            # Only reachable when max_depth is negative: nothing qualifies.
            dirs[:] = []
            continue
        if name in files:
            out.append(os.path.join(root, name))
        if depth == max_depth:
            # Children would be past the limit; pruning here avoids listing
            # a whole extra directory level just to throw it away.
            dirs[:] = []
        else:
            dirs[:] = [d for d in dirs if d not in SKIP_DIRS]
    return sorted(out)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def locate(kind, repo):
    """Return the artifact file paths for one conformance kind.

    Each kind has a fixed search rule: SKILL.md is searched recursively via
    find_files, agents and hooks are listed from well-known directories, and
    the remaining kinds are looked up at fixed repo-relative paths. An
    unrecognised kind yields an empty list.
    """
    repo = os.path.abspath(repo)

    def present(*rels):
        # Every candidate that exists as a regular file, in the order given.
        candidates = (os.path.join(repo, rel) for rel in rels)
        return [path for path in candidates if os.path.isfile(path)]

    def listed(directory, suffix):
        # Name-sorted entries of `directory` ending in `suffix`; [] if it is absent.
        if not os.path.isdir(directory):
            return []
        return [os.path.join(directory, entry)
                for entry in sorted(os.listdir(directory))
                if entry.endswith(suffix)]

    if kind == "skillmd":
        return find_files(repo, "SKILL.md")

    if kind == "agent":
        found = []
        for agent_dir in (os.path.join(repo, "agents"), os.path.join(repo, ".claude", "agents")):
            found.extend(listed(agent_dir, ".md"))
        return found

    if kind == "mcp":
        found = present(".mcp.json")
        for pkg_dir in C.list_pkg_subdirs(repo):
            candidate = os.path.join(pkg_dir, ".mcp.json")
            if os.path.isfile(candidate):
                found.append(candidate)
        return found

    if kind == "hook":
        return present("hooks/hooks.json") + listed(os.path.join(repo, ".claude", "hooks"), ".json")

    fixed_paths = {
        "plugin": (".claude-plugin/plugin.json", "plugin.json"),
        "marketplace": (".claude-plugin/marketplace.json", "marketplace.json"),
        "openapi": ("openapi.yaml", "openapi.yml", "openapi.json",
                    "swagger.yaml", "swagger.json"),
        "action": ("action.yml", "action.yaml"),
    }
    for known, rels in fixed_paths.items():
        if kind == known:
            return present(*rels)
    return []
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def parse_json_file(path):
    """Load a UTF-8 JSON file, returning (value, None) or (None, reason).

    The reason starts with "json-parse-error: " for malformed JSON and with
    "read-error: " for any other failure (missing file, bad encoding, ...).
    """
    try:
        with open(path, "r", encoding="utf-8") as handle:
            raw = handle.read()
        parsed = json.loads(raw)
    except json.JSONDecodeError as exc:
        return None, f"json-parse-error: {exc}"
    except Exception as exc:
        return None, f"read-error: {exc}"
    return parsed, None
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def extract_frontmatter(path):
    """Return (dict, None) or (None, reason). Requires PyYAML for a reliable verdict.

    The frontmatter block is the text between an opening `---` line, which
    must be the first line of the file, and the next `---` line. Trailing
    spaces/tabs on either delimiter line are tolerated, as is a UTF-8
    byte-order mark at the start of the file. An empty block parses to {}.

    Reasons: "read-error: ...", "no-frontmatter-block",
    "unterminated-frontmatter-block", "pyyaml-unavailable",
    "yaml-parse-error: ...", "frontmatter-not-a-mapping".
    """
    try:
        # utf-8-sig drops a leading BOM, which str.lstrip() would not remove
        # and which would otherwise make the file look like it has no block.
        with open(path, "r", encoding="utf-8-sig") as f:
            text = f.read()
    except Exception as e:
        return None, f"read-error: {e}"
    if not text.lstrip().startswith("---"):
        return None, "no-frontmatter-block"

    # Text-mode reading has already normalised line endings to "\n".
    lines = text.split("\n")
    closing = None
    # A file whose `---` is preceded by blank lines, or whose first line is
    # not exactly `---`, falls through to the "unterminated" reason below.
    if lines[0].rstrip(" \t") == "---":
        for idx, line in enumerate(lines[1:], start=1):
            if line.rstrip(" \t") == "---":
                closing = idx
                break
    if closing is None:
        return None, "unterminated-frontmatter-block"
    block = "\n".join(lines[1:closing])

    if yaml is None:
        return None, "pyyaml-unavailable"  # honest indeterminate, not a guessed parse
    try:
        data = yaml.safe_load(block)
    except Exception as e:
        return None, f"yaml-parse-error: {e}"
    if data is None:
        data = {}
    if not isinstance(data, dict):
        return None, "frontmatter-not-a-mapping"
    return data, None
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
# --------------------------------------------------------------------------- #
|
|
258
|
+
# gate-result/v1 row construction
|
|
259
|
+
# --------------------------------------------------------------------------- #
|
|
260
|
+
def sha256_path(path):
    """Hash a file via classify.sha256_file, falling back to EMPTY_SHA on any error."""
    try:
        digest = C.sha256_file(path)
    except Exception:
        digest = EMPTY_SHA
    return digest
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def sha256_str(s):
    """Return "sha256:<hex>" for the UTF-8 encoding of the string `s`."""
    hasher = hashlib.sha256()
    hasher.update(s.encode("utf-8"))
    return f"sha256:{hasher.hexdigest()}"
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def make_row(gate_id, result, *, policy_hash, input_hash, commit_sha, runner,
             metadata=None, failure_mode=None, advisory_severity=None):
    """Build one gate-result row as a plain dict.

    The timestamp is the current UTC time at second resolution. `metadata` is
    included only when truthy; `failure_mode` and `advisory_severity` are
    included only when they are not None.
    """
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    row = dict(
        gate_id=gate_id,
        result=result,
        policy_hash=policy_hash,
        input_hash=input_hash,
        timestamp=stamp,
        runner=runner,
        commit_sha=commit_sha,
    )
    optional = (
        ("metadata", metadata, bool(metadata)),
        ("failure_mode", failure_mode, failure_mode is not None),
        ("advisory_severity", advisory_severity, advisory_severity is not None),
    )
    row.update((key, value) for key, value, keep in optional if keep)
    return row
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def verdict_for(errs, parse_err, enforcement, strict):
    """Map (violations, parse error, enforcement) -> (result, failure_mode, severity).

    No violations and no parse error gives PASS. Otherwise the outcome is FAIL
    when `strict` is set or the gate's enforcement is "blocking", and ADVISORY
    with severity "error" in every other case.
    """
    parse_failed = parse_err is not None
    if not errs and not parse_failed:
        return "PASS", None, None
    if not strict and enforcement != "blocking":
        return "ADVISORY", None, "error"
    failure_mode = "conform:parse-error" if parse_failed else "conform:schema-violation"
    return "FAIL", failure_mode, None
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
# --------------------------------------------------------------------------- #
|
|
304
|
+
def compute_profile(repo, registry_path, profile_arg):
    """Return the audit profile as parsed JSON.

    Sources, in priority order: stdin when `profile_arg` is "-", the file at
    `profile_arg` when it is otherwise truthy, else the stdout of running the
    sibling classify.py on `repo` with `--registry registry_path`. If that
    subprocess exits non-zero its stderr is forwarded and SystemExit(2) is raised.
    """
    if profile_arg == "-":
        return json.loads(sys.stdin.read())
    if profile_arg:
        with open(profile_arg, "r", encoding="utf-8") as handle:
            return json.loads(handle.read())

    classify_cmd = [
        sys.executable,
        os.path.join(HERE, "classify.py"),
        repo,
        "--registry",
        registry_path,
    ]
    completed = subprocess.run(classify_cmd, capture_output=True, text=True)
    if completed.returncode == 0:
        return json.loads(completed.stdout)
    sys.stderr.write(completed.stderr)
    raise SystemExit(2)
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def kind_of(gate_id):
    """audit-harness:local:conform-skillmd -> 'skillmd'.

    Takes the text after the last ":" and strips a leading "conform-" if present.
    """
    prefix = "conform-"
    tail = gate_id.rpartition(":")[2]
    if tail.startswith(prefix):
        return tail[len(prefix):]
    return tail
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def run_shellout(kind, gate, files, commit_sha, runner, repo, strict):
    """Lint each artifact with the external tool mapped to `kind`; return result rows.

    If the tool is not on PATH, a single indeterminate ADVISORY row (severity
    "warn") is returned. Otherwise one row is produced per artifact: an
    indeterminate ADVISORY row if launching the tool raised (including the
    120 s timeout), else the verdict from `verdict_for`, where a non-zero exit
    code counts as a violation and up to 2000 characters of the tool's output
    are kept as the error text.
    """
    tool = SHELLOUT[kind]
    enforcement = gate.get("enforcement", "advisory")
    # The external linter's default ruleset is identified by name only.
    policy_hash = sha256_str(f"{tool}:default")

    if shutil.which(tool) is None:
        first_hash = sha256_path(files[0]) if files else EMPTY_SHA
        return [make_row(
            gate["gate_id"], "ADVISORY",
            policy_hash=policy_hash,
            input_hash=first_hash,
            commit_sha=commit_sha, runner=runner, advisory_severity="warn",
            metadata={"kind": kind, "validator": tool, "indeterminate": True,
                      "reason": f"{tool} not on PATH — conformance unmeasured"},
        )]

    rows = []
    for art in files:
        # spectral takes a "lint" subcommand; the other tool takes the path directly.
        argv = [tool, "lint", art] if tool == "spectral" else [tool, art]
        try:
            proc = subprocess.run(argv, capture_output=True, text=True, timeout=120)
            if proc.returncode == 0:
                errs = []
            else:
                errs = [(proc.stdout or proc.stderr).strip()[:2000]]
        except Exception as exc:
            # The tool could not be run to completion: unmeasured, not a violation.
            rows.append(make_row(
                gate["gate_id"], "ADVISORY",
                policy_hash=policy_hash, input_hash=sha256_path(art),
                commit_sha=commit_sha, runner=runner, advisory_severity="warn",
                metadata={"kind": kind, "validator": tool, "indeterminate": True,
                          "artifact_path": os.path.relpath(art, repo), "reason": str(exc)},
            ))
            continue

        result, fm, sev = verdict_for(errs, None, enforcement, strict)
        rows.append(make_row(
            gate["gate_id"], result,
            policy_hash=policy_hash, input_hash=sha256_path(art),
            commit_sha=commit_sha, runner=runner, failure_mode=fm, advisory_severity=sev,
            metadata={"kind": kind, "validator": tool,
                      "artifact_path": os.path.relpath(art, repo),
                      "errors": errs[:20]},
        ))
    return rows
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def run_bundled(kind, gate, files, commit_sha, runner, repo, strict):
    """Validate each artifact against the schema bundled for `kind`; return result rows.

    If the bundled schema cannot be loaded, a single indeterminate ADVISORY row
    is returned. Otherwise one row is produced per artifact, with the schema
    file's hash as `policy_hash`. Frontmatter kinds are parsed from the file's
    YAML frontmatter and all others as whole-file JSON. A missing PyYAML gives
    an indeterminate ADVISORY row; any other parse error or schema violation
    goes through `verdict_for`.
    """
    enforcement = gate.get("enforcement", "advisory")
    schema_path = os.path.join(SCHEMA_DIR, BUNDLED[kind])
    schema = C.read_json(schema_path)
    if schema is None:
        return [make_row(
            gate["gate_id"], "ADVISORY",
            policy_hash=EMPTY_SHA, input_hash=EMPTY_SHA, commit_sha=commit_sha,
            runner=runner, advisory_severity="warn",
            metadata={"kind": kind, "indeterminate": True,
                      "reason": f"bundled schema missing at {BUNDLED[kind]}"},
        )]

    policy_hash = C.sha256_file(schema_path)
    schema_id = schema.get("$id", "")
    parse = extract_frontmatter if kind in FRONTMATTER_KINDS else parse_json_file

    rows = []
    for art in files:
        data, parse_err = parse(art)
        rel_path = os.path.relpath(art, repo)

        if parse_err == "pyyaml-unavailable":
            # No YAML parser on this box: report unmeasured rather than guess.
            rows.append(make_row(
                gate["gate_id"], "ADVISORY",
                policy_hash=policy_hash, input_hash=sha256_path(art), commit_sha=commit_sha,
                runner=runner, advisory_severity="warn",
                metadata={"kind": kind, "validator": "audit-harness-embedded-subset",
                          "schema_id": schema_id, "indeterminate": True,
                          "artifact_path": rel_path,
                          "reason": "PyYAML unavailable — frontmatter conformance unmeasured"},
            ))
            continue

        parsed_ok = parse_err is None
        errs = validate_instance(data, schema) if parsed_ok else []
        result, fm, sev = verdict_for(errs, parse_err, enforcement, strict)

        meta = {"kind": kind, "validator": "audit-harness-embedded-subset",
                "schema_id": schema_id, "artifact_path": rel_path}
        if not parsed_ok:
            meta["errors"] = [parse_err]
        elif errs:
            meta["errors"] = errs[:20]  # cap the number of violations recorded per row

        rows.append(make_row(
            gate["gate_id"], result, policy_hash=policy_hash, input_hash=sha256_path(art),
            commit_sha=commit_sha, runner=runner, failure_mode=fm, advisory_severity=sev,
            metadata=meta,
        ))
    return rows
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
def main():
    """CLI entry point: run every enabled conformance gate and print the rows.

    Prints the rows as a JSON array on stdout and a one-line tally on stderr,
    then exits 1 if any row is FAIL and 0 otherwise. When the kill-switch is
    active (`disable` in .audit-harness.yml or AUDIT_HARNESS_DISABLE=1) it
    prints "[]" and exits 0 without running anything.
    """
    parser = argparse.ArgumentParser(description="Read-only conformance gate-runner -> gate-result/v1 rows")
    parser.add_argument("repo", nargs="?", default=".", help="Repo path (default: cwd)")
    parser.add_argument("--json", action="store_true", help="Emit JSON (default; flag is for CLI symmetry)")
    parser.add_argument("--strict", action="store_true",
                        help="Treat every conformance violation as FAIL (exit 1), ignoring advisory default")
    parser.add_argument("--registry", default=C.DEFAULT_REGISTRY, help="Path to the dimension-to-gate registry")
    parser.add_argument("--profile", default=None,
                        help="Use a pinned audit-profile/v1 (PATH or '-' for stdin) instead of classifying")
    args = parser.parse_args()

    repo = os.path.abspath(args.repo)
    registry_path = os.path.abspath(args.registry)
    runner = f"audit-harness@{C.harness_version()}"

    # Kill-switch: a repo-level override file or an environment variable.
    override_path = os.path.join(repo, ".audit-harness.yml")
    if os.path.isfile(override_path):
        override = C.parse_override(override_path)
    else:
        override = {"disable": False}
    if override.get("disable") or os.environ.get("AUDIT_HARNESS_DISABLE") == "1":
        sys.stderr.write("audit-harness: KILL-SWITCH active — conform skipped (no rows emitted)\n")
        print("[]")
        sys.exit(0)

    profile = compute_profile(repo, registry_path, args.profile)
    commit_sha = profile.get("subject", {}).get("commit_sha") or C.git_short_sha(repo)

    # Only conformance-dimension gates that are not disabled are run.
    conf_gates = []
    for candidate in profile.get("gates", []):
        if candidate.get("dimension") == "conformance" and candidate.get("enforcement") != "disabled":
            conf_gates.append(candidate)

    rows = []
    for gate in conf_gates:
        kind = kind_of(gate["gate_id"])
        files = locate(kind, repo)
        if not files:
            rows.append(make_row(
                gate["gate_id"], "NOT_APPLICABLE",
                policy_hash=EMPTY_SHA, input_hash=EMPTY_SHA, commit_sha=commit_sha,
                runner=runner, metadata={"kind": kind, "reason": "no matching artifact found in repo"},
            ))
        elif kind in BUNDLED:
            rows.extend(run_bundled(kind, gate, files, commit_sha, runner, repo, args.strict))
        elif kind in SHELLOUT:
            rows.extend(run_shellout(kind, gate, files, commit_sha, runner, repo, args.strict))
        else:
            # Artifact exists but this harness version has no way to check it.
            rows.append(make_row(
                gate["gate_id"], "ADVISORY",
                policy_hash=EMPTY_SHA, input_hash=sha256_path(files[0]), commit_sha=commit_sha,
                runner=runner, advisory_severity="warn",
                metadata={"kind": kind, "indeterminate": True,
                          "reason": f"no bundled conform schema for kind '{kind}' in this harness version"},
            ))

    print(json.dumps(rows, indent=2))

    tally = {"PASS": 0, "ADVISORY": 0, "FAIL": 0}
    for row in rows:
        if row["result"] in tally:
            tally[row["result"]] += 1
    n_pass, n_adv, n_fail = tally["PASS"], tally["ADVISORY"], tally["FAIL"]
    sys.stderr.write(f"audit-harness conform: {n_pass} PASS, {n_adv} ADVISORY, {n_fail} FAIL "
                     f"across {len(rows)} row(s)\n")
    sys.exit(1 if n_fail else 0)


if __name__ == "__main__":
    main()
|
package/scripts/crap-score.py
CHANGED
|
@@ -16,6 +16,7 @@ from __future__ import annotations
|
|
|
16
16
|
|
|
17
17
|
import argparse
|
|
18
18
|
import csv
|
|
19
|
+
import hashlib
|
|
19
20
|
import json
|
|
20
21
|
import os
|
|
21
22
|
import shutil
|
|
@@ -36,6 +37,19 @@ class MethodScore:
|
|
|
36
37
|
kind: str # "src" or "test"
|
|
37
38
|
|
|
38
39
|
|
|
40
|
+
# Directories to skip during candidate discovery AND the --json input-hash
|
|
41
|
+
# walk. Single source of truth — both call sites MUST use this set so a repo
|
|
42
|
+
# with `reports/` (or `.next/`, `.nuxt/`, `.cache/`) gets identical treatment
|
|
43
|
+
# in both the candidate scan and the input-hash computation. Adding a dir
|
|
44
|
+
# here removes it from BOTH passes; that's the invariant this constant exists
|
|
45
|
+
# to preserve.
|
|
46
|
+
EXCLUDED_DIRS = {
|
|
47
|
+
".git", ".venv", "venv", "node_modules", "__pycache__",
|
|
48
|
+
"dist", "build", "target", ".tox", ".mypy_cache", ".pytest_cache",
|
|
49
|
+
".next", ".nuxt", ".cache", "reports",
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
|
|
39
53
|
def crap(complexity: int, coverage_pct: float) -> float:
|
|
40
54
|
cov = max(0.0, min(100.0, coverage_pct)) / 100.0
|
|
41
55
|
return (complexity ** 2) * ((1.0 - cov) ** 3) + complexity
|
|
@@ -81,12 +95,11 @@ def score_python(root: Path, kind: str) -> list[MethodScore]:
|
|
|
81
95
|
scanned = [t for t in candidates if (root / t).is_dir()]
|
|
82
96
|
if not scanned:
|
|
83
97
|
test_dirs = {"tests", "test", "spec", "specs", "features", "__tests__"}
|
|
84
|
-
ignore = {".git", ".venv", "venv", "node_modules", "dist", "build", "target", ".tox", ".mypy_cache", ".pytest_cache", "reports", "__pycache__"}
|
|
85
98
|
scanned = [
|
|
86
99
|
p.name for p in root.iterdir()
|
|
87
100
|
if p.is_dir()
|
|
88
101
|
and not p.name.startswith(".")
|
|
89
|
-
and p.name not in
|
|
102
|
+
and p.name not in EXCLUDED_DIRS
|
|
90
103
|
and p.name not in test_dirs
|
|
91
104
|
and any(p.rglob("*.py"))
|
|
92
105
|
]
|
|
@@ -171,7 +184,7 @@ def score_go(root: Path, kind: str) -> list[MethodScore]:
|
|
|
171
184
|
|
|
172
185
|
coverage: dict[str, float] = {}
|
|
173
186
|
cov_out = root / "coverage.out"
|
|
174
|
-
if not cov_out.is_file():
|
|
187
|
+
if not cov_out.is_file() and which_or_none("go"):
|
|
175
188
|
run(["go", "test", "-coverprofile=coverage.out", "-covermode=atomic", "./..."], root)
|
|
176
189
|
if cov_out.is_file() and which_or_none("go"):
|
|
177
190
|
rc, out, _ = run(["go", "tool", "cover", "-func=coverage.out"], root)
|
|
@@ -263,7 +276,6 @@ def score_rust(root: Path, kind: str) -> list[MethodScore]:
|
|
|
263
276
|
except json.JSONDecodeError:
|
|
264
277
|
continue
|
|
265
278
|
fpath = rec.get("name", "")
|
|
266
|
-
metrics = rec.get("metrics", {}).get("cyclomatic", {})
|
|
267
279
|
for func in rec.get("spaces", []):
|
|
268
280
|
c = int(func.get("metrics", {}).get("cyclomatic", {}).get("sum", 1))
|
|
269
281
|
complexity.append((fpath, func.get("name", "<anon>"), c))
|
|
@@ -302,6 +314,10 @@ def main() -> int:
|
|
|
302
314
|
help="Test CRAP max (default 15)")
|
|
303
315
|
ap.add_argument("--threshold-avg", type=float, default=10.0,
|
|
304
316
|
help="Project average max (default 10)")
|
|
317
|
+
ap.add_argument("--json", action="store_true",
|
|
318
|
+
help="Emit gate-result envelope JSON on stdout (suitable for piping "
|
|
319
|
+
"to `audit-harness emit-evidence`). Preserves existing CSV/JSON "
|
|
320
|
+
"files written under --out.")
|
|
305
321
|
args = ap.parse_args()
|
|
306
322
|
|
|
307
323
|
root = Path(args.root).resolve()
|
|
@@ -377,7 +393,51 @@ def main() -> int:
|
|
|
377
393
|
if args.format in ("json", "both"):
|
|
378
394
|
(out_dir / "summary.json").write_text(json.dumps(summary, indent=2))
|
|
379
395
|
|
|
380
|
-
|
|
396
|
+
if args.json:
|
|
397
|
+
side = os.environ.get("AUDIT_HARNESS_SIDE", "ci")
|
|
398
|
+
# input_hash: SHA256 over all production+test source-file contents under root, sorted.
|
|
399
|
+
# Use os.walk with directory pruning instead of rglob — large vendored trees
|
|
400
|
+
# (node_modules, .venv, .git, build outputs) would otherwise dominate the walk
|
|
401
|
+
# cost on big repos and waste IO on files we already filter out by extension.
|
|
402
|
+
digest = hashlib.sha256()
|
|
403
|
+
exts = (".py", ".ts", ".tsx", ".js", ".jsx", ".go", ".rs", ".java", ".kt", ".cs", ".php", ".rb")
|
|
404
|
+
collected: list[Path] = []
|
|
405
|
+
for dirpath, dirs, files in os.walk(root):
|
|
406
|
+
dirs[:] = [d for d in dirs if d not in EXCLUDED_DIRS]
|
|
407
|
+
for fn in files:
|
|
408
|
+
if fn.endswith(exts):
|
|
409
|
+
collected.append(Path(dirpath) / fn)
|
|
410
|
+
for fp in sorted(collected):
|
|
411
|
+
digest.update(fp.read_bytes())
|
|
412
|
+
input_hash = f"sha256:{digest.hexdigest()}"
|
|
413
|
+
# policy_hash: SHA256 over the threshold tuple (stable, deterministic)
|
|
414
|
+
policy_repr = f"prod={args.threshold_prod}|test={args.threshold_test}|avg={args.threshold_avg}".encode()
|
|
415
|
+
policy_hash = f"sha256:{hashlib.sha256(policy_repr).hexdigest()}"
|
|
416
|
+
result = "PASS" if pass_ else "FAIL"
|
|
417
|
+
envelope = {
|
|
418
|
+
"gate_id": f"audit-harness:{side}:crap-score",
|
|
419
|
+
"result": result,
|
|
420
|
+
"input_hash": input_hash,
|
|
421
|
+
"policy_hash": policy_hash,
|
|
422
|
+
"metadata": {
|
|
423
|
+
"language": lang,
|
|
424
|
+
"thresholds": summary["thresholds"],
|
|
425
|
+
"production_max_crap": summary["production"]["max_crap"],
|
|
426
|
+
"production_avg_crap": summary["production"]["avg_crap"],
|
|
427
|
+
"production_methods_scored": summary["production"]["methods_scored"],
|
|
428
|
+
"production_blockers_count": len(prod_blockers),
|
|
429
|
+
"test_max_crap": summary["test"]["max_crap"],
|
|
430
|
+
"test_methods_scored": summary["test"]["methods_scored"],
|
|
431
|
+
"test_blockers_count": len(test_blockers),
|
|
432
|
+
"avg_fail": avg_fail,
|
|
433
|
+
"summary_path": str(out_dir / "summary.json"),
|
|
434
|
+
},
|
|
435
|
+
}
|
|
436
|
+
if not pass_:
|
|
437
|
+
envelope["failure_mode"] = "crap-threshold-exceeded"
|
|
438
|
+
print(json.dumps(envelope))
|
|
439
|
+
else:
|
|
440
|
+
print(json.dumps({"pass": pass_, "summary_path": str(out_dir / "summary.json")}))
|
|
381
441
|
return 0 if pass_ else 1
|
|
382
442
|
|
|
383
443
|
|