@intentsolutions/audit-harness 0.1.0 → 1.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +435 -0
- package/LICENSE +202 -21
- package/NOTICE +15 -0
- package/README.md +36 -4
- package/bin/audit-harness.js +108 -8
- package/docs/gate-promotion.md +45 -0
- package/package.json +13 -9
- package/schemas/audit-profile/layer-applicability.md +146 -0
- package/schemas/audit-profile/registry.v1.json +87 -0
- package/schemas/audit-profile/v1.schema.json +294 -0
- package/schemas/conform/v1/agent-frontmatter.schema.json +24 -0
- package/schemas/conform/v1/mcp-config.schema.json +31 -0
- package/schemas/conform/v1/plugin-manifest.schema.json +26 -0
- package/schemas/conform/v1/skillmd-frontmatter.schema.json +40 -0
- package/schemas/currency/pins.v1.json +55 -0
- package/scripts/arch-check.sh +25 -1
- package/scripts/audit.py +386 -0
- package/scripts/bias-count.sh +50 -4
- package/scripts/classify.py +403 -0
- package/scripts/conform.py +481 -0
- package/scripts/crap-score.py +65 -5
- package/scripts/currency.py +118 -0
- package/scripts/emit-evidence.sh +256 -0
- package/scripts/escape-scan.sh +58 -4
- package/scripts/fp-rate.py +145 -0
- package/scripts/gen-layer-applicability.py +157 -0
- package/scripts/gherkin-lint.sh +53 -9
- package/scripts/harness-hash.sh +78 -5
- package/scripts/scan.py +228 -0
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://evals.intentsolutions.io/conform/v1/skillmd-frontmatter.schema.json",
|
|
4
|
+
"title": "SKILL.md frontmatter — deterministic structural floor (conform/v1)",
|
|
5
|
+
"description": "The DETERMINISTIC STRUCTURAL FLOOR for a SKILL.md frontmatter block: it parses, carries the Anthropic-spec required pair (name + description), and the known optional fields are the right type. This is NOT the IS 100-point marketplace rubric or the 8-field enterprise required set — that grading is judgment-bearing and stays in /validate-skillmd (Tier 1) + the SAK authoring kernel. conform validates the floor; Claude adds judgment on top. additionalProperties is true because SKILL.md frontmatter is intentionally extensible (model, effort, context, hooks, metadata, user-invocable, etc.).",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": ["name", "description"],
|
|
8
|
+
"additionalProperties": true,
|
|
9
|
+
"properties": {
|
|
10
|
+
"name": {
|
|
11
|
+
"type": "string",
|
|
12
|
+
"minLength": 1,
|
|
13
|
+
"pattern": "^[a-zA-Z0-9][a-zA-Z0-9_-]*$"
|
|
14
|
+
},
|
|
15
|
+
"description": {
|
|
16
|
+
"type": "string",
|
|
17
|
+
"minLength": 1
|
|
18
|
+
},
|
|
19
|
+
"allowed-tools": {
|
|
20
|
+
"anyOf": [
|
|
21
|
+
{ "type": "string" },
|
|
22
|
+
{ "type": "array", "items": { "type": "string" } }
|
|
23
|
+
]
|
|
24
|
+
},
|
|
25
|
+
"disallowed-tools": {
|
|
26
|
+
"anyOf": [
|
|
27
|
+
{ "type": "string" },
|
|
28
|
+
{ "type": "array", "items": { "type": "string" } }
|
|
29
|
+
]
|
|
30
|
+
},
|
|
31
|
+
"version": { "type": "string" },
|
|
32
|
+
"author": { "type": "string" },
|
|
33
|
+
"license": { "type": "string" },
|
|
34
|
+
"compatibility": { "type": "string", "maxLength": 500 },
|
|
35
|
+
"tags": { "type": "array", "items": { "type": "string" } },
|
|
36
|
+
"user-invocable": { "type": "boolean" },
|
|
37
|
+
"disable-model-invocation": { "type": "boolean" },
|
|
38
|
+
"argument-hint": { "type": "string" }
|
|
39
|
+
}
|
|
40
|
+
}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
{
|
|
2
|
+
"pins_version": "currency-pins/v1",
|
|
3
|
+
"description": "Per-upstream-identity pin relation. Each upstream the harness/skills depend on carries ITS OWN pinned version + the date it was last verified against upstream (checked_at) + a staleness window. The `currency` advisory report reads this datum and flags pins whose checked_at is older than their window — i.e. it makes the PIN'S OWN STALENESS detectable, without ever live-fetching. Currency is advisory-only: it reports + (in /sync-testing-harness) opens PRs; it has no exit-code authority and never auto-fixes. Updating a pin (after a human re-verifies against upstream) is an engineer edit to this file + a fresh checked_at.",
|
|
4
|
+
"default_staleness_window_days": 90,
|
|
5
|
+
"pins": [
|
|
6
|
+
{
|
|
7
|
+
"identity": "mcp-spec",
|
|
8
|
+
"pinned_version": "2025-06-18",
|
|
9
|
+
"source": "https://spec.modelcontextprotocol.io/ (protocol revision)",
|
|
10
|
+
"checked_at": "2026-06-06",
|
|
11
|
+
"staleness_window_days": 90,
|
|
12
|
+
"notes": "MCP protocol spec revision the .mcp.json conform schema targets."
|
|
13
|
+
},
|
|
14
|
+
{
|
|
15
|
+
"identity": "skill-md-schema",
|
|
16
|
+
"pinned_version": "3.7.0",
|
|
17
|
+
"source": "claude-code-plugins 000-docs/SCHEMA_CHANGELOG.md",
|
|
18
|
+
"checked_at": "2026-06-06",
|
|
19
|
+
"staleness_window_days": 90,
|
|
20
|
+
"notes": "IS SKILL.md schema the conform skillmd-frontmatter floor tracks (full rubric stays in /validate-skillmd)."
|
|
21
|
+
},
|
|
22
|
+
{
|
|
23
|
+
"identity": "claude-code",
|
|
24
|
+
"pinned_version": "2.1.152",
|
|
25
|
+
"source": "https://code.claude.com/docs/en/changelog",
|
|
26
|
+
"checked_at": "2026-06-06",
|
|
27
|
+
"staleness_window_days": 60,
|
|
28
|
+
"notes": "Claude Code release (added disallowed-tools frontmatter at 2.1.152)."
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"identity": "gate-result-predicate",
|
|
32
|
+
"pinned_version": "v1",
|
|
33
|
+
"source": "@intentsolutions/core gate-result/v1 (https://evals.intentsolutions.io/gate-result/v1)",
|
|
34
|
+
"checked_at": "2026-06-06",
|
|
35
|
+
"staleness_window_days": 180,
|
|
36
|
+
"notes": "Evidence Bundle predicate every gate emits against. Immutable URI; bump = new version, never overwrite."
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
"identity": "anthropic-sdk",
|
|
40
|
+
"pinned_version": "unverified",
|
|
41
|
+
"source": "https://github.com/anthropics/anthropic-sdk-python (+ -typescript)",
|
|
42
|
+
"checked_at": "2026-06-06",
|
|
43
|
+
"staleness_window_days": 90,
|
|
44
|
+
"notes": "Anthropic SDK surface referenced by downstream skills; pinned_version=unverified until first deliberate verification."
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
"identity": "agentskills-spec",
|
|
48
|
+
"pinned_version": "1.0.0",
|
|
49
|
+
"source": "https://agentskills.io/specification",
|
|
50
|
+
"checked_at": "2026-06-06",
|
|
51
|
+
"staleness_window_days": 90,
|
|
52
|
+
"notes": "Open SKILL.md standard (compatibility/metadata/license fields)."
|
|
53
|
+
}
|
|
54
|
+
]
|
|
55
|
+
}
|
package/scripts/arch-check.sh
CHANGED
|
@@ -39,7 +39,31 @@ mkdir -p "$REPORT_DIR"
|
|
|
39
39
|
emit_result() {
|
|
40
40
|
local tool="$1" status="$2" violations="$3" log="$4"
|
|
41
41
|
if [[ "$JSON_OUT" -eq 1 ]]; then
|
|
42
|
-
|
|
42
|
+
# status: pass / fail / missing-tool / not-configured
|
|
43
|
+
local result
|
|
44
|
+
case "$status" in
|
|
45
|
+
pass) result="PASS" ;;
|
|
46
|
+
fail) result="FAIL" ;;
|
|
47
|
+
missing-tool|not-configured) result="NOT_APPLICABLE" ;;
|
|
48
|
+
*) result="ADVISORY" ;;
|
|
49
|
+
esac
|
|
50
|
+
local input_hash="sha256:0000000000000000000000000000000000000000000000000000000000000000"
|
|
51
|
+
local policy_hash="sha256:0000000000000000000000000000000000000000000000000000000000000000"
|
|
52
|
+
# Best-effort: input_hash is the source tree fingerprint when running against ROOT/src
|
|
53
|
+
if [[ -d "${ROOT}/src" ]]; then
|
|
54
|
+
input_hash=$(find "${ROOT}/src" -type f \( -name "*.ts" -o -name "*.tsx" -o -name "*.js" -o -name "*.py" -o -name "*.go" -o -name "*.rs" -o -name "*.java" -o -name "*.kt" -o -name "*.cs" -o -name "*.php" \) -exec sha256sum {} \; 2>/dev/null | sort | sha256sum | awk '{print "sha256:"$1}')
|
|
55
|
+
fi
|
|
56
|
+
# Hash the architecture rule config (whichever tool's config was used)
|
|
57
|
+
for cfg in .dependency-cruiser.js .dependency-cruiser.cjs .importlinter deptrac.yaml arch-go.yml; do
|
|
58
|
+
if [[ -f "${ROOT}/${cfg}" ]]; then
|
|
59
|
+
policy_hash=$(sha256sum "${ROOT}/${cfg}" | awk '{print "sha256:"$1}')
|
|
60
|
+
break
|
|
61
|
+
fi
|
|
62
|
+
done
|
|
63
|
+
local fail_block=""
|
|
64
|
+
[[ "$result" == "FAIL" ]] && fail_block=',"failure_mode":"arch-violation"'
|
|
65
|
+
printf '{"gate_id":"audit-harness:%s:arch-check","result":"%s"%s,"input_hash":"%s","policy_hash":"%s","metadata":{"tool":"%s","status":"%s","violations":%s,"log":"%s"}}\n' \
|
|
66
|
+
"${AUDIT_HARNESS_SIDE:-ci}" "$result" "$fail_block" "$input_hash" "$policy_hash" \
|
|
43
67
|
"$tool" "$status" "$violations" "$log"
|
|
44
68
|
else
|
|
45
69
|
echo "arch-check: tool=$tool status=$status violations=$violations"
|
package/scripts/audit.py
ADDED
|
@@ -0,0 +1,386 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
audit-harness audit — read-only testing-depth gate-runner (PP-PLAN-040 Phase 3 / E5).
|
|
4
|
+
|
|
5
|
+
For every `dimension: testing-depth` gate in a repo's audit-profile/v1, audit
|
|
6
|
+
assesses the gate and emits a `gate-result/v1` row (JSON array, stdout). It is the
|
|
7
|
+
"finish the pyramid" diagnostic: it reports which testing-depth LAYERS a repo has
|
|
8
|
+
infrastructure for, advisory-first.
|
|
9
|
+
|
|
10
|
+
Two assessment strategies, both read-only:
|
|
11
|
+
- crap-score -> runs the bundled `crap` scorer (static complexity x coverage).
|
|
12
|
+
- presence -> a per-layer static heuristic (test dirs, framework configs,
|
|
13
|
+
dependency markers). Layer infra present -> PASS; absent ->
|
|
14
|
+
ADVISORY(warn) "testing-depth gap"; unknowable statically ->
|
|
15
|
+
ADVISORY indeterminate.
|
|
16
|
+
|
|
17
|
+
What audit deliberately does NOT do: execute the repo's test suite. Running
|
|
18
|
+
arbitrary, untrusted test suites is the job of the repo's own CI test step — the
|
|
19
|
+
harness wraps that step's verdict into Evidence, it does not replace it. audit
|
|
20
|
+
reports COVERAGE PRESENCE; execution stays in CI. Each row records its
|
|
21
|
+
`metadata.method` so the assessment provenance is explicit.
|
|
22
|
+
|
|
23
|
+
--fast (default): presence heuristics only (<10s on a reference repo).
|
|
24
|
+
--deep: presence + crap-score.
|
|
25
|
+
--strict: a testing-depth gap on an `enforcement: blocking` gate -> FAIL.
|
|
26
|
+
|
|
27
|
+
Stdlib only. No network. No filesystem mutation.
|
|
28
|
+
"""
|
|
29
|
+
import argparse
|
|
30
|
+
import hashlib
|
|
31
|
+
import json
|
|
32
|
+
import os
|
|
33
|
+
import subprocess
|
|
34
|
+
import sys
|
|
35
|
+
from datetime import datetime, timezone
|
|
36
|
+
|
|
37
|
+
HERE = os.path.dirname(os.path.abspath(__file__))
|
|
38
|
+
if HERE not in sys.path:
|
|
39
|
+
sys.path.insert(0, HERE)
|
|
40
|
+
import classify as C # noqa: E402
|
|
41
|
+
|
|
42
|
+
EMPTY_SHA = "sha256:" + hashlib.sha256(b"").hexdigest()
|
|
43
|
+
|
|
44
|
+
SKIP_DIRS = ("node_modules", ".git", ".venv", "dist", "build", "vendor", "target")
|
|
45
|
+
|
|
46
|
+
# Gates assessed quickly (presence heuristics) belong to the fast tier; crap-score
|
|
47
|
+
# is deep-only because it shells out to radon/gocyclo and can be slow.
|
|
48
|
+
DEEP_ONLY = {"crap-score"}
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def sha256_str(s):
    """Return ``sha256:<hex>`` for the UTF-8 encoding of the string ``s``."""
    hasher = hashlib.sha256()
    hasher.update(s.encode("utf-8"))
    return f"sha256:{hasher.hexdigest()}"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# --------------------------------------------------------------------------- #
|
|
56
|
+
# repo signal collectors
|
|
57
|
+
# --------------------------------------------------------------------------- #
|
|
58
|
+
def collect_node_deps(repo):
    """Merge Node dependency tables from the repo's package.json files.

    Reads the root ``package.json`` first, then one ``package.json`` per
    directory returned by ``C.list_pkg_subdirs(repo)``. For each manifest that
    parses to a dict, the ``dependencies`` and ``devDependencies`` tables are
    merged into one mapping; a later entry overwrites an earlier one with the
    same package name.

    Returns:
        dict mapping package name -> version spec (empty when nothing is found).
    """
    manifests = [os.path.join(repo, "package.json")]
    manifests.extend(
        os.path.join(subdir, "package.json") for subdir in C.list_pkg_subdirs(repo)
    )

    merged = {}
    for manifest_path in manifests:
        manifest = C.read_json(manifest_path)
        if not isinstance(manifest, dict):
            continue
        for section in ("dependencies", "devDependencies"):
            table = manifest.get(section)
            if isinstance(table, dict):
                merged.update(table)
    return merged
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def node_test_script(repo):
    """Return True when the root package.json has a non-empty ``scripts.test``."""
    manifest = C.read_json(os.path.join(repo, "package.json")) or {}
    if not isinstance(manifest, dict):
        return False
    script_table = manifest.get("scripts")
    if not isinstance(script_table, dict):
        return False
    return bool(script_table.get("test"))
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def py_dep_text(repo):
    """Return the lower-cased text of the Python dependency files in ``repo``.

    Looks only at the repo root for ``requirements.txt``, ``pyproject.toml``,
    ``Pipfile``, ``setup.cfg`` and ``tox.ini`` (in that order) and concatenates
    whatever exists. Callers use the result for substring checks.

    Args:
        repo: path of the repository root.

    Returns:
        One lower-cased string; ``""`` when none of the files is readable.
    """
    chunks = []
    for fname in ("requirements.txt", "pyproject.toml", "Pipfile", "setup.cfg", "tox.ini"):
        path = os.path.join(repo, fname)
        if not os.path.isfile(path):
            continue
        try:
            # errors="replace": one stray non-UTF-8 byte (e.g. a Latin-1
            # comment) must not hide every dependency the file declares.
            # The context manager closes the handle deterministically.
            with open(path, "r", encoding="utf-8", errors="replace") as handle:
                chunks.append(handle.read().lower())
        except OSError:
            # Best-effort heuristic: an unreadable file contributes nothing.
            continue
    return "".join(chunks)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def walk_names(repo, max_depth=4):
    """Walk ``repo`` top-down, yielding ``(dirpath, dirnames, filenames)``.

    ``dirpath`` is the absolute path produced by ``os.walk`` on the absolute
    form of ``repo``. Directory names listed in ``SKIP_DIRS`` are pruned from
    ``dirnames`` before the tuple is yielded. A directory more than
    ``max_depth`` path separators below ``repo`` is not yielded and not
    descended into.
    """
    base = os.path.abspath(repo)
    prefix_len = len(base)
    for current, subdirs, filenames in os.walk(base):
        kept = [name for name in subdirs if name not in SKIP_DIRS]
        too_deep = current[prefix_len:].count(os.sep) > max_depth
        # In-place assignment is what tells os.walk which children to visit.
        subdirs[:] = [] if too_deep else kept
        if too_deep:
            continue
        yield current, subdirs, filenames
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def has_dir(repo, *names):
    """Return True if any directory visited by ``walk_names`` has a child
    directory whose name is one of ``names``."""
    wanted = frozenset(names)
    return any(
        not wanted.isdisjoint(subdirs)
        for _current, subdirs, _filenames in walk_names(repo)
    )
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def has_file_matching(repo, predicate):
    """Return True as soon as ``predicate(filename)`` is truthy for any file
    name yielded by ``walk_names(repo)``; False if none matches."""
    for _current, _subdirs, filenames in walk_names(repo):
        for fname in filenames:
            if predicate(fname):
                return True
    return False
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def has_glob_suffix(repo, *suffixes):
    """Return True if some walked file name ends with any of ``suffixes``."""

    def _ends_with_any(fname):
        # str.endswith accepts a tuple and is False for an empty tuple.
        return fname.endswith(suffixes)

    return has_file_matching(repo, _ends_with_any)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# --------------------------------------------------------------------------- #
|
|
121
|
+
# per-layer presence detectors -> (present: bool|None, signal: str)
|
|
122
|
+
# present True -> infra detected (PASS)
|
|
123
|
+
# present False -> no infra detected (ADVISORY gap)
|
|
124
|
+
# present None -> not assessable statically (ADVISORY indeterminate)
|
|
125
|
+
# --------------------------------------------------------------------------- #
|
|
126
|
+
def d_unit(repo, deps):
    """Presence detector for the unit-test layer.

    Probes run in a fixed order and the first hit wins: package.json test
    script, Node test-framework dependency, Python test framework named in the
    dependency files, Go ``*_test.go`` files, a test directory or
    ``*.test|spec`` file, and finally Python ``test_*.py`` files.

    Returns:
        ``(True, signal)`` on the first hit, else ``(False, reason)``.
    """
    node_frameworks = ("vitest", "jest", "mocha", "ava", "@jest/core", "node:test")
    py_frameworks = ("pytest", "unittest", "nose")

    def _python_framework():
        text = py_dep_text(repo)
        return any(name in text for name in py_frameworks)

    def _test_dir_or_spec_file():
        if has_dir(repo, "tests", "test", "__tests__"):
            return True
        return has_glob_suffix(repo, ".test.ts", ".test.js", ".spec.ts", ".spec.js")

    def _python_test_module():
        return has_file_matching(
            repo, lambda fname: fname.startswith("test_") and fname.endswith(".py")
        )

    # (probe, signal) pairs; each probe is lazy so later ones do no work
    # once an earlier one has matched.
    probes = (
        (lambda: node_test_script(repo), "package.json scripts.test"),
        (lambda: any(name in deps for name in node_frameworks), "node test framework dep"),
        (_python_framework, "python test framework"),
        (lambda: has_glob_suffix(repo, "_test.go"), "go *_test.go"),
        (_test_dir_or_spec_file, "test dir / *.test|spec file"),
        (_python_test_module, "python test_*.py"),
    )
    for probe, signal in probes:
        if probe():
            return True, signal
    return False, "no unit test infrastructure detected"
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def d_integration(repo, deps):
    """Presence detector for the integration-test layer.

    Checks, in order: an ``integration`` directory, integration-style test
    file suffixes, a ``testcontainers``/``supertest`` dependency, and the
    string ``tests/integration`` in the Python dependency files.

    Returns:
        ``(True, signal)`` on the first hit, else ``(False, reason)``.
    """
    file_suffixes = (".integration.test.ts", ".integration.test.js", ".int.test.ts")
    if has_dir(repo, "integration"):
        return True, "integration test dir/files"
    if has_glob_suffix(repo, *file_suffixes):
        return True, "integration test dir/files"
    for tool in ("testcontainers", "supertest"):
        if tool in deps:
            return True, "integration tooling dep"
    if "tests/integration" in py_dep_text(repo):
        return True, "python integration tests"
    return False, "no integration test infrastructure detected"
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def d_e2e(repo, deps):
    """Presence detector for the end-to-end layer.

    Checks, in order: a known e2e framework in ``deps``, an ``e2e`` directory,
    and a Playwright/Cypress config file in the repo root.

    Returns:
        ``(True, signal)`` on the first hit, else ``(False, reason)``.
    """
    frameworks = ("@playwright/test", "playwright", "cypress", "puppeteer", "@testing-library/react")
    for framework in frameworks:
        if framework in deps:
            return True, "e2e framework dep"

    if has_dir(repo, "e2e"):
        return True, "e2e config/dir"
    for config_name in ("playwright.config.ts", "cypress.config.ts", "cypress.config.js"):
        if os.path.isfile(os.path.join(repo, config_name)):
            return True, "e2e config/dir"
    return False, "no e2e test infrastructure detected"
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def d_smoke(repo, deps):
    """Presence detector for the smoke-test layer (``deps`` is not consulted).

    Checks, in order: a root package.json script whose name contains
    ``smoke``, a ``smoke`` directory, and any walked file whose lower-cased
    name contains ``smoke``.

    Returns:
        ``(True, signal)`` on the first hit, else ``(False, reason)``.
    """
    manifest = C.read_json(os.path.join(repo, "package.json")) or {}
    script_table = manifest.get("scripts") if isinstance(manifest, dict) else {}
    if isinstance(script_table, dict):
        for script_name in script_table:
            if "smoke" in script_name:
                return True, "package.json smoke script"

    if has_dir(repo, "smoke"):
        return True, "smoke test dir/file"
    if has_file_matching(repo, lambda fname: "smoke" in fname.lower()):
        return True, "smoke test dir/file"
    return False, "no smoke test detected"
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def d_perf(repo, deps):
    """Presence detector for the performance layer.

    Checks, in order: a benchmark/load-test package in ``deps``, a bench-style
    directory, and bench-style file suffixes.

    Returns:
        ``(True, signal)`` on the first hit, else ``(False, reason)``.
    """
    for tool in ("benchmark", "tinybench", "vitest-bench", "k6", "autocannon"):
        if tool in deps:
            return True, "perf/bench dep"

    if has_dir(repo, "bench", "benchmark", "benchmarks", "perf"):
        return True, "bench dir/files"
    if has_glob_suffix(repo, ".bench.ts", ".bench.js", "_bench.go"):
        return True, "bench dir/files"
    return False, "no performance test infrastructure detected"
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def d_a11y(repo, deps):
    """Presence detector for the accessibility layer.

    Only ``deps`` is inspected; ``repo`` is accepted for signature parity with
    the other detectors.

    Returns:
        ``(True, "a11y tooling dep")`` when a known a11y package is in
        ``deps``, else ``(False, reason)``.
    """
    for tool in ("axe-core", "@axe-core/playwright", "jest-axe", "pa11y"):
        if tool in deps:
            return True, "a11y tooling dep"
    return False, "no accessibility test infrastructure detected"
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def d_contract(repo, deps):
    """Presence detector for the contract-test layer.

    Checks a Pact package in ``deps`` first, then a ``contract``/``contracts``/
    ``pacts`` directory.

    Returns:
        ``(True, signal)`` on the first hit, else ``(False, reason)``.
    """
    for package in ("@pact-foundation/pact", "pact"):
        if package in deps:
            return True, "contract testing dep (pact)"
    if has_dir(repo, "contract", "contracts", "pacts"):
        return True, "contract test dir"
    return False, "no contract test infrastructure detected"
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def d_migration(repo, deps):
    """Presence detector for the migration layer.

    Checks, in order: a ``migrations``/``migration`` directory, a Node
    migration tool in ``deps``, and ``alembic``/``django`` in the Python
    dependency files.

    Returns:
        ``(True, signal)`` on the first hit, else ``(False, reason)``.
    """
    if has_dir(repo, "migrations", "migration"):
        return True, "migrations dir"

    for tool in ("prisma", "knex", "typeorm", "drizzle-kit"):
        if tool in deps:
            return True, "migration tooling dep"

    for marker in ("alembic", "django"):
        # py_dep_text is re-read per marker, as the lazy check only reaches
        # the second marker when the first is absent.
        if marker in py_dep_text(repo):
            return True, "python migration tooling"
    return False, "no migration test infrastructure detected"
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def _cargo_manifest_text(repo):
    """Return the text of ``<repo>/Cargo.toml``, or ``""`` if absent/unreadable."""
    path = os.path.join(repo, "Cargo.toml")
    if not os.path.isfile(path):
        return ""
    try:
        with open(path, "r", encoding="utf-8", errors="replace") as handle:
            return handle.read()
    except OSError:
        # Best-effort heuristic: an unreadable manifest is treated as absent.
        return ""


def d_property(repo, deps):
    """Presence detector for the property-based testing layer.

    Checks, in order: ``fast-check``/``jsverify`` in ``deps``; ``hypothesis``
    in the Python dependency files; then Rust proptest, recognised by
    ``proptest`` in the Python dependency text (kept for backward
    compatibility), in the root ``Cargo.toml``, or by a ``*_proptest.rs`` file.

    Args:
        repo: path of the repository root.
        deps: mapping of Node package names (only membership is tested).

    Returns:
        ``(True, signal)`` on the first hit, else ``(False, reason)``.
    """
    if any(name in deps for name in ("fast-check", "jsverify")):
        return True, "property-based dep (fast-check)"

    # Read the Python dependency files once instead of once per check.
    py_text = py_dep_text(repo)
    if "hypothesis" in py_text:
        return True, "python hypothesis"

    # proptest is a Rust crate, so it is declared in Cargo.toml, not in any of
    # the Python files py_dep_text reads.
    if (
        "proptest" in py_text
        or "proptest" in _cargo_manifest_text(repo)
        or has_glob_suffix(repo, "_proptest.rs")
    ):
        return True, "rust proptest"
    return False, "no property-based test infrastructure detected"
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def d_fuzz(repo, deps):
    """Presence detector for the fuzzing layer.

    Checks, in order: a ``fuzz`` directory, ``*_fuzz.go``/``*fuzz_target.rs``
    files, a Jazzer package in ``deps``, and ``atheris`` in the Python
    dependency files.

    Returns:
        ``(True, signal)`` on the first hit, else ``(False, reason)``.
    """
    if has_dir(repo, "fuzz"):
        return True, "fuzz dir/targets"
    if has_glob_suffix(repo, "_fuzz.go", "fuzz_target.rs"):
        return True, "fuzz dir/targets"

    for package in ("@jazzer.js/core", "jazzer"):
        if package in deps:
            return True, "fuzz tooling dep"
    if "atheris" in py_dep_text(repo):
        return True, "fuzz tooling dep"
    return False, "no fuzz test infrastructure detected"
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def d_sanitizers(repo, deps):
    """Presence detector for the sanitizers layer.

    Looks for the literal ``-fsanitize`` in the repo-root ``Makefile`` and
    ``CMakeLists.txt``. ``deps`` is accepted for signature parity with the
    other detectors and is not consulted.

    Args:
        repo: path of the repository root.
        deps: unused.

    Returns:
        ``(True, "-fsanitize in build config")`` on a hit, else
        ``(False, reason)``.
    """
    for fname in ("Makefile", "CMakeLists.txt"):
        path = os.path.join(repo, fname)
        if not os.path.isfile(path):
            continue
        try:
            # errors="replace": build files are often not valid UTF-8, and a
            # decode error must not hide the flag. The context manager closes
            # the handle instead of leaving it to the garbage collector.
            with open(path, "r", encoding="utf-8", errors="replace") as handle:
                found = "-fsanitize" in handle.read()
        except OSError:
            # Best-effort heuristic: an unreadable file is treated as no signal.
            continue
        if found:
            return True, "-fsanitize in build config"
    return False, "no sanitizer configuration detected"
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
# Gate-id suffix -> presence detector. Every detector takes (repo, deps) and
# returns a (present, signal) pair.
DETECTORS = {
    "unit": d_unit,
    "integration": d_integration,
    "e2e": d_e2e,
    "smoke": d_smoke,
    "perf": d_perf,
    "a11y": d_a11y,
    "contract": d_contract,
    "migration": d_migration,
    "property-based": d_property,
    "fuzz": d_fuzz,
    "sanitizers": d_sanitizers,
}
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
# --------------------------------------------------------------------------- #
|
|
245
|
+
def make_row(gate_id, result, *, policy_hash, input_hash, commit_sha, runner,
             metadata=None, failure_mode=None, advisory_severity=None):
    """Build one gate-result row as a dict.

    The row always carries ``gate_id``, ``result``, ``policy_hash``,
    ``input_hash``, ``timestamp`` (current UTC time, second precision, ``Z``
    suffix), ``runner`` and ``commit_sha``. ``metadata`` is added only when
    truthy; ``failure_mode`` and ``advisory_severity`` only when not ``None``.
    """
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    row = dict(
        gate_id=gate_id,
        result=result,
        policy_hash=policy_hash,
        input_hash=input_hash,
        timestamp=stamp,
        runner=runner,
        commit_sha=commit_sha,
    )
    # (key, value, include?) in the order the optional keys must appear.
    optional = (
        ("metadata", metadata, bool(metadata)),
        ("failure_mode", failure_mode, failure_mode is not None),
        ("advisory_severity", advisory_severity, advisory_severity is not None),
    )
    for key, value, include in optional:
        if include:
            row[key] = value
    return row
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def gate_suffix(gate_id):
    """Return the text after the last ``:`` in ``gate_id`` (all of it if none)."""
    _head, _sep, tail = gate_id.rpartition(":")
    return tail
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def run_crap(repo, gate, commit_sha, runner, strict):
    """Run the bundled ``crap-score.py`` in ``repo`` and turn it into a row.

    The scorer runs as a subprocess with a 120-second timeout. Outcomes:
      * any exception while running it -> ADVISORY(warn), marked indeterminate;
      * exit code 0 -> PASS;
      * non-zero exit -> FAIL when ``strict`` is set or the gate's enforcement
        is ``blocking``, otherwise ADVISORY(error).
    ``detail`` is a list holding the last line of stdout (or of stderr when
    stdout is empty), or an empty list when there was no output.
    """
    enforcement = gate.get("enforcement", "advisory")

    def _row(result, metadata, **extra):
        # Every crap-score row shares the same policy/input hash fields.
        return make_row(gate["gate_id"], result, policy_hash=sha256_str("crap:default"),
                        input_hash=EMPTY_SHA, commit_sha=commit_sha, runner=runner,
                        metadata=metadata, **extra)

    try:
        proc = subprocess.run([sys.executable, os.path.join(HERE, "crap-score.py")],
                              cwd=repo, capture_output=True, text=True, timeout=120)
        passed = proc.returncode == 0
        output = proc.stdout or proc.stderr
        detail = output.strip().splitlines()[-1:] if output else []
    except Exception as exc:
        return _row("ADVISORY",
                    {"method": "crap-static", "indeterminate": True, "reason": str(exc)},
                    advisory_severity="warn")

    if passed:
        return _row("PASS", {"method": "crap-static", "detail": detail})

    metadata = {"method": "crap-static", "detail": detail}
    if strict or enforcement == "blocking":
        return _row("FAIL", metadata, failure_mode="testing-depth:crap-threshold")
    return _row("ADVISORY", metadata, advisory_severity="error")
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def run_presence(suffix, repo, deps, gate, commit_sha, runner, strict):
    """Run the presence detector registered for ``suffix`` and build its row.

    The detector's ``present`` value selects the outcome:
      * ``True``  -> PASS, with the detector's signal in the metadata;
      * ``None``  -> ADVISORY(warn), marked indeterminate;
      * otherwise -> a gap: FAIL when ``strict`` is set or the gate's
        enforcement is ``blocking``, else ADVISORY(warn).
    """
    enforcement = gate.get("enforcement", "advisory")
    present, signal = DETECTORS[suffix](repo, deps)

    def _row(result, metadata, **extra):
        # Every presence row for this layer shares the same hash fields.
        return make_row(gate["gate_id"], result, policy_hash=sha256_str(f"presence:{suffix}"),
                        input_hash=EMPTY_SHA, commit_sha=commit_sha, runner=runner,
                        metadata=metadata, **extra)

    common = {"method": "presence-heuristic", "layer": suffix}
    if present is True:
        return _row("PASS", {**common, "signal": signal})
    if present is None:
        return _row("ADVISORY", {**common, "indeterminate": True, "reason": signal},
                    advisory_severity="warn")

    gap_metadata = {**common, "reason": signal}
    if strict or enforcement == "blocking":
        return _row("FAIL", gap_metadata, failure_mode=f"testing-depth:{suffix}-gap")
    return _row("ADVISORY", gap_metadata, advisory_severity="warn")
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def compute_profile(repo, registry_path, profile_arg):
    """Load or compute the audit profile for ``repo``.

    Args:
        repo: repository path handed to the classifier.
        registry_path: value passed as ``--registry`` to ``classify.py``.
        profile_arg: ``"-"`` to read JSON from stdin, a file path to read JSON
            from that file, or a falsy value to run ``classify.py``.

    Returns:
        The decoded JSON profile.

    Raises:
        SystemExit(2): ``classify.py`` exited non-zero; its stderr is
            forwarded to this process's stderr first.
    """
    if profile_arg == "-":
        return json.loads(sys.stdin.read())
    if profile_arg:
        with open(profile_arg, "r", encoding="utf-8") as handle:
            return json.loads(handle.read())

    command = [sys.executable, os.path.join(HERE, "classify.py"), repo,
               "--registry", registry_path]
    completed = subprocess.run(command, capture_output=True, text=True)
    if completed.returncode != 0:
        sys.stderr.write(completed.stderr)
        raise SystemExit(2)
    return json.loads(completed.stdout)
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def main():
    """CLI entry point: assess every testing-depth gate and print the rows.

    Flow:
      1. Parse arguments; deep mode is on only with ``--deep`` and no ``--fast``.
      2. Kill-switch: if ``.audit-harness.yml`` sets ``disable`` or
         ``AUDIT_HARNESS_DISABLE=1``, print ``[]`` and exit 0.
      3. Load or compute the profile and keep the gates whose dimension is
         ``testing-depth`` and whose enforcement is not ``disabled``.
      4. Emit one row per gate: crap-score (deep only), a presence detector,
         or an informational "delegated" row for unknown suffixes.
      5. Print the rows as JSON on stdout and a summary on stderr; exit 1 if
         any row is FAIL, else 0.
    """
    parser = argparse.ArgumentParser(description="Read-only testing-depth gate-runner -> gate-result/v1 rows")
    parser.add_argument("repo", nargs="?", default=".")
    for flag, help_text in (
        ("--fast", "presence heuristics only (default tier)"),
        ("--deep", "presence + crap-score"),
        ("--strict", "treat a testing-depth gap as FAIL (exit 1)"),
    ):
        parser.add_argument(flag, action="store_true", help=help_text)
    parser.add_argument("--registry", default=C.DEFAULT_REGISTRY)
    parser.add_argument("--profile", default=None, help="pinned audit-profile/v1 (PATH or '-')")
    opts = parser.parse_args()

    deep = opts.deep and not opts.fast
    repo = os.path.abspath(opts.repo)
    runner = f"audit-harness@{C.harness_version()}"

    override_path = os.path.join(repo, ".audit-harness.yml")
    if os.path.isfile(override_path):
        override = C.parse_override(override_path)
    else:
        override = {"disable": False}
    if override.get("disable") or os.environ.get("AUDIT_HARNESS_DISABLE") == "1":
        sys.stderr.write("audit-harness: KILL-SWITCH active — audit skipped (no rows emitted)\n")
        print("[]")
        sys.exit(0)

    profile = compute_profile(repo, os.path.abspath(opts.registry), opts.profile)
    commit_sha = profile.get("subject", {}).get("commit_sha") or C.git_short_sha(repo)
    deps = collect_node_deps(repo)

    def _selected(gate):
        return gate.get("dimension") == "testing-depth" and gate.get("enforcement") != "disabled"

    def _info_row(gate, policy, metadata):
        # Informational ADVISORY row for gates that were not assessed.
        return make_row(gate["gate_id"], "ADVISORY", policy_hash=sha256_str(policy),
                        input_hash=EMPTY_SHA, commit_sha=commit_sha, runner=runner,
                        advisory_severity="info", metadata=metadata)

    gates = [gate for gate in profile.get("gates", []) if _selected(gate)]

    rows = []
    for gate in gates:
        suffix = gate_suffix(gate["gate_id"])
        if suffix == "crap-score":
            if deep:
                row = run_crap(repo, gate, commit_sha, runner, opts.strict)
            else:
                row = _info_row(gate, "crap:default",
                                {"method": "crap-static", "skipped": "deep-only (run with --deep)"})
        elif suffix in DETECTORS:
            row = run_presence(suffix, repo, deps, gate, commit_sha, runner, opts.strict)
        else:
            # e.g. per-package-classify — assessment delegated, not a static signal
            row = _info_row(gate, f"audit:{suffix}",
                            {"method": "delegated", "indeterminate": True,
                             "reason": f"'{suffix}' has no static testing-depth heuristic "
                                       f"in this harness version"})
        rows.append(row)

    print(json.dumps(rows, indent=2))

    n_fail = n_gap = n_pass = 0
    for row in rows:
        outcome = row["result"]
        if outcome == "FAIL":
            n_fail += 1
        elif outcome == "PASS":
            n_pass += 1
        elif outcome == "ADVISORY" and row.get("advisory_severity") == "warn":
            n_gap += 1
    sys.stderr.write(f"audit-harness audit ({'deep' if deep else 'fast'}): {n_pass} PASS, "
                     f"{n_gap} gap(s), {n_fail} FAIL across {len(rows)} testing-depth gate(s)\n")
    sys.exit(1 if n_fail else 0)
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
if __name__ == "__main__":
|
|
386
|
+
main()
|
package/scripts/bias-count.sh
CHANGED
|
@@ -1,20 +1,48 @@
|
|
|
1
1
|
#!/usr/bin/env bash
|
|
2
2
|
# Quick test bias pattern counter
|
|
3
|
-
# Usage: bash bias-count.sh [test-directory]
|
|
3
|
+
# Usage: bash bias-count.sh [test-directory] [--json]
|
|
4
4
|
#
|
|
5
5
|
# Scans test files for common bias patterns that weaken test suites.
|
|
6
6
|
# See references/test-quality-deep-audit.md Section 1 for full details.
|
|
7
|
+
#
|
|
8
|
+
# JSON mode:
|
|
9
|
+
# stdout = single JSON object suitable for piping to `audit-harness emit-evidence`
|
|
10
|
+
# stderr = unchanged human-readable summary (preserves backward-compat)
|
|
11
|
+
#   exit code unchanged (0 for a completed scan — advisory gate; 1 if the test directory is missing)
|
|
7
12
|
|
|
8
13
|
set -euo pipefail
|
|
9
14
|
|
|
10
|
-
|
|
15
|
+
# Defaults: human-readable output, scanning ./tests.
JSON_OUT=0
TEST_DIR="tests"

# --json may appear in any position. Every other argument is collected as a
# positional, and only the first positional is used as TEST_DIR.
_pos=()
for arg in "$@"; do
  if [[ "$arg" == "--json" ]]; then
    JSON_OUT=1
  else
    _pos+=("$arg")
  fi
done
if (( ${#_pos[@]} > 0 )); then
  TEST_DIR="${_pos[0]}"
fi
|
|
11
27
|
|
|
12
28
|
# Missing test directory: in JSON mode emit a NOT_APPLICABLE row on stdout,
# then always print the error and usage on stderr and exit 1.
if [[ ! -d "$TEST_DIR" ]]; then
  if [[ "$JSON_OUT" -eq 1 ]]; then
    # TEST_DIR is user-supplied. Escape backslashes first, then double
    # quotes, so the path cannot break out of the JSON string.
    _json_path=${TEST_DIR//\\/\\\\}
    _json_path=${_json_path//\"/\\\"}
    printf '{"gate_id":"audit-harness:%s:bias-count","result":"NOT_APPLICABLE","input_hash":"sha256:0000000000000000000000000000000000000000000000000000000000000000","policy_hash":"sha256:0000000000000000000000000000000000000000000000000000000000000000","metadata":{"reason":"test directory not found","path":"%s"}}\n' \
      "${AUDIT_HARNESS_SIDE:-ci}" "$_json_path"
  fi
  echo "ERROR: Test directory '$TEST_DIR' not found" >&2
  echo "Usage: bash bias-count.sh [test-directory] [--json]" >&2
  exit 1
fi
|
|
17
37
|
|
|
38
|
+
#######################################
# Fingerprint the source files under a directory as "sha256:<hex>".
# Hashes every matching file, sorts the per-file digest lines byte-wise
# (LC_ALL=C, so the result does not depend on the caller's locale) and hashes
# that listing. Never fails: this value is evidence metadata, so a missing
# hashing tool or an unreadable subdirectory must not abort the scan under
# `set -euo pipefail`.
# Arguments: $1 - directory to fingerprint
# Outputs:   the digest on stdout; the all-zero digest when no SHA-256 tool
#            (sha256sum or shasum) is available
#######################################
_bias_hash_tree() {
  local dir="$1" digest=""
  local -a hasher=()
  if command -v sha256sum >/dev/null 2>&1; then
    hasher=(sha256sum)
  elif command -v shasum >/dev/null 2>&1; then
    hasher=(shasum -a 256)
  fi
  if (( ${#hasher[@]} > 0 )); then
    # `|| true`: keep whatever digest was produced even if find reported an
    # error; an empty result falls through to the zero digest below.
    digest=$(
      find "$dir" -type f \( -name "*.py" -o -name "*.ts" -o -name "*.js" -o -name "*.tsx" -o -name "*.jsx" -o -name "*.go" -o -name "*.rs" -o -name "*.java" -o -name "*.kt" -o -name "*.cs" -o -name "*.php" -o -name "*.rb" \) -exec "${hasher[@]}" {} + 2>/dev/null \
        | LC_ALL=C sort \
        | "${hasher[@]}" \
        | awk '{print "sha256:"$1}'
    ) || true
  fi
  if [[ -z "$digest" ]]; then
    digest="sha256:0000000000000000000000000000000000000000000000000000000000000000"
  fi
  printf '%s\n' "$digest"
}

# Hash the test directory tree as the "input"
INPUT_HASH=$(_bias_hash_tree "$TEST_DIR")
|
|
40
|
+
|
|
41
|
+
# JSON mode: keep the real stdout on fd 3 for the JSON object and send the
# human-readable report to stderr instead.
if (( JSON_OUT == 1 )); then
  exec 3>&1 1>&2
fi
|
|
45
|
+
|
|
18
46
|
echo "═══════════════════════════════════════"
|
|
19
47
|
echo " TEST BIAS SCAN — $TEST_DIR"
|
|
20
48
|
echo "═══════════════════════════════════════"
|
|
@@ -75,14 +103,32 @@ printf " %-30s %s\n" "Per-100-tests rate" "$RATE"
|
|
|
75
103
|
echo
|
|
76
104
|
|
|
77
105
|
# Grade
|
|
106
|
+
# True when RATE is at or below the given inclusive upper bound (via bc).
_rate_at_most() {
  [ "$(echo "$RATE <= $1" | bc)" -eq 1 ]
}

# Bands are checked from lowest to highest; anything above 30 is CRITICAL.
GRADE="LOW"
if _rate_at_most 5; then
  GRADE="LOW"
  echo " Grade: LOW — no action needed"
elif _rate_at_most 15; then
  GRADE="MODERATE"
  echo " Grade: MODERATE — review flagged tests"
elif _rate_at_most 30; then
  GRADE="HIGH"
  echo " Grade: HIGH — systematic remediation needed"
else
  GRADE="CRITICAL"
  echo " Grade: CRITICAL — full rewrite of flagged tests"
fi
|
|
87
120
|
echo
|
|
88
121
|
echo "═══════════════════════════════════════"
|
|
122
|
+
|
|
123
|
+
if [[ "$JSON_OUT" -eq 1 ]]; then
  # Point stdout back at the descriptor saved on fd 3 and release fd 3.
  exec 1>&3 3>&-
  # bias-count is advisory: the result is always ADVISORY and only the
  # severity rises with the grade.
  if [[ "$GRADE" == "LOW" ]]; then
    sev="info"
  elif [[ "$GRADE" == "MODERATE" ]]; then
    sev="warn"
  elif [[ "$GRADE" == "HIGH" || "$GRADE" == "CRITICAL" ]]; then
    sev="error"
  fi
  printf '{"gate_id":"audit-harness:%s:bias-count","result":"ADVISORY","advisory_severity":"%s","input_hash":"%s","policy_hash":"sha256:0000000000000000000000000000000000000000000000000000000000000000","metadata":{"test_count":%d,"assertion_count":%d,"assertion_density":"%s","bias_total":%d,"per_100_rate":"%s","grade":"%s"}}\n' \
    "${AUDIT_HARNESS_SIDE:-ci}" "$sev" "$INPUT_HASH" "$TEST_COUNT" "$ASSERT_COUNT" "$DENSITY" "$TOTAL_BIAS" "$RATE" "$GRADE"
fi
|