@intentsolutions/audit-harness 0.1.0 → 1.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +364 -0
- package/LICENSE +202 -21
- package/NOTICE +15 -0
- package/README.md +36 -4
- package/bin/audit-harness.js +18 -8
- package/package.json +11 -9
- package/scripts/arch-check.sh +25 -1
- package/scripts/bias-count.sh +50 -4
- package/scripts/crap-score.py +65 -5
- package/scripts/emit-evidence.sh +256 -0
- package/scripts/escape-scan.sh +58 -4
- package/scripts/gherkin-lint.sh +53 -9
- package/scripts/harness-hash.sh +78 -5
package/README.md
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
# @intentsolutions/audit-harness
|
|
2
2
|
|
|
3
|
+
[npm version](https://www.npmjs.com/package/@intentsolutions/audit-harness)
|
|
4
|
+
[License: Apache-2.0](LICENSE)
|
|
5
|
+
[npm downloads](https://www.npmjs.com/package/@intentsolutions/audit-harness)
|
|
6
|
+
|
|
7
|
+
Part of the **[Intent Eval Platform](https://github.com/intent-solutions-io/intent-eval-platform)** — the umbrella mapping the six repos that converge via a shared Evidence Bundle schema.
|
|
8
|
+
|
|
3
9
|
Deterministic test-enforcement toolkit. Companion to the `audit-tests` and `implement-tests` Claude Code skills — but usable standalone in any repo that wants hash-pinned, escape-scanned, AI-proof quality gates.
|
|
4
10
|
|
|
5
11
|
## What it is
|
|
@@ -19,12 +25,36 @@ A small CLI wrapping 6 deterministic scripts:
|
|
|
19
25
|
|
|
20
26
|
## Install
|
|
21
27
|
|
|
28
|
+
Pick the install flavor that matches your repo's ecosystem — all three publish the same CLI surface.
|
|
29
|
+
|
|
30
|
+
**Node / JS / TS** (from npm):
|
|
31
|
+
|
|
22
32
|
```bash
|
|
23
33
|
pnpm add -D @intentsolutions/audit-harness
|
|
24
34
|
# or: npm install --save-dev @intentsolutions/audit-harness
|
|
25
35
|
# or: yarn add --dev @intentsolutions/audit-harness
|
|
26
36
|
```
|
|
27
37
|
|
|
38
|
+
**Python** (from PyPI):
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pip install intent-audit-harness
|
|
42
|
+
# or inside a project venv:
|
|
43
|
+
python -m pip install intent-audit-harness
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
**Rust** (from crates.io):
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
cargo install intent-audit-harness
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
**Any other language** (Go, Ruby, PHP, Java, .NET, shell, etc.) — vendor the scripts:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
curl -sSL https://raw.githubusercontent.com/jeremylongshore/intent-audit-harness/main/install.sh | bash
|
|
56
|
+
```
|
|
57
|
+
|
|
28
58
|
## Quick usage
|
|
29
59
|
|
|
30
60
|
### Pre-commit hook (`.husky/pre-commit`)
|
|
@@ -75,7 +105,7 @@ See `audit-tests/references/philosophy.md` in the companion skill for the full r
|
|
|
75
105
|
|
|
76
106
|
This harness sits inside a larger framework:
|
|
77
107
|
|
|
78
|
-
```
|
|
108
|
+
```text
|
|
79
109
|
L7 Acceptance / RTM / Personas / Journeys ← WHAT are we proving?
|
|
80
110
|
L6 E2E / BDD / Visual regression ← User-level guarantees
|
|
81
111
|
L5 Perf / Security (SAST/DAST) / A11y / Chaos ← Non-functional
|
|
@@ -115,12 +145,14 @@ Most scripts are language-agnostic (shell + regex). CRAP has per-language backen
|
|
|
115
145
|
|
|
116
146
|
## License
|
|
117
147
|
|
|
118
|
-
|
|
148
|
+
Apache 2.0 — see [LICENSE](./LICENSE) and [NOTICE](./NOTICE).
|
|
149
|
+
|
|
150
|
+
**Note:** versions `0.x` shipped under the MIT license. Starting with `v1.0.0`, the project is licensed under Apache 2.0. Existing `0.x` releases on npm remain available under their original MIT terms; new releases (`>= 1.0.0`) are Apache 2.0.
|
|
119
151
|
|
|
120
152
|
## Related
|
|
121
153
|
|
|
122
|
-
- [`audit-tests` Claude Code skill](https://github.com/jeremylongshore/audit-harness#related) — diagnostic pipeline that uses this harness
|
|
123
|
-
- [`implement-tests` Claude Code skill](https://github.com/jeremylongshore/audit-harness#related) — filesystem-mutating installer that installs this harness as part of L1/L3 setup
|
|
154
|
+
- [`audit-tests` Claude Code skill](https://github.com/jeremylongshore/intent-audit-harness#related) — diagnostic pipeline that uses this harness
|
|
155
|
+
- [`implement-tests` Claude Code skill](https://github.com/jeremylongshore/intent-audit-harness#related) — filesystem-mutating installer that installs this harness as part of L1/L3 setup
|
|
124
156
|
|
|
125
157
|
## Versioning
|
|
126
158
|
|
package/bin/audit-harness.js
CHANGED
|
@@ -13,14 +13,15 @@ const { existsSync } = require('node:fs');
|
|
|
13
13
|
const SCRIPTS = resolve(__dirname, '..', 'scripts');
|
|
14
14
|
|
|
15
15
|
const COMMANDS = {
|
|
16
|
-
'verify':
|
|
17
|
-
'init':
|
|
18
|
-
'list':
|
|
19
|
-
'escape-scan':
|
|
20
|
-
'arch':
|
|
21
|
-
'bias':
|
|
22
|
-
'gherkin-lint':
|
|
23
|
-
'crap':
|
|
16
|
+
'verify': { script: 'harness-hash.sh', args: ['--verify'] },
|
|
17
|
+
'init': { script: 'harness-hash.sh', args: ['--init'] },
|
|
18
|
+
'list': { script: 'harness-hash.sh', args: ['--list'] },
|
|
19
|
+
'escape-scan': { script: 'escape-scan.sh', args: [] },
|
|
20
|
+
'arch': { script: 'arch-check.sh', args: [] },
|
|
21
|
+
'bias': { script: 'bias-count.sh', args: [] },
|
|
22
|
+
'gherkin-lint': { script: 'gherkin-lint.sh', args: [] },
|
|
23
|
+
'crap': { script: 'crap-score.py', args: [] },
|
|
24
|
+
'emit-evidence': { script: 'emit-evidence.sh', args: [] },
|
|
24
25
|
};
|
|
25
26
|
|
|
26
27
|
function usage() {
|
|
@@ -39,6 +40,15 @@ Commands:
|
|
|
39
40
|
bias Count test-bias patterns (tautology, smoke-only, etc.)
|
|
40
41
|
gherkin-lint Advisory Gherkin quality check
|
|
41
42
|
crap [args...] CRAP complexity × coverage scorer (multi-language)
|
|
43
|
+
emit-evidence Wrap a gate-result JSON envelope in an in-toto
|
|
44
|
+
Statement v1 (predicate https://evals.intentsolutions.io/gate-result/v1)
|
|
45
|
+
Read JSON on stdin: <gate> --json | audit-harness emit-evidence
|
|
46
|
+
|
|
47
|
+
Evidence Bundle (v0.3.0+):
|
|
48
|
+
All gates support --json to emit machine-readable gate-result envelopes
|
|
49
|
+
suitable for piping to emit-evidence. See SEMVER.md for compatibility rules
|
|
50
|
+
and intent-eval-lab/specs/evidence-bundle/v0.1.0-draft/SPEC.md for the
|
|
51
|
+
envelope schema.
|
|
42
52
|
|
|
43
53
|
Options:
|
|
44
54
|
--version, -v Print version
|
package/package.json
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@intentsolutions/audit-harness",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "1.1.5",
|
|
4
4
|
"description": "Deterministic test-enforcement harness — escape-scan, hash-pinning, CRAP, architecture checks, bias detection, Gherkin lint. Companion to the audit-tests and implement-tests Claude Code skills.",
|
|
5
|
-
"license": "MIT",
|
|
5
|
+
"license": "Apache-2.0",
|
|
6
6
|
"author": "Jeremy Longshore <jeremy@intentsolutions.io>",
|
|
7
|
-
"homepage": "https://github.com/jeremylongshore/audit-harness",
|
|
7
|
+
"homepage": "https://github.com/jeremylongshore/intent-audit-harness",
|
|
8
8
|
"repository": {
|
|
9
9
|
"type": "git",
|
|
10
|
-
"url": "git+https://github.com/jeremylongshore/audit-harness.git"
|
|
10
|
+
"url": "git+https://github.com/jeremylongshore/intent-audit-harness.git"
|
|
11
11
|
},
|
|
12
12
|
"bugs": {
|
|
13
|
-
"url": "https://github.com/jeremylongshore/audit-harness/issues"
|
|
13
|
+
"url": "https://github.com/jeremylongshore/intent-audit-harness/issues"
|
|
14
14
|
},
|
|
15
15
|
"keywords": [
|
|
16
16
|
"testing",
|
|
@@ -33,15 +33,17 @@
|
|
|
33
33
|
"scripts",
|
|
34
34
|
"README.md",
|
|
35
35
|
"LICENSE",
|
|
36
|
+
"NOTICE",
|
|
36
37
|
"CHANGELOG.md"
|
|
37
38
|
],
|
|
39
|
+
"scripts": {
|
|
40
|
+
"test": "bash scripts/escape-scan.sh --staged || true",
|
|
41
|
+
"prepublishOnly": "node bin/audit-harness.js --version"
|
|
42
|
+
},
|
|
38
43
|
"publishConfig": {
|
|
39
44
|
"access": "public"
|
|
40
45
|
},
|
|
41
46
|
"engines": {
|
|
42
47
|
"node": ">=18"
|
|
43
|
-
},
|
|
44
|
-
"scripts": {
|
|
45
|
-
"test": "bash scripts/escape-scan.sh --staged || true"
|
|
46
48
|
}
|
|
47
|
-
}
|
|
49
|
+
}
|
package/scripts/arch-check.sh
CHANGED
|
@@ -39,7 +39,31 @@ mkdir -p "$REPORT_DIR"
|
|
|
39
39
|
emit_result() {
|
|
40
40
|
local tool="$1" status="$2" violations="$3" log="$4"
|
|
41
41
|
if [[ "$JSON_OUT" -eq 1 ]]; then
|
|
42
|
-
|
|
42
|
+
# status: pass / fail / missing-tool / not-configured
|
|
43
|
+
local result
|
|
44
|
+
case "$status" in
|
|
45
|
+
pass) result="PASS" ;;
|
|
46
|
+
fail) result="FAIL" ;;
|
|
47
|
+
missing-tool|not-configured) result="NOT_APPLICABLE" ;;
|
|
48
|
+
*) result="ADVISORY" ;;
|
|
49
|
+
esac
|
|
50
|
+
local input_hash="sha256:0000000000000000000000000000000000000000000000000000000000000000"
|
|
51
|
+
local policy_hash="sha256:0000000000000000000000000000000000000000000000000000000000000000"
|
|
52
|
+
# Best-effort: input_hash is the source tree fingerprint when running against ROOT/src
|
|
53
|
+
if [[ -d "${ROOT}/src" ]]; then
|
|
54
|
+
input_hash=$(find "${ROOT}/src" -type f \( -name "*.ts" -o -name "*.tsx" -o -name "*.js" -o -name "*.py" -o -name "*.go" -o -name "*.rs" -o -name "*.java" -o -name "*.kt" -o -name "*.cs" -o -name "*.php" \) -exec sha256sum {} \; 2>/dev/null | sort | sha256sum | awk '{print "sha256:"$1}')
|
|
55
|
+
fi
|
|
56
|
+
# Hash the architecture rule config (whichever tool's config was used)
|
|
57
|
+
for cfg in .dependency-cruiser.js .dependency-cruiser.cjs .importlinter deptrac.yaml arch-go.yml; do
|
|
58
|
+
if [[ -f "${ROOT}/${cfg}" ]]; then
|
|
59
|
+
policy_hash=$(sha256sum "${ROOT}/${cfg}" | awk '{print "sha256:"$1}')
|
|
60
|
+
break
|
|
61
|
+
fi
|
|
62
|
+
done
|
|
63
|
+
local fail_block=""
|
|
64
|
+
[[ "$result" == "FAIL" ]] && fail_block=',"failure_mode":"arch-violation"'
|
|
65
|
+
printf '{"gate_id":"audit-harness:%s:arch-check","result":"%s"%s,"input_hash":"%s","policy_hash":"%s","metadata":{"tool":"%s","status":"%s","violations":%s,"log":"%s"}}\n' \
|
|
66
|
+
"${AUDIT_HARNESS_SIDE:-ci}" "$result" "$fail_block" "$input_hash" "$policy_hash" \
|
|
43
67
|
"$tool" "$status" "$violations" "$log"
|
|
44
68
|
else
|
|
45
69
|
echo "arch-check: tool=$tool status=$status violations=$violations"
|
package/scripts/bias-count.sh
CHANGED
|
@@ -1,20 +1,48 @@
|
|
|
1
1
|
#!/usr/bin/env bash
|
|
2
2
|
# Quick test bias pattern counter
|
|
3
|
-
# Usage: bash bias-count.sh [test-directory]
|
|
3
|
+
# Usage: bash bias-count.sh [test-directory] [--json]
|
|
4
4
|
#
|
|
5
5
|
# Scans test files for common bias patterns that weaken test suites.
|
|
6
6
|
# See references/test-quality-deep-audit.md Section 1 for full details.
|
|
7
|
+
#
|
|
8
|
+
# JSON mode:
|
|
9
|
+
# stdout = single JSON object suitable for piping to `audit-harness emit-evidence`
|
|
10
|
+
# stderr = unchanged human-readable summary (preserves backward-compat)
|
|
11
|
+
# exit code unchanged (0 when the scan completes — the gate is advisory; 1 when the test directory is not found)
|
|
7
12
|
|
|
8
13
|
set -euo pipefail
|
|
9
14
|
|
|
10
|
-
|
|
15
|
+
JSON_OUT=0
|
|
16
|
+
TEST_DIR="tests"
|
|
17
|
+
|
|
18
|
+
# Peel --json from anywhere; first non-flag positional is TEST_DIR.
|
|
19
|
+
_pos=()
|
|
20
|
+
for arg in "$@"; do
|
|
21
|
+
case "$arg" in
|
|
22
|
+
--json) JSON_OUT=1 ;;
|
|
23
|
+
*) _pos+=("$arg") ;;
|
|
24
|
+
esac
|
|
25
|
+
done
|
|
26
|
+
[[ "${#_pos[@]}" -gt 0 ]] && TEST_DIR="${_pos[0]}"
|
|
11
27
|
|
|
12
28
|
if [ ! -d "$TEST_DIR" ]; then
|
|
13
|
-
|
|
14
|
-
|
|
29
|
+
if [[ "$JSON_OUT" -eq 1 ]]; then
|
|
30
|
+
printf '{"gate_id":"audit-harness:%s:bias-count","result":"NOT_APPLICABLE","input_hash":"sha256:0000000000000000000000000000000000000000000000000000000000000000","policy_hash":"sha256:0000000000000000000000000000000000000000000000000000000000000000","metadata":{"reason":"test directory not found","path":"%s"}}\n' \
|
|
31
|
+
"${AUDIT_HARNESS_SIDE:-ci}" "$TEST_DIR"
|
|
32
|
+
fi
|
|
33
|
+
echo "ERROR: Test directory '$TEST_DIR' not found" >&2
|
|
34
|
+
echo "Usage: bash bias-count.sh [test-directory] [--json]" >&2
|
|
15
35
|
exit 1
|
|
16
36
|
fi
|
|
17
37
|
|
|
38
|
+
# Hash the test directory tree as the "input"
|
|
39
|
+
INPUT_HASH=$(find "$TEST_DIR" -type f \( -name "*.py" -o -name "*.ts" -o -name "*.js" -o -name "*.tsx" -o -name "*.jsx" -o -name "*.go" -o -name "*.rs" -o -name "*.java" -o -name "*.kt" -o -name "*.cs" -o -name "*.php" -o -name "*.rb" \) -exec sha256sum {} + 2>/dev/null | sort | sha256sum | awk '{print "sha256:"$1}')
|
|
40
|
+
|
|
41
|
+
if [[ "$JSON_OUT" -eq 1 ]]; then
|
|
42
|
+
exec 3>&1 # save stdout for the JSON object
|
|
43
|
+
exec 1>&2 # redirect human-readable to stderr
|
|
44
|
+
fi
|
|
45
|
+
|
|
18
46
|
echo "═══════════════════════════════════════"
|
|
19
47
|
echo " TEST BIAS SCAN — $TEST_DIR"
|
|
20
48
|
echo "═══════════════════════════════════════"
|
|
@@ -75,14 +103,32 @@ printf " %-30s %s\n" "Per-100-tests rate" "$RATE"
|
|
|
75
103
|
echo
|
|
76
104
|
|
|
77
105
|
# Grade
|
|
106
|
+
GRADE="LOW"
|
|
78
107
|
if [ "$(echo "$RATE <= 5" | bc)" -eq 1 ]; then
|
|
108
|
+
GRADE="LOW"
|
|
79
109
|
echo " Grade: LOW — no action needed"
|
|
80
110
|
elif [ "$(echo "$RATE <= 15" | bc)" -eq 1 ]; then
|
|
111
|
+
GRADE="MODERATE"
|
|
81
112
|
echo " Grade: MODERATE — review flagged tests"
|
|
82
113
|
elif [ "$(echo "$RATE <= 30" | bc)" -eq 1 ]; then
|
|
114
|
+
GRADE="HIGH"
|
|
83
115
|
echo " Grade: HIGH — systematic remediation needed"
|
|
84
116
|
else
|
|
117
|
+
GRADE="CRITICAL"
|
|
85
118
|
echo " Grade: CRITICAL — full rewrite of flagged tests"
|
|
86
119
|
fi
|
|
87
120
|
echo
|
|
88
121
|
echo "═══════════════════════════════════════"
|
|
122
|
+
|
|
123
|
+
if [[ "$JSON_OUT" -eq 1 ]]; then
|
|
124
|
+
# Restore stdout for JSON emission
|
|
125
|
+
exec 1>&3 3>&-
|
|
126
|
+
# bias-count is advisory — never FAILs, severity rises with grade
|
|
127
|
+
case "$GRADE" in
|
|
128
|
+
LOW) sev="info" ;;
|
|
129
|
+
MODERATE) sev="warn" ;;
|
|
130
|
+
HIGH|CRITICAL) sev="error" ;;
|
|
131
|
+
esac
|
|
132
|
+
printf '{"gate_id":"audit-harness:%s:bias-count","result":"ADVISORY","advisory_severity":"%s","input_hash":"%s","policy_hash":"sha256:0000000000000000000000000000000000000000000000000000000000000000","metadata":{"test_count":%d,"assertion_count":%d,"assertion_density":"%s","bias_total":%d,"per_100_rate":"%s","grade":"%s"}}\n' \
|
|
133
|
+
"${AUDIT_HARNESS_SIDE:-ci}" "$sev" "$INPUT_HASH" "$TEST_COUNT" "$ASSERT_COUNT" "$DENSITY" "$TOTAL_BIAS" "$RATE" "$GRADE"
|
|
134
|
+
fi
|
package/scripts/crap-score.py
CHANGED
|
@@ -16,6 +16,7 @@ from __future__ import annotations
|
|
|
16
16
|
|
|
17
17
|
import argparse
|
|
18
18
|
import csv
|
|
19
|
+
import hashlib
|
|
19
20
|
import json
|
|
20
21
|
import os
|
|
21
22
|
import shutil
|
|
@@ -36,6 +37,19 @@ class MethodScore:
|
|
|
36
37
|
kind: str # "src" or "test"
|
|
37
38
|
|
|
38
39
|
|
|
40
|
+
# Directories to skip during candidate discovery AND the --json input-hash
|
|
41
|
+
# walk. Single source of truth — both call sites MUST use this set so a repo
|
|
42
|
+
# with `reports/` (or `.next/`, `.nuxt/`, `.cache/`) gets identical treatment
|
|
43
|
+
# in both the candidate scan and the input-hash computation. Adding a dir
|
|
44
|
+
# here removes it from BOTH passes; that's the invariant this constant exists
|
|
45
|
+
# to preserve.
|
|
46
|
+
EXCLUDED_DIRS = {
|
|
47
|
+
".git", ".venv", "venv", "node_modules", "__pycache__",
|
|
48
|
+
"dist", "build", "target", ".tox", ".mypy_cache", ".pytest_cache",
|
|
49
|
+
".next", ".nuxt", ".cache", "reports",
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
|
|
39
53
|
def crap(complexity: int, coverage_pct: float) -> float:
|
|
40
54
|
cov = max(0.0, min(100.0, coverage_pct)) / 100.0
|
|
41
55
|
return (complexity ** 2) * ((1.0 - cov) ** 3) + complexity
|
|
@@ -81,12 +95,11 @@ def score_python(root: Path, kind: str) -> list[MethodScore]:
|
|
|
81
95
|
scanned = [t for t in candidates if (root / t).is_dir()]
|
|
82
96
|
if not scanned:
|
|
83
97
|
test_dirs = {"tests", "test", "spec", "specs", "features", "__tests__"}
|
|
84
|
-
ignore = {".git", ".venv", "venv", "node_modules", "dist", "build", "target", ".tox", ".mypy_cache", ".pytest_cache", "reports", "__pycache__"}
|
|
85
98
|
scanned = [
|
|
86
99
|
p.name for p in root.iterdir()
|
|
87
100
|
if p.is_dir()
|
|
88
101
|
and not p.name.startswith(".")
|
|
89
|
-
and p.name not in ignore
|
|
102
|
+
and p.name not in EXCLUDED_DIRS
|
|
90
103
|
and p.name not in test_dirs
|
|
91
104
|
and any(p.rglob("*.py"))
|
|
92
105
|
]
|
|
@@ -171,7 +184,7 @@ def score_go(root: Path, kind: str) -> list[MethodScore]:
|
|
|
171
184
|
|
|
172
185
|
coverage: dict[str, float] = {}
|
|
173
186
|
cov_out = root / "coverage.out"
|
|
174
|
-
if not cov_out.is_file():
|
|
187
|
+
if not cov_out.is_file() and which_or_none("go"):
|
|
175
188
|
run(["go", "test", "-coverprofile=coverage.out", "-covermode=atomic", "./..."], root)
|
|
176
189
|
if cov_out.is_file() and which_or_none("go"):
|
|
177
190
|
rc, out, _ = run(["go", "tool", "cover", "-func=coverage.out"], root)
|
|
@@ -263,7 +276,6 @@ def score_rust(root: Path, kind: str) -> list[MethodScore]:
|
|
|
263
276
|
except json.JSONDecodeError:
|
|
264
277
|
continue
|
|
265
278
|
fpath = rec.get("name", "")
|
|
266
|
-
metrics = rec.get("metrics", {}).get("cyclomatic", {})
|
|
267
279
|
for func in rec.get("spaces", []):
|
|
268
280
|
c = int(func.get("metrics", {}).get("cyclomatic", {}).get("sum", 1))
|
|
269
281
|
complexity.append((fpath, func.get("name", "<anon>"), c))
|
|
@@ -302,6 +314,10 @@ def main() -> int:
|
|
|
302
314
|
help="Test CRAP max (default 15)")
|
|
303
315
|
ap.add_argument("--threshold-avg", type=float, default=10.0,
|
|
304
316
|
help="Project average max (default 10)")
|
|
317
|
+
ap.add_argument("--json", action="store_true",
|
|
318
|
+
help="Emit gate-result envelope JSON on stdout (suitable for piping "
|
|
319
|
+
"to `audit-harness emit-evidence`). Preserves existing CSV/JSON "
|
|
320
|
+
"files written under --out.")
|
|
305
321
|
args = ap.parse_args()
|
|
306
322
|
|
|
307
323
|
root = Path(args.root).resolve()
|
|
@@ -377,7 +393,51 @@ def main() -> int:
|
|
|
377
393
|
if args.format in ("json", "both"):
|
|
378
394
|
(out_dir / "summary.json").write_text(json.dumps(summary, indent=2))
|
|
379
395
|
|
|
380
|
-
|
|
396
|
+
if args.json:
|
|
397
|
+
side = os.environ.get("AUDIT_HARNESS_SIDE", "ci")
|
|
398
|
+
# input_hash: SHA256 over all production+test source-file contents under root, sorted.
|
|
399
|
+
# Use os.walk with directory pruning instead of rglob — large vendored trees
|
|
400
|
+
# (node_modules, .venv, .git, build outputs) would otherwise dominate the walk
|
|
401
|
+
# cost on big repos and waste IO on files we already filter out by extension.
|
|
402
|
+
digest = hashlib.sha256()
|
|
403
|
+
exts = (".py", ".ts", ".tsx", ".js", ".jsx", ".go", ".rs", ".java", ".kt", ".cs", ".php", ".rb")
|
|
404
|
+
collected: list[Path] = []
|
|
405
|
+
for dirpath, dirs, files in os.walk(root):
|
|
406
|
+
dirs[:] = [d for d in dirs if d not in EXCLUDED_DIRS]
|
|
407
|
+
for fn in files:
|
|
408
|
+
if fn.endswith(exts):
|
|
409
|
+
collected.append(Path(dirpath) / fn)
|
|
410
|
+
for fp in sorted(collected):
|
|
411
|
+
digest.update(fp.read_bytes())
|
|
412
|
+
input_hash = f"sha256:{digest.hexdigest()}"
|
|
413
|
+
# policy_hash: SHA256 over the threshold tuple (stable, deterministic)
|
|
414
|
+
policy_repr = f"prod={args.threshold_prod}|test={args.threshold_test}|avg={args.threshold_avg}".encode()
|
|
415
|
+
policy_hash = f"sha256:{hashlib.sha256(policy_repr).hexdigest()}"
|
|
416
|
+
result = "PASS" if pass_ else "FAIL"
|
|
417
|
+
envelope = {
|
|
418
|
+
"gate_id": f"audit-harness:{side}:crap-score",
|
|
419
|
+
"result": result,
|
|
420
|
+
"input_hash": input_hash,
|
|
421
|
+
"policy_hash": policy_hash,
|
|
422
|
+
"metadata": {
|
|
423
|
+
"language": lang,
|
|
424
|
+
"thresholds": summary["thresholds"],
|
|
425
|
+
"production_max_crap": summary["production"]["max_crap"],
|
|
426
|
+
"production_avg_crap": summary["production"]["avg_crap"],
|
|
427
|
+
"production_methods_scored": summary["production"]["methods_scored"],
|
|
428
|
+
"production_blockers_count": len(prod_blockers),
|
|
429
|
+
"test_max_crap": summary["test"]["max_crap"],
|
|
430
|
+
"test_methods_scored": summary["test"]["methods_scored"],
|
|
431
|
+
"test_blockers_count": len(test_blockers),
|
|
432
|
+
"avg_fail": avg_fail,
|
|
433
|
+
"summary_path": str(out_dir / "summary.json"),
|
|
434
|
+
},
|
|
435
|
+
}
|
|
436
|
+
if not pass_:
|
|
437
|
+
envelope["failure_mode"] = "crap-threshold-exceeded"
|
|
438
|
+
print(json.dumps(envelope))
|
|
439
|
+
else:
|
|
440
|
+
print(json.dumps({"pass": pass_, "summary_path": str(out_dir / "summary.json")}))
|
|
381
441
|
return 0 if pass_ else 1
|
|
382
442
|
|
|
383
443
|
|
|
package/scripts/emit-evidence.sh
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# emit-evidence.sh — wrap a gate-result JSON envelope in an in-toto Statement v1.
|
|
3
|
+
#
|
|
4
|
+
# Reads a gate-result envelope JSON document from stdin (or --input), augments it
|
|
5
|
+
# with the fields the runner knows (timestamp, runner version, commit_sha), and
|
|
6
|
+
# emits a complete in-toto Statement v1 to stdout. Optionally signs the Statement
|
|
7
|
+
# via `cosign attest-blob` and/or pushes to the Rekor transparency log.
|
|
8
|
+
#
|
|
9
|
+
# Per intent-eval-lab/specs/evidence-bundle/v0.1.0-draft/SPEC.md the emitted
|
|
10
|
+
# Statement carries predicateType https://evals.intentsolutions.io/gate-result/v1.
|
|
11
|
+
#
|
|
12
|
+
# Usage:
|
|
13
|
+
# <gate> --json | bash emit-evidence.sh # unsigned, prints Statement
|
|
14
|
+
# bash emit-evidence.sh --input gate.json # read from file
|
|
15
|
+
# bash emit-evidence.sh --sign --key cosign.key < gate.json # cosign key-based sign
|
|
16
|
+
# bash emit-evidence.sh --sign --keyless < gate.json # cosign keyless (Fulcio OIDC)
|
|
17
|
+
# bash emit-evidence.sh --sign --rekor-url https://rekor.sigstore.dev < gate.json
|
|
18
|
+
# bash emit-evidence.sh --output bundle/row.json < gate.json
|
|
19
|
+
#
|
|
20
|
+
# Flags:
|
|
21
|
+
# --input PATH Read gate-result JSON from PATH instead of stdin
|
|
22
|
+
# --output PATH Write Statement (DSSE envelope if --sign) to PATH instead of stdout
|
|
23
|
+
# --sign Sign the Statement via cosign. Default: --keyless.
|
|
24
|
+
# --keyless Force cosign keyless signing (OIDC). Default when --sign and no --key.
|
|
25
|
+
# --key PATH Cosign keyref. Use instead of --keyless.
|
|
26
|
+
# --rekor-url URL Push the signed attestation to Rekor at URL. Implies --sign.
|
|
27
|
+
# Default Rekor URL when present without value: https://rekor.sigstore.dev
|
|
28
|
+
# --no-sign Explicitly skip signing (default behavior; documents the choice)
|
|
29
|
+
# --runner-version V Override the runner version string (default: from package.json)
|
|
30
|
+
# --commit-sha SHA Override the commit SHA (default: git rev-parse HEAD)
|
|
31
|
+
# --help, -h Print help
|
|
32
|
+
#
|
|
33
|
+
# Exit codes:
|
|
34
|
+
# 0 — Statement emitted successfully
|
|
35
|
+
# 1 — input JSON malformed or missing required fields
|
|
36
|
+
# 2 — signing requested but cosign not available
|
|
37
|
+
# 3 — cosign signing failed (this also covers a failed Rekor upload when --rekor-url is given)
|
|
38
|
+
#
|
|
39
|
+
# CISO gate (per ISEDC v1 Q1, 2026-05-10): pushing to a public transparency log
|
|
40
|
+
# (Rekor) against the predicate URI https://evals.intentsolutions.io/gate-result/v1
|
|
41
|
+
# is BLOCKED until DNSSEC + CAA records are verified on the namespace. The script
|
|
42
|
+
# does NOT enforce this — that is operator discipline. See bead `iel-4zr` in
|
|
43
|
+
# intent-eval-platform/intent-eval-lab/.beads/.
|
|
44
|
+
|
|
45
|
+
set -euo pipefail
|
|
46
|
+
|
|
47
|
+
INPUT="-"
|
|
48
|
+
OUTPUT=""
|
|
49
|
+
SIGN=0
|
|
50
|
+
KEYLESS=0
|
|
51
|
+
KEYREF=""
|
|
52
|
+
REKOR_URL=""
|
|
53
|
+
RUNNER_VERSION_OVERRIDE=""
|
|
54
|
+
COMMIT_SHA_OVERRIDE=""
|
|
55
|
+
PREDICATE_URI="https://evals.intentsolutions.io/gate-result/v1"
|
|
56
|
+
STATEMENT_TYPE="https://in-toto.io/Statement/v1"
|
|
57
|
+
|
|
58
|
+
while [[ $# -gt 0 ]]; do
|
|
59
|
+
case "$1" in
|
|
60
|
+
--input) INPUT="$2"; shift 2 ;;
|
|
61
|
+
--output) OUTPUT="$2"; shift 2 ;;
|
|
62
|
+
--sign) SIGN=1; shift ;;
|
|
63
|
+
--keyless) SIGN=1; KEYLESS=1; shift ;;
|
|
64
|
+
--key) SIGN=1; KEYREF="$2"; shift 2 ;;
|
|
65
|
+
--rekor-url)
|
|
66
|
+
SIGN=1
|
|
67
|
+
if [[ "${2:-}" =~ ^-- ]] || [[ -z "${2:-}" ]]; then
|
|
68
|
+
REKOR_URL="https://rekor.sigstore.dev"
|
|
69
|
+
shift
|
|
70
|
+
else
|
|
71
|
+
REKOR_URL="$2"
|
|
72
|
+
shift 2
|
|
73
|
+
fi
|
|
74
|
+
;;
|
|
75
|
+
--no-sign) SIGN=0; shift ;;
|
|
76
|
+
--runner-version) RUNNER_VERSION_OVERRIDE="$2"; shift 2 ;;
|
|
77
|
+
--commit-sha) COMMIT_SHA_OVERRIDE="$2"; shift 2 ;;
|
|
78
|
+
--help|-h) sed -n '2,40p' "$0"; exit 0 ;;
|
|
79
|
+
*) echo "emit-evidence: unknown flag $1" >&2; exit 1 ;;
|
|
80
|
+
esac
|
|
81
|
+
done
|
|
82
|
+
|
|
83
|
+
# --- Read input ---
|
|
84
|
+
if [[ "$INPUT" == "-" ]]; then
|
|
85
|
+
GATE_JSON=$(cat)
|
|
86
|
+
else
|
|
87
|
+
if [[ ! -r "$INPUT" ]]; then
|
|
88
|
+
echo "emit-evidence: cannot read $INPUT" >&2
|
|
89
|
+
exit 1
|
|
90
|
+
fi
|
|
91
|
+
GATE_JSON=$(cat "$INPUT")
|
|
92
|
+
fi
|
|
93
|
+
|
|
94
|
+
if [[ -z "$GATE_JSON" ]]; then
|
|
95
|
+
echo "emit-evidence: empty input" >&2
|
|
96
|
+
exit 1
|
|
97
|
+
fi
|
|
98
|
+
|
|
99
|
+
# --- Resolve runner + commit metadata ---
|
|
100
|
+
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
|
101
|
+
PKG_JSON="${SCRIPT_DIR}/../package.json"
|
|
102
|
+
|
|
103
|
+
if [[ -n "$RUNNER_VERSION_OVERRIDE" ]]; then
|
|
104
|
+
RUNNER="$RUNNER_VERSION_OVERRIDE"
|
|
105
|
+
elif [[ -f "$PKG_JSON" ]]; then
|
|
106
|
+
# Pass PKG_JSON via argv so paths with quotes/spaces/specials don't break the python source.
|
|
107
|
+
VER=$(python3 -c "import json, sys; print(json.load(open(sys.argv[1]))['version'])" "$PKG_JSON" 2>/dev/null || echo "unknown")
|
|
108
|
+
RUNNER="audit-harness@${VER}"
|
|
109
|
+
else
|
|
110
|
+
RUNNER="audit-harness@unknown"
|
|
111
|
+
fi
|
|
112
|
+
|
|
113
|
+
if [[ -n "$COMMIT_SHA_OVERRIDE" ]]; then
|
|
114
|
+
COMMIT_SHA="$COMMIT_SHA_OVERRIDE"
|
|
115
|
+
else
|
|
116
|
+
COMMIT_SHA=$(git rev-parse HEAD 2>/dev/null || echo "0000000")
|
|
117
|
+
fi
|
|
118
|
+
|
|
119
|
+
TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
|
|
120
|
+
|
|
121
|
+
# --- Compose the Statement via python (deterministic JSON shape, escaping handled) ---
|
|
122
|
+
STATEMENT=$(GATE_JSON="$GATE_JSON" PREDICATE_URI="$PREDICATE_URI" STATEMENT_TYPE="$STATEMENT_TYPE" \
|
|
123
|
+
RUNNER="$RUNNER" COMMIT_SHA="$COMMIT_SHA" TIMESTAMP="$TIMESTAMP" \
|
|
124
|
+
python3 - <<'PY'
|
|
125
|
+
import json, os, sys
|
|
126
|
+
|
|
127
|
+
gate = json.loads(os.environ["GATE_JSON"])
|
|
128
|
+
|
|
129
|
+
required = ["gate_id", "result", "input_hash", "policy_hash"]
|
|
130
|
+
missing = [k for k in required if k not in gate]
|
|
131
|
+
if missing:
|
|
132
|
+
sys.stderr.write(f"emit-evidence: gate-result missing required keys: {missing}\n")
|
|
133
|
+
sys.exit(1)
|
|
134
|
+
|
|
135
|
+
# Augment predicate with runner-supplied fields
|
|
136
|
+
predicate = {
|
|
137
|
+
"gate_id": gate["gate_id"],
|
|
138
|
+
"result": gate["result"],
|
|
139
|
+
"policy_hash": gate["policy_hash"],
|
|
140
|
+
"input_hash": gate["input_hash"],
|
|
141
|
+
"timestamp": os.environ["TIMESTAMP"],
|
|
142
|
+
"runner": os.environ["RUNNER"],
|
|
143
|
+
"commit_sha": os.environ["COMMIT_SHA"],
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
# Carry forward optional fields if present
|
|
147
|
+
for opt in ("metadata", "failure_mode", "advisory_severity"):
|
|
148
|
+
if opt in gate:
|
|
149
|
+
predicate[opt] = gate[opt]
|
|
150
|
+
|
|
151
|
+
# Subject naming: subject.name MUST equal predicate.gate_id (SPEC § 6 R8)
|
|
152
|
+
# Subject digest: subject.digest.sha256 MUST equal predicate.input_hash (SPEC § 6 R9)
|
|
153
|
+
input_hash = gate["input_hash"]
|
|
154
|
+
if not input_hash.startswith("sha256:"):
|
|
155
|
+
sys.stderr.write(f"emit-evidence: input_hash must be sha256:-prefixed, got: {input_hash}\n")
|
|
156
|
+
sys.exit(1)
|
|
157
|
+
digest_hex = input_hash[len("sha256:"):]
|
|
158
|
+
|
|
159
|
+
statement = {
|
|
160
|
+
"_type": os.environ["STATEMENT_TYPE"],
|
|
161
|
+
"subject": [{
|
|
162
|
+
"name": gate["gate_id"],
|
|
163
|
+
"digest": {"sha256": digest_hex},
|
|
164
|
+
}],
|
|
165
|
+
"predicateType": os.environ["PREDICATE_URI"],
|
|
166
|
+
"predicate": predicate,
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
print(json.dumps(statement))
|
|
170
|
+
PY
|
|
171
|
+
)
|
|
172
|
+
|
|
173
|
+
if [[ -z "$STATEMENT" ]]; then
|
|
174
|
+
echo "emit-evidence: failed to compose Statement" >&2
|
|
175
|
+
exit 1
|
|
176
|
+
fi
|
|
177
|
+
|
|
178
|
+
# --- OTel event (best-effort no-op if collector absent) ---
|
|
179
|
+
# Fire agent.rollout.gate.evaluated per intent-eval-lab/000-docs/001-DR-RFC-...md.
|
|
180
|
+
# We emit a single OTLP-shaped JSON line to stderr when AUDIT_HARNESS_OTEL=1
|
|
181
|
+
# OR an OTEL_EXPORTER_OTLP_ENDPOINT is set. Real exporter wiring is consumer-side;
|
|
182
|
+
# we emit a structured signal that any collector can scrape via stderr capture.
|
|
183
|
+
if [[ "${AUDIT_HARNESS_OTEL:-0}" == "1" ]] || [[ -n "${OTEL_EXPORTER_OTLP_ENDPOINT:-}" ]]; then
|
|
184
|
+
GATE_ID=$(echo "$GATE_JSON" | python3 -c "import json,sys; print(json.load(sys.stdin).get('gate_id',''))" 2>/dev/null || echo "")
|
|
185
|
+
RESULT=$(echo "$GATE_JSON" | python3 -c "import json,sys; print(json.load(sys.stdin).get('result',''))" 2>/dev/null || echo "")
|
|
186
|
+
printf '[OTEL] {"name":"agent.rollout.gate.evaluated","attributes":{"gate.id":"%s","gate.result":"%s","gate.runner":"%s","gate.commit_sha":"%s"},"timestamp":"%s"}\n' \
|
|
187
|
+
"$GATE_ID" "$RESULT" "$RUNNER" "$COMMIT_SHA" "$TIMESTAMP" >&2
|
|
188
|
+
fi
|
|
189
|
+
|
|
190
|
+
# --- Sign + emit ---
|
|
191
|
+
emit() {
|
|
192
|
+
local content="$1"
|
|
193
|
+
if [[ -n "$OUTPUT" ]]; then
|
|
194
|
+
mkdir -p "$(dirname "$OUTPUT")"
|
|
195
|
+
printf '%s\n' "$content" > "$OUTPUT"
|
|
196
|
+
echo "emit-evidence: wrote $OUTPUT" >&2
|
|
197
|
+
else
|
|
198
|
+
printf '%s\n' "$content"
|
|
199
|
+
fi
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
if [[ "$SIGN" -eq 0 ]]; then
|
|
203
|
+
emit "$STATEMENT"
|
|
204
|
+
exit 0
|
|
205
|
+
fi
|
|
206
|
+
|
|
207
|
+
# Signing requires cosign. We use `cosign attest-blob` (canonical in-toto
|
|
208
|
+
# signing). There is no `cosign sign-blob` fallback: a missing cosign binary
|
|
209
|
+
# exits 2 and a failed `attest-blob` run exits 3.
|
|
210
|
+
if ! command -v cosign >/dev/null 2>&1; then
|
|
211
|
+
echo "emit-evidence: --sign requested but cosign is not installed (https://docs.sigstore.dev/cosign/installation/)" >&2
|
|
212
|
+
exit 2
|
|
213
|
+
fi
|
|
214
|
+
|
|
215
|
+
# Stage the Statement to a temp file for cosign to consume
|
|
216
|
+
TMP=$(mktemp -d)
|
|
217
|
+
trap 'rm -rf "$TMP"' EXIT
|
|
218
|
+
STATEMENT_FILE="$TMP/statement.json"
|
|
219
|
+
printf '%s\n' "$STATEMENT" > "$STATEMENT_FILE"
|
|
220
|
+
ENVELOPE_FILE="$TMP/envelope.dsse.json"
|
|
221
|
+
|
|
222
|
+
COSIGN_ARGS=("attest-blob" "--predicate" "$STATEMENT_FILE" "--type" "$PREDICATE_URI")
|
|
223
|
+
if [[ -n "$KEYREF" ]]; then
|
|
224
|
+
COSIGN_ARGS+=("--key" "$KEYREF")
|
|
225
|
+
elif [[ "$KEYLESS" -eq 1 ]] || [[ -z "$KEYREF" ]]; then
|
|
226
|
+
COSIGN_ARGS+=("--yes") # accept Fulcio OIDC keyless
|
|
227
|
+
fi
|
|
228
|
+
if [[ -n "$REKOR_URL" ]]; then
|
|
229
|
+
COSIGN_ARGS+=("--rekor-url" "$REKOR_URL")
|
|
230
|
+
COSIGN_ARGS+=("--tlog-upload=true")
|
|
231
|
+
else
|
|
232
|
+
COSIGN_ARGS+=("--tlog-upload=false")
|
|
233
|
+
fi
|
|
234
|
+
COSIGN_ARGS+=("--output-signature" "$ENVELOPE_FILE")
|
|
235
|
+
# `cosign attest-blob` needs a "blob" — the input the predicate attests to.
|
|
236
|
+
# Per SPEC subject naming, that's the input_hash; we use a virtual artifact name.
|
|
237
|
+
ARTIFACT_NAME="$(echo "$STATEMENT" | python3 -c "import json,sys; print(json.load(sys.stdin)['subject'][0]['name'])")"
|
|
238
|
+
|
|
239
|
+
# Stage a placeholder blob for cosign to attest. NOTE: the blob is a copy of the
|
|
240
|
+
# Statement file, so its sha256 is NOT the declared input_hash.
|
|
241
|
+
# (Cosign re-hashes the blob; we trust the gate's input_hash to be the canonical
|
|
242
|
+
# subject. For v0.x we accept this round-trip-by-construction.)
|
|
243
|
+
BLOB_FILE="$TMP/$ARTIFACT_NAME.blob"
|
|
244
|
+
# A real subject artifact would be the file the gate evaluated; for the envelope
|
|
245
|
+
# we use the in-band predicate as the blob. Verification only needs the DSSE
|
|
246
|
+
# wrap + the predicate, not the original artifact bytes.
|
|
247
|
+
cp "$STATEMENT_FILE" "$BLOB_FILE"
|
|
248
|
+
|
|
249
|
+
if ! cosign "${COSIGN_ARGS[@]}" "$BLOB_FILE" >&2; then
|
|
250
|
+
echo "emit-evidence: cosign signing failed" >&2
|
|
251
|
+
exit 3
|
|
252
|
+
fi
|
|
253
|
+
|
|
254
|
+
emit "$(cat "$ENVELOPE_FILE")"
|
|
255
|
+
echo "emit-evidence: signed envelope emitted${REKOR_URL:+ (Rekor: $REKOR_URL)}" >&2
|
|
256
|
+
exit 0
|