@intentsolutions/audit-harness 0.1.0 → 1.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +435 -0
- package/LICENSE +202 -21
- package/NOTICE +15 -0
- package/README.md +36 -4
- package/bin/audit-harness.js +108 -8
- package/docs/gate-promotion.md +45 -0
- package/package.json +13 -9
- package/schemas/audit-profile/layer-applicability.md +146 -0
- package/schemas/audit-profile/registry.v1.json +87 -0
- package/schemas/audit-profile/v1.schema.json +294 -0
- package/schemas/conform/v1/agent-frontmatter.schema.json +24 -0
- package/schemas/conform/v1/mcp-config.schema.json +31 -0
- package/schemas/conform/v1/plugin-manifest.schema.json +26 -0
- package/schemas/conform/v1/skillmd-frontmatter.schema.json +40 -0
- package/schemas/currency/pins.v1.json +55 -0
- package/scripts/arch-check.sh +25 -1
- package/scripts/audit.py +386 -0
- package/scripts/bias-count.sh +50 -4
- package/scripts/classify.py +403 -0
- package/scripts/conform.py +481 -0
- package/scripts/crap-score.py +65 -5
- package/scripts/currency.py +118 -0
- package/scripts/emit-evidence.sh +256 -0
- package/scripts/escape-scan.sh +58 -4
- package/scripts/fp-rate.py +145 -0
- package/scripts/gen-layer-applicability.py +157 -0
- package/scripts/gherkin-lint.sh +53 -9
- package/scripts/harness-hash.sh +78 -5
- package/scripts/scan.py +228 -0
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
audit-harness gen-layer-applicability — project the canonical registry datum into
|
|
4
|
+
the human-readable layer-applicability matrix.
|
|
5
|
+
|
|
6
|
+
`schemas/audit-profile/registry.v1.json` is THE single source of truth for "which
|
|
7
|
+
gates apply to repo-type X, in which dimension, at what applicability". This
|
|
8
|
+
generator renders `schemas/audit-profile/layer-applicability.md` as a PROJECTION
|
|
9
|
+
of that datum so the doc can never silently drift from the registry the classifier
|
|
10
|
+
actually resolves against (PP-PLAN-040 Phase 0, bead c2b).
|
|
11
|
+
|
|
12
|
+
Modes:
|
|
13
|
+
(default) print the rendered markdown to stdout
|
|
14
|
+
--write write it to schemas/audit-profile/layer-applicability.md
|
|
15
|
+
--check regenerate in-memory and diff against the committed file;
|
|
16
|
+
exit 1 on drift (the CI `layer-applicability-drift` gate)
|
|
17
|
+
|
|
18
|
+
Stdlib only. Read-only except in --write mode (which only writes the one doc).
|
|
19
|
+
"""
|
|
20
|
+
import argparse
|
|
21
|
+
import difflib
|
|
22
|
+
import hashlib
|
|
23
|
+
import json
|
|
24
|
+
import os
|
|
25
|
+
import sys
|
|
26
|
+
|
|
27
|
+
# Directory holding this script; every default path below is derived from it.
HERE = os.path.dirname(os.path.abspath(__file__))

# The registry datum and the generated doc live side by side in
# ../schemas/audit-profile/ relative to this script.
_PROFILE_DIR = os.path.join(HERE, "..", "schemas", "audit-profile")
REGISTRY = os.path.join(_PROFILE_DIR, "registry.v1.json")
DOC = os.path.join(_PROFILE_DIR, "layer-applicability.md")

# Applicability level -> glyph used in the rendered tables and in the legend.
GLYPH = {
    "required": "✅",
    "recommended": "⭕",
    "conditional": "⚠",
    "waived": "❌",
}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def sha256_file(path):
    """Return the SHA-256 digest of the file at *path* as ``sha256:<hex>``.

    The file is opened in binary mode and consumed in 64 KiB reads so the
    whole content never has to be held in memory at once.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        while True:
            block = stream.read(65536)
            if not block:  # b"" signals end of file
                break
            digest.update(block)
    return "sha256:" + digest.hexdigest()
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def row(gate):
    """Render one gate mapping as a single markdown table row.

    ``gate_id`` is required (a missing key raises ``KeyError``). ``dimension``
    and ``applicability`` fall back to an empty string, ``enforcement`` to
    ``"advisory"``, and an absent/empty ``tool`` is shown as an em dash.
    """
    gate_id = gate["gate_id"]
    dimension = gate.get("dimension", "")
    applicability = gate.get("applicability", "")
    marker = GLYPH.get(applicability, "")  # unknown levels get no glyph
    enforcement = gate.get("enforcement", "advisory")
    tool_name = gate.get("tool")
    tool_cell = ("`" + tool_name + "`") if tool_name else "—"
    return f"| `{gate_id}` | {dimension} | {marker} {applicability} | {enforcement} | {tool_cell} |"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def table(gates):
    """Render *gates* as a markdown table, one row per gate.

    Rows are ordered by ``(dimension, gate_id)`` so the output does not depend
    on the order the gates appear in the registry. An empty iterable yields
    just the header and separator lines.
    """
    def _ordering(gate):
        return (gate.get("dimension", ""), gate["gate_id"])

    header = [
        "| Gate | Dimension | Applicability | Enforcement | Tool |",
        "|---|---|---|---|---|",
    ]
    body = [row(gate) for gate in sorted(gates, key=_ordering)]
    return "\n".join(header + body)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def render(registry, registry_hash):
    """Build the full layer-applicability markdown document.

    *registry* is the parsed registry mapping; the keys read here are
    ``description``, ``base``, ``classifications`` and ``overlays`` (all
    optional). *registry_hash* is embedded verbatim in the banner. The result
    always ends with exactly one trailing newline.
    """
    legend = " · ".join(
        f"{GLYPH[k]} {k}" for k in ("required", "recommended", "conditional", "waived")
    )

    # Fixed preamble: banner, registry description, legend, base-gate table.
    doc = [
        "# Layer Applicability — GENERATED from `registry.v1.json`",
        "",
        "> ⚠️ **GENERATED FILE — do not edit by hand.**",
        ("> Source of truth: [`registry.v1.json`](registry.v1.json) "
         "(the canonical dimension→gate datum; `classify` resolves against it)."),
        ("> Regenerate: `audit-harness gen-layer-applicability --write` "
         "(or `python3 scripts/gen-layer-applicability.py --write`)."),
        "> CI gate `layer-applicability-drift` fails the build if this file drifts from the registry.",
        ">",
        f"> registry `{registry_hash}`",
        "",
        registry.get("description", "").strip(),
        "",
        "**Legend (applicability):** " + legend,
        "",
        ("Every gate defaults to `enforcement: advisory`. Blocking is **earned** — "
         "engineer-pinned in the target repo's `tests/TESTING.md`, FP-rate-gated "
         "(see [`gate-promotion.md`](../../docs/gate-promotion.md))."),
        "",
        "## Base gates (apply to every repo)",
        "",
        table(registry.get("base", [])),
        "",
        "## By classification",
        "",
        ("A repo carries the **UNION** of every classification it matches "
         "(`classify` never picks a single winner). Gates dedup by `gate_id`, "
         "keeping the highest applicability."),
        "",
    ]

    # One sub-section per classification, in sorted key order.
    classifications = registry.get("classifications", {})
    for kind in sorted(classifications):
        doc += [f"### `{kind}`", "", table(classifications[kind]), ""]

    # The overlays section is emitted only when the registry defines any.
    overlays = registry.get("overlays", {})
    if overlays:
        doc += ["## Overlays", ""]
        for name in sorted(overlays):
            overlay = overlays[name]
            doc += [f"### `{name}`", "", overlay.get("description", "").strip()]
            promoted = overlay.get("promote_to_required", [])
            if promoted:
                doc.append("")
                doc.append(
                    "Promotes to **required**: "
                    + ", ".join(f"`{d}`" for d in promoted)
                    + "."
                )
            doc.append("")

    # Collapse trailing blank lines into a single final newline.
    return "\n".join(doc).rstrip() + "\n"
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def main():
    """CLI entry point: render the registry and print, write, or check it.

    With no mode flag the rendered markdown goes to stdout. ``--check``
    compares it with the file at ``--out`` and exits 1 (printing a unified
    diff to stderr) on any difference or if the file is missing; it is
    evaluated before ``--write``, so it wins when both are given. ``--write``
    overwrites ``--out`` with the rendered document.
    """
    parser = argparse.ArgumentParser(description="Project registry.v1.json -> layer-applicability.md")
    parser.add_argument("--write", action="store_true", help="write the doc to its canonical path")
    parser.add_argument("--check", action="store_true", help="fail (exit 1) if the committed doc drifts")
    parser.add_argument("--registry", default=REGISTRY)
    parser.add_argument("--out", default=DOC)
    args = parser.parse_args()

    source = os.path.abspath(args.registry)
    with open(source, "r", encoding="utf-8") as handle:
        datum = json.load(handle)
    # The hash in the banner is taken over the registry file's raw bytes.
    fresh = render(datum, sha256_file(source))

    if args.check:
        try:
            with open(args.out, "r", encoding="utf-8") as handle:
                committed = handle.read()
        except FileNotFoundError:
            print(f"gen-layer-applicability: {args.out} missing — run --write", file=sys.stderr)
            sys.exit(1)
        if committed == fresh:
            print("gen-layer-applicability: layer-applicability.md matches the registry datum")
            sys.exit(0)
        delta = difflib.unified_diff(
            committed.splitlines(True), fresh.splitlines(True),
            fromfile="committed", tofile="generated",
        )
        sys.stderr.write("".join(delta))
        sys.stderr.write("\ngen-layer-applicability: DRIFT — regenerate with --write\n")
        sys.exit(1)

    if args.write:
        with open(args.out, "w", encoding="utf-8") as handle:
            handle.write(fresh)
        # Report the path relative to the package root (the parent of scripts/).
        shown = os.path.relpath(args.out, os.path.join(HERE, '..'))
        print(f"gen-layer-applicability: wrote {shown}")
        sys.exit(0)

    sys.stdout.write(fresh)


if __name__ == "__main__":
    main()
|
package/scripts/gherkin-lint.sh
CHANGED
|
@@ -15,11 +15,13 @@ set -euo pipefail
|
|
|
15
15
|
|
|
16
16
|
PATH_ARG="features/"
|
|
17
17
|
STRICT=0
|
|
18
|
+
JSON_OUT=0
|
|
18
19
|
|
|
19
20
|
while [[ $# -gt 0 ]]; do
|
|
20
21
|
case "$1" in
|
|
21
22
|
--path) PATH_ARG="$2"; shift 2 ;;
|
|
22
23
|
--strict) STRICT=1; shift ;;
|
|
24
|
+
--json) JSON_OUT=1; shift ;;
|
|
23
25
|
--help|-h)
|
|
24
26
|
sed -n '2,15p' "$0"; exit 0 ;;
|
|
25
27
|
*) echo "gherkin-lint: unknown flag $1" >&2; exit 2 ;;
|
|
@@ -27,15 +29,40 @@ while [[ $# -gt 0 ]]; do
|
|
|
27
29
|
done
|
|
28
30
|
|
|
29
31
|
if [[ ! -d "$PATH_ARG" ]]; then
|
|
32
|
+
if [[ "$JSON_OUT" -eq 1 ]]; then
|
|
33
|
+
printf '{"gate_id":"audit-harness:%s:gherkin-lint","result":"NOT_APPLICABLE","input_hash":"sha256:0000000000000000000000000000000000000000000000000000000000000000","policy_hash":"sha256:0000000000000000000000000000000000000000000000000000000000000000","metadata":{"reason":"path not found","path":"%s"}}\n' \
|
|
34
|
+
"${AUDIT_HARNESS_SIDE:-ci}" "$PATH_ARG"
|
|
35
|
+
fi
|
|
30
36
|
echo "gherkin-lint: path not found: $PATH_ARG" >&2
|
|
31
37
|
exit 2
|
|
32
38
|
fi
|
|
33
39
|
|
|
40
|
+
INPUT_HASH=$(find "$PATH_ARG" -name "*.feature" -type f -exec sha256sum {} \; 2>/dev/null | sort | sha256sum | awk '{print "sha256:"$1}')
|
|
41
|
+
|
|
42
|
+
if [[ "$JSON_OUT" -eq 1 ]]; then
|
|
43
|
+
exec 3>&1
|
|
44
|
+
exec 1>&2
|
|
45
|
+
fi
|
|
46
|
+
|
|
34
47
|
WARN_COUNT=0
|
|
35
48
|
ERROR_COUNT=0
|
|
36
49
|
|
|
37
50
|
warn() { echo "WARN $1:$2 $3"; WARN_COUNT=$((WARN_COUNT + 1)); }
|
|
38
|
-
|
|
51
|
+
|
|
52
|
+
# process_awk_output — funnel awk-printed WARN/ERROR lines through the bash
|
|
53
|
+
# counters so the summary + exit code reflect awk-fallback findings (the
|
|
54
|
+
# subprocesses below can't otherwise touch the parent-shell counters).
|
|
55
|
+
# Single-pass awk counts both at once; no-match handled cleanly under
|
|
56
|
+
# set -euo pipefail via the `+0` numeric coercions.
|
|
57
|
+
# process_awk_output FINDINGS
#   Adds the number of lines starting with "WARN " / "ERROR " in FINDINGS to
#   the global WARN_COUNT / ERROR_COUNT, then echoes FINDINGS unchanged.
#   Empty input is a no-op that returns success.
process_awk_output() {
  local findings="$1"
  if [ -z "$findings" ]; then
    return 0
  fi
  local warn_hits=0 error_hits=0
  # One awk pass yields both tallies; `+0` prints 0 when a counter was never set.
  read -r warn_hits error_hits < <(
    awk '/^WARN /{w++} /^ERROR /{e++} END {print w+0, e+0}' <<< "$findings"
  )
  WARN_COUNT=$((WARN_COUNT + warn_hits))
  ERROR_COUNT=$((ERROR_COUNT + error_hits))
  printf '%s\n' "$findings"
}
|
|
39
66
|
|
|
40
67
|
# 1. Prefer official gherkin-lint if available
|
|
41
68
|
if command -v gherkin-lint >/dev/null 2>&1; then
|
|
@@ -48,7 +75,7 @@ else
|
|
|
48
75
|
|
|
49
76
|
while IFS= read -r -d '' feature; do
|
|
50
77
|
# Imperative verbs / CSS selectors in steps (declarative warning)
|
|
51
|
-
awk -v file="$feature" '
|
|
78
|
+
process_awk_output "$(awk -v file="$feature" '
|
|
52
79
|
/^[[:space:]]*(Given|When|Then|And|But)/ {
|
|
53
80
|
line = $0
|
|
54
81
|
if (line ~ /click|type|fill[ _]in|press|select.*from[ _]dropdown/) {
|
|
@@ -58,10 +85,10 @@ else
|
|
|
58
85
|
printf "WARN %s:%d CSS selector / xpath in step (prefer business language)\n", file, NR
|
|
59
86
|
}
|
|
60
87
|
}
|
|
61
|
-
' "$feature"
|
|
88
|
+
' "$feature")"
|
|
62
89
|
|
|
63
90
|
# Scenario length (> 10 steps)
|
|
64
|
-
awk -v file="$feature" '
|
|
91
|
+
process_awk_output "$(awk -v file="$feature" '
|
|
65
92
|
/^[[:space:]]*Scenario/ { sc = NR; steps = 0; sn = $0; next }
|
|
66
93
|
/^[[:space:]]*(Given|When|Then|And|But)/ { if (sc) steps++ }
|
|
67
94
|
/^[[:space:]]*Scenario|^[[:space:]]*Feature|^$/ {
|
|
@@ -75,7 +102,7 @@ else
|
|
|
75
102
|
printf "WARN %s:%d scenario has %d steps (>10 is too long)\n", file, sc, steps
|
|
76
103
|
}
|
|
77
104
|
}
|
|
78
|
-
' "$feature"
|
|
105
|
+
' "$feature")"
|
|
79
106
|
|
|
80
107
|
# Repeated Givens without Background (3+ identical Given lines)
|
|
81
108
|
dupe=$(awk '/^[[:space:]]*Given/ { print }' "$feature" | sort | uniq -c | awk '$1 >= 3 { print }')
|
|
@@ -84,9 +111,7 @@ else
|
|
|
84
111
|
fi
|
|
85
112
|
|
|
86
113
|
# "And" at scenario start (grammar error)
|
|
87
|
-
awk -v file="$feature" '
|
|
88
|
-
prev_blank = 1
|
|
89
|
-
/^[[:space:]]*$/ { prev_blank = 1; next }
|
|
114
|
+
process_awk_output "$(awk -v file="$feature" '
|
|
90
115
|
/^[[:space:]]*Scenario/ { in_scenario = 1; step_count = 0; next }
|
|
91
116
|
/^[[:space:]]*(Given|When|Then|And|But)/ {
|
|
92
117
|
if (in_scenario && step_count == 0 && /^[[:space:]]*And/) {
|
|
@@ -94,7 +119,7 @@ else
|
|
|
94
119
|
}
|
|
95
120
|
step_count++
|
|
96
121
|
}
|
|
97
|
-
' "$feature"
|
|
122
|
+
' "$feature")"
|
|
98
123
|
|
|
99
124
|
done < <(find "$PATH_ARG" -name "*.feature" -print0)
|
|
100
125
|
fi
|
|
@@ -102,6 +127,25 @@ fi
|
|
|
102
127
|
echo ""
|
|
103
128
|
echo "gherkin-lint summary: $WARN_COUNT warning(s), $ERROR_COUNT error(s)"
|
|
104
129
|
|
|
130
|
+
if [[ "$JSON_OUT" -eq 1 ]]; then
|
|
131
|
+
exec 1>&3 3>&-
|
|
132
|
+
result="PASS"
|
|
133
|
+
sev_block=""
|
|
134
|
+
if [[ "$ERROR_COUNT" -gt 0 ]]; then
|
|
135
|
+
result="FAIL"
|
|
136
|
+
elif [[ "$WARN_COUNT" -gt 0 ]]; then
|
|
137
|
+
if [[ "$STRICT" -eq 1 ]]; then
|
|
138
|
+
result="FAIL"
|
|
139
|
+
else
|
|
140
|
+
result="ADVISORY"
|
|
141
|
+
sev_block=',"advisory_severity":"warn"'
|
|
142
|
+
fi
|
|
143
|
+
fi
|
|
144
|
+
printf '{"gate_id":"audit-harness:%s:gherkin-lint","result":"%s"%s,"input_hash":"%s","policy_hash":"sha256:0000000000000000000000000000000000000000000000000000000000000000","metadata":{"warnings":%d,"errors":%d,"strict":%s,"path":"%s"}}\n' \
|
|
145
|
+
"${AUDIT_HARNESS_SIDE:-ci}" "$result" "$sev_block" "$INPUT_HASH" "$WARN_COUNT" "$ERROR_COUNT" \
|
|
146
|
+
"$([[ "$STRICT" -eq 1 ]] && echo true || echo false)" "$PATH_ARG"
|
|
147
|
+
fi
|
|
148
|
+
|
|
105
149
|
if [[ "$ERROR_COUNT" -gt 0 ]]; then
|
|
106
150
|
exit 1
|
|
107
151
|
fi
|
package/scripts/harness-hash.sh
CHANGED
|
@@ -6,19 +6,48 @@
|
|
|
6
6
|
# causes escape-scan.sh to REFUSE the AI diff.
|
|
7
7
|
#
|
|
8
8
|
# Usage:
|
|
9
|
-
# bash harness-hash.sh --init
|
|
10
|
-
# bash harness-hash.sh --verify
|
|
11
|
-
# bash harness-hash.sh --
|
|
9
|
+
# bash harness-hash.sh --init # write manifest (engineer-initiated)
|
|
10
|
+
# bash harness-hash.sh --verify # compare current hashes to manifest
|
|
11
|
+
# bash harness-hash.sh --verify --json # machine-readable JSON to stdout (verify only)
|
|
12
|
+
# bash harness-hash.sh --list # show which files are pinned
|
|
12
13
|
#
|
|
13
14
|
# Exit codes:
|
|
14
15
|
# 0 — OK (pin matches, or init succeeded)
|
|
15
16
|
# 2 — HARNESS_TAMPERED (hash mismatch)
|
|
16
17
|
# 3 — no manifest found (--verify without --init)
|
|
18
|
+
#
|
|
19
|
+
# JSON mode:
|
|
20
|
+
# stdout = single JSON object suitable for piping to `audit-harness emit-evidence`
|
|
21
|
+
# stderr = unchanged human-readable summary (preserves backward-compat)
|
|
22
|
+
# exit codes unchanged
|
|
17
23
|
|
|
18
24
|
set -euo pipefail
|
|
19
25
|
|
|
26
|
+
# Cross-platform SHA-256: `sha256sum` ships with GNU coreutils (Linux);
|
|
27
|
+
# macOS only has `shasum -a 256`. Both produce identical `<hash> <file>`
|
|
28
|
+
# output, so downstream awk parsing is unchanged.
|
|
29
|
+
if command -v sha256sum >/dev/null 2>&1; then
|
|
30
|
+
SHA256_CMD=(sha256sum)
|
|
31
|
+
elif command -v shasum >/dev/null 2>&1; then
|
|
32
|
+
SHA256_CMD=(shasum -a 256)
|
|
33
|
+
else
|
|
34
|
+
echo "harness-hash: neither sha256sum nor shasum found in PATH" >&2
|
|
35
|
+
exit 2
|
|
36
|
+
fi
|
|
37
|
+
|
|
20
38
|
ROOT="${ROOT:-$(pwd)}"
|
|
21
39
|
MANIFEST="${ROOT}/.harness-hash"
|
|
40
|
+
JSON_OUT=0
|
|
41
|
+
|
|
42
|
+
# Peel --json from anywhere in args (additive, doesn't disturb existing arg shape)
|
|
43
|
+
_filtered_args=()
|
|
44
|
+
for arg in "$@"; do
|
|
45
|
+
case "$arg" in
|
|
46
|
+
--json) JSON_OUT=1 ;;
|
|
47
|
+
*) _filtered_args+=("$arg") ;;
|
|
48
|
+
esac
|
|
49
|
+
done
|
|
50
|
+
set -- "${_filtered_args[@]+"${_filtered_args[@]}"}"
|
|
22
51
|
|
|
23
52
|
PATTERNS=(
|
|
24
53
|
# Wall 1: acceptance
|
|
@@ -42,6 +71,27 @@ PATTERNS=(
|
|
|
42
71
|
"stryker.config.js"
|
|
43
72
|
)
|
|
44
73
|
|
|
74
|
+
# Optional per-repo extra patterns appended from .harness-hash-extra-patterns
|
|
75
|
+
# at the repo root. Used by repos whose policy files don't match the default
|
|
76
|
+
# canonical patterns above — e.g., the audit-harness repo itself pins its own
|
|
77
|
+
# scripts (scripts/*.sh + scripts/*.py + bin/audit-harness.js), which are the
|
|
78
|
+
# policy enforcement surface but aren't covered by the consumer-facing
|
|
79
|
+
# defaults. Lines beginning with `#` are comments; blank lines are ignored.
|
|
80
|
+
# This mechanism is additive — repos without the file get exactly the
|
|
81
|
+
# default behavior, so consumer repos are not affected.
|
|
82
|
+
EXTRA_PATTERNS_FILE="${ROOT}/.harness-hash-extra-patterns"
|
|
83
|
+
if [[ -f "${EXTRA_PATTERNS_FILE}" ]]; then
|
|
84
|
+
while IFS= read -r line || [[ -n "${line}" ]]; do
|
|
85
|
+
# strip inline comments
|
|
86
|
+
line="${line%%#*}"
|
|
87
|
+
# trim leading + trailing whitespace
|
|
88
|
+
line="${line#"${line%%[![:space:]]*}"}"
|
|
89
|
+
line="${line%"${line##*[![:space:]]}"}"
|
|
90
|
+
[[ -z "${line}" ]] && continue
|
|
91
|
+
PATTERNS+=("${line}")
|
|
92
|
+
done < "${EXTRA_PATTERNS_FILE}"
|
|
93
|
+
fi
|
|
94
|
+
|
|
45
95
|
collect_files() {
|
|
46
96
|
local out=()
|
|
47
97
|
shopt -s nullglob globstar
|
|
@@ -61,7 +111,7 @@ hash_files() {
|
|
|
61
111
|
return 0
|
|
62
112
|
fi
|
|
63
113
|
while IFS= read -r f; do
|
|
64
|
-
printf '%s %s\n' "$(
|
|
114
|
+
printf '%s %s\n' "$("${SHA256_CMD[@]}" "$f" | awk '{print $1}')" "$f"
|
|
65
115
|
done <<< "$files"
|
|
66
116
|
}
|
|
67
117
|
|
|
@@ -76,6 +126,10 @@ cmd_init() {
|
|
|
76
126
|
cmd_verify() {
|
|
77
127
|
cd "$ROOT"
|
|
78
128
|
if [[ ! -f "$MANIFEST" ]]; then
|
|
129
|
+
if [[ "$JSON_OUT" -eq 1 ]]; then
|
|
130
|
+
printf '{"gate_id":"audit-harness:%s:harness-hash","result":"NOT_APPLICABLE","input_hash":"sha256:0000000000000000000000000000000000000000000000000000000000000000","policy_hash":"sha256:0000000000000000000000000000000000000000000000000000000000000000","metadata":{"reason":"no manifest at %s (run --init)"}}\n' \
|
|
131
|
+
"${AUDIT_HARNESS_SIDE:-ci}" "$MANIFEST"
|
|
132
|
+
fi
|
|
79
133
|
echo "harness-hash: no manifest at $MANIFEST (run --init)" >&2
|
|
80
134
|
exit 3
|
|
81
135
|
fi
|
|
@@ -84,13 +138,32 @@ cmd_verify() {
|
|
|
84
138
|
local expected
|
|
85
139
|
expected=$(cat "$MANIFEST")
|
|
86
140
|
|
|
141
|
+
local manifest_hash
|
|
142
|
+
manifest_hash=$("${SHA256_CMD[@]}" "$MANIFEST" | awk '{print "sha256:"$1}')
|
|
143
|
+
|
|
144
|
+
local pinned_count
|
|
145
|
+
pinned_count=$(echo "$expected" | grep -c '^' || true)
|
|
146
|
+
|
|
87
147
|
# Compare sorted manifests so order doesn't matter
|
|
88
148
|
local diff_out
|
|
89
149
|
diff_out=$(diff <(echo "$expected" | sort) <(echo "$current" | sort) || true)
|
|
90
150
|
if [[ -z "$diff_out" ]]; then
|
|
91
|
-
|
|
151
|
+
if [[ "$JSON_OUT" -eq 1 ]]; then
|
|
152
|
+
printf '{"gate_id":"audit-harness:%s:harness-hash","result":"PASS","input_hash":"%s","policy_hash":"%s","metadata":{"pinned_count":%d}}\n' \
|
|
153
|
+
"${AUDIT_HARNESS_SIDE:-ci}" "$manifest_hash" "$manifest_hash" "$pinned_count"
|
|
154
|
+
echo "harness-hash: OK" >&2
|
|
155
|
+
else
|
|
156
|
+
echo "harness-hash: OK"
|
|
157
|
+
fi
|
|
92
158
|
exit 0
|
|
93
159
|
fi
|
|
160
|
+
if [[ "$JSON_OUT" -eq 1 ]]; then
|
|
161
|
+
# diff output may contain quotes/newlines; encode as a single-line escaped string
|
|
162
|
+
local diff_escaped
|
|
163
|
+
diff_escaped=$(printf '%s' "$diff_out" | python3 -c 'import sys, json; print(json.dumps(sys.stdin.read()))')
|
|
164
|
+
printf '{"gate_id":"audit-harness:%s:harness-hash","result":"FAIL","failure_mode":"HARNESS_TAMPERED","input_hash":"%s","policy_hash":"%s","metadata":{"pinned_count":%d,"diff":%s}}\n' \
|
|
165
|
+
"${AUDIT_HARNESS_SIDE:-ci}" "$manifest_hash" "$manifest_hash" "$pinned_count" "$diff_escaped"
|
|
166
|
+
fi
|
|
94
167
|
echo "HARNESS_TAMPERED: pinned artifact changed" >&2
|
|
95
168
|
echo "$diff_out" >&2
|
|
96
169
|
exit 2
|
package/scripts/scan.py
ADDED
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
audit-harness scan — read-only security / hygiene / skill-quality gate-runner
|
|
4
|
+
(PP-PLAN-040 Phase 4 / E6).
|
|
5
|
+
|
|
6
|
+
For every `dimension: security | hygiene | skill-quality` gate in a repo's
|
|
7
|
+
audit-profile/v1, scan runs the right external tool with the repo present and wraps
|
|
8
|
+
its exit code into a `gate-result/v1` row (JSON array, stdout). Advisory-first; a
|
|
9
|
+
missing tool degrades to ADVISORY indeterminate (never a false FAIL). It NEVER
|
|
10
|
+
fixes anything and NEVER reimplements a scanner.
|
|
11
|
+
|
|
12
|
+
Strategies:
|
|
13
|
+
- local hygiene-readme: deterministic README presence check (no tool).
|
|
14
|
+
- shell-out every gate carrying a `tool` (gitleaks, osv-scanner, semgrep, syft,
|
|
15
|
+
markdownlint, lychee, ...): run it if on PATH; clean exit -> PASS;
|
|
16
|
+
findings -> ADVISORY(error) (or FAIL under --strict / blocking);
|
|
17
|
+
tool absent -> ADVISORY indeterminate.
|
|
18
|
+
- consume skill-quality skill-behavioral (tool j-rig): CONSUME a j-rig
|
|
19
|
+
Evidence Bundle verdict row (--jrig-verdict PATH or a default
|
|
20
|
+
location). The harness does NOT run behavioral judgment itself —
|
|
21
|
+
it ingests j-rig's verdict. No verdict -> ADVISORY indeterminate.
|
|
22
|
+
|
|
23
|
+
Stdlib only. No network beyond whatever the shelled-out tool does (and the only
|
|
24
|
+
network-touching gates fail open to indeterminate). No filesystem mutation.
|
|
25
|
+
"""
|
|
26
|
+
import argparse
|
|
27
|
+
import hashlib
|
|
28
|
+
import json
|
|
29
|
+
import os
|
|
30
|
+
import shutil
|
|
31
|
+
import subprocess
|
|
32
|
+
import sys
|
|
33
|
+
from datetime import datetime, timezone
|
|
34
|
+
|
|
35
|
+
HERE = os.path.dirname(os.path.abspath(__file__))
|
|
36
|
+
if HERE not in sys.path:
|
|
37
|
+
sys.path.insert(0, HERE)
|
|
38
|
+
import classify as C # noqa: E402
|
|
39
|
+
|
|
40
|
+
# Digest of zero bytes, used as the input_hash placeholder on every row.
EMPTY_SHA = f"sha256:{hashlib.sha256().hexdigest()}"
# Only gates in these dimensions are handled by this runner.
SCAN_DIMENSIONS = {"security", "hygiene", "skill-quality"}

# tool name -> (argv, kind). The argv is executed with cwd set to the repo.
# kind "scan":       exit 0 is PASS, non-zero is reported as findings.
# kind "generation": exit 0 is PASS, non-zero is INDETERMINATE (the tool
#                    produces an artifact; it does not pass/fail policy).
TOOL_CMD = {
    "gitleaks": (
        ["gitleaks", "detect", "--no-banner"],
        "scan",
    ),
    "osv-scanner": (
        ["osv-scanner", "-r", "."],
        "scan",
    ),
    "semgrep": (
        ["semgrep", "scan", "--error", "--quiet"],
        "scan",
    ),
    "syft": (
        ["syft", "."],
        "generation",
    ),
    "markdownlint": (
        ["markdownlint", "."],
        "scan",
    ),
    "lychee": (
        ["lychee", "--offline", "--no-progress", "."],
        "scan",
    ),
}
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def sha256_str(s):
    """Return ``sha256:<hex>`` for the UTF-8 encoding of the string *s*."""
    digest = hashlib.sha256()
    digest.update(s.encode("utf-8"))
    return f"sha256:{digest.hexdigest()}"
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def make_row(gate_id, result, *, policy_hash, input_hash, commit_sha, runner,
             metadata=None, failure_mode=None, advisory_severity=None):
    """Assemble one result row as a plain dict.

    The seven core keys are always present, with ``timestamp`` set to the
    current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``. ``metadata`` is
    attached only when truthy (an empty dict is dropped); ``failure_mode`` and
    ``advisory_severity`` are attached only when they are not ``None``.
    """
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    record = dict(
        gate_id=gate_id,
        result=result,
        policy_hash=policy_hash,
        input_hash=input_hash,
        timestamp=stamp,
        runner=runner,
        commit_sha=commit_sha,
    )
    if metadata:
        record["metadata"] = metadata
    optional = (
        ("failure_mode", failure_mode),
        ("advisory_severity", advisory_severity),
    )
    for key, value in optional:
        if value is not None:
            record[key] = value
    return record
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def gate_suffix(gate_id):
    """Return the part of *gate_id* after its last ``:``.

    A gate id without any colon is returned unchanged.
    """
    _, _, tail = gate_id.rpartition(":")
    return tail
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def indeterminate(gate, commit_sha, runner, reason, policy):
    """Build the row used when a gate could not actually be measured.

    The row is an ``ADVISORY`` with severity ``warn`` whose metadata flags it
    as indeterminate and records *reason*. *policy* is a label string that is
    hashed into ``policy_hash``.
    """
    details = {"indeterminate": True, "reason": reason}
    return make_row(
        gate["gate_id"],
        "ADVISORY",
        policy_hash=sha256_str(policy),
        input_hash=EMPTY_SHA,
        commit_sha=commit_sha,
        runner=runner,
        advisory_severity="warn",
        metadata=details,
    )
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def run_readme(repo, gate, commit_sha, runner, strict):
    """Check that a README file exists at the top level of *repo*.

    Any of ``README.md``, ``README.rst``, ``README.txt`` or ``README`` counts.
    Present -> ``PASS``. Absent -> ``FAIL`` when *strict* is set or the gate's
    enforcement is ``blocking``, otherwise ``ADVISORY`` with severity ``warn``.
    """
    enforcement = gate.get("enforcement", "advisory")

    has_readme = False
    for candidate in ("README.md", "README.rst", "README.txt", "README"):
        if os.path.isfile(os.path.join(repo, candidate)):
            has_readme = True
            break

    if has_readme:
        return make_row(
            gate["gate_id"], "PASS",
            policy_hash=sha256_str("hygiene:readme"),
            input_hash=EMPTY_SHA, commit_sha=commit_sha, runner=runner,
            metadata={"method": "local-presence", "signal": "README present"},
        )

    if strict or enforcement == "blocking":
        outcome, failure_mode, severity = "FAIL", "hygiene:readme-missing", None
    else:
        outcome, failure_mode, severity = "ADVISORY", None, "warn"
    return make_row(
        gate["gate_id"], outcome,
        policy_hash=sha256_str("hygiene:readme"),
        input_hash=EMPTY_SHA, commit_sha=commit_sha, runner=runner,
        failure_mode=failure_mode, advisory_severity=severity,
        metadata={"method": "local-presence", "reason": "no README found"},
    )
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def run_tool(tool, repo, gate, commit_sha, runner, strict):
    """Run the external scanner *tool* inside *repo* and wrap its exit code.

    Outcomes:
      * tool not listed in ``TOOL_CMD``, not on PATH, or raising while being
        launched (including the 300 s timeout) -> indeterminate row;
      * exit code 0 -> ``PASS``;
      * non-zero exit from a "generation" tool -> indeterminate row;
      * non-zero exit from a "scan" tool -> ``FAIL`` when *strict* is set or
        the gate is ``blocking``, otherwise ``ADVISORY`` with severity
        ``error``. Up to 2000 characters of stdout (stderr if stdout is empty)
        are kept as ``detail``.
    """
    enforcement = gate.get("enforcement", "advisory")
    policy = f"tool:{tool}"

    def _unmeasured(reason):
        return indeterminate(gate, commit_sha, runner, reason, policy)

    if tool not in TOOL_CMD:
        return _unmeasured(f"no invocation wired for tool '{tool}'")
    if shutil.which(tool) is None:
        return _unmeasured(f"{tool} not on PATH — {gate.get('dimension')} unmeasured")

    argv, kind = TOOL_CMD[tool]
    try:
        proc = subprocess.run(argv, cwd=repo, capture_output=True, text=True, timeout=300)
    except Exception as e:
        # Deliberately broad: any launch problem degrades to "unmeasured".
        return _unmeasured(f"{tool} failed to run: {e}")

    if proc.returncode == 0:
        return make_row(
            gate["gate_id"], "PASS",
            policy_hash=sha256_str(policy),
            input_hash=EMPTY_SHA, commit_sha=commit_sha, runner=runner,
            metadata={"method": "shell-out", "tool": tool},
        )

    if kind == "generation":
        # A generator that cannot produce its artifact is an infra problem,
        # not a policy violation.
        return _unmeasured(f"{tool} could not generate artifact (exit {proc.returncode})")

    detail = (proc.stdout or proc.stderr).strip()[:2000]
    if strict or enforcement == "blocking":
        outcome, failure_mode, severity = "FAIL", f"scan:{tool}-findings", None
    else:
        outcome, failure_mode, severity = "ADVISORY", None, "error"
    return make_row(
        gate["gate_id"], outcome,
        policy_hash=sha256_str(policy),
        input_hash=EMPTY_SHA, commit_sha=commit_sha, runner=runner,
        failure_mode=failure_mode, advisory_severity=severity,
        metadata={"method": "shell-out", "tool": tool, "detail": detail},
    )
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def consume_jrig(repo, gate, commit_sha, runner, strict, verdict_path):
    """Ingest a j-rig Evidence Bundle verdict row — never run judgment here.

    The verdict file is the first existing path among *verdict_path* (when
    given) and the default locations ``.j-rig/verdict.json``,
    ``.jrig/verdict.json`` and ``j-rig-verdict.json`` under *repo*.

    Returns a result row:
      * no verdict file, a verdict that is not a JSON object, or a verdict
        that carries neither a ``result`` nor a ``passed`` value ->
        indeterminate ADVISORY (an absent verdict must never become a FAIL);
      * verdict ``result`` equal to ``"PASS"``, or no ``result`` and a truthy
        ``passed`` -> ``PASS``;
      * anything else -> ``FAIL`` when *strict* is set or the gate is
        ``blocking``, otherwise ``ADVISORY`` with severity ``error``.
    """
    policy = "consume:j-rig"
    candidates = [verdict_path] if verdict_path else []
    candidates += [os.path.join(repo, p) for p in
                   (".j-rig/verdict.json", ".jrig/verdict.json", "j-rig-verdict.json")]
    path = next((p for p in candidates if p and os.path.isfile(p)), None)
    if path is None:
        return indeterminate(gate, commit_sha, runner,
                             "no j-rig verdict available — run j-rig eval and pass --jrig-verdict",
                             policy)

    # NOTE(review): C.read_json is defined in classify.py; presumably it
    # returns a non-dict (e.g. None) for unreadable input — confirm there.
    verdict = C.read_json(path)
    if not isinstance(verdict, dict):
        return indeterminate(gate, commit_sha, runner, f"unreadable j-rig verdict at {path}", policy)

    # A JSON object that states no outcome at all is "no verdict", not a
    # failure: scoring it as FAIL would be a false FAIL under --strict.
    if not verdict.get("result") and verdict.get("passed") is None:
        return indeterminate(gate, commit_sha, runner,
                             f"j-rig verdict at {path} carries neither 'result' nor 'passed'",
                             policy)

    # Pass through j-rig's own result if present; otherwise interpret a boolean pass.
    enforcement = gate.get("enforcement", "advisory")
    jres = verdict.get("result") or ("PASS" if verdict.get("passed") else "FAIL")

    # relpath raises ValueError on Windows when the verdict and the repo are
    # on different drives; fall back to the path as given.
    try:
        source = os.path.relpath(path, repo)
    except ValueError:
        source = path
    meta = {"method": "consume-j-rig", "source": source,
            "jrig": {k: verdict.get(k) for k in ("result", "passed", "layers_passed", "baseline_delta")
                     if k in verdict}}

    if jres == "PASS":
        return make_row(gate["gate_id"], "PASS", policy_hash=sha256_str(policy),
                        input_hash=EMPTY_SHA, commit_sha=commit_sha, runner=runner, metadata=meta)
    if strict or enforcement == "blocking":
        result, fm, sev = "FAIL", "skill-quality:jrig-fail", None
    else:
        result, fm, sev = "ADVISORY", None, "error"
    return make_row(gate["gate_id"], result, policy_hash=sha256_str(policy),
                    input_hash=EMPTY_SHA, commit_sha=commit_sha, runner=runner,
                    failure_mode=fm, advisory_severity=sev, metadata=meta)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def compute_profile(repo, registry_path, profile_arg):
    """Return the audit profile to scan against, as parsed JSON.

    *profile_arg* selects the source: ``"-"`` reads JSON from stdin, any other
    truthy value is a path to a JSON file, and a falsy value runs the sibling
    ``classify.py`` on *repo* with ``--registry registry_path`` and parses its
    stdout. If that subprocess exits non-zero, its stderr is forwarded and
    ``SystemExit(2)`` is raised.
    """
    if profile_arg == "-":
        return json.loads(sys.stdin.read())
    if profile_arg:
        with open(profile_arg, "r", encoding="utf-8") as handle:
            return json.loads(handle.read())

    command = [
        sys.executable,
        os.path.join(HERE, "classify.py"),
        repo,
        "--registry",
        registry_path,
    ]
    completed = subprocess.run(command, capture_output=True, text=True)
    if completed.returncode:
        sys.stderr.write(completed.stderr)
        raise SystemExit(2)
    return json.loads(completed.stdout)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def main():
    """CLI entry point: run every scan-dimension gate and print the rows.

    Flow: parse arguments; honour the kill switch (a truthy ``disable`` in the
    parsed ``.audit-harness.yml`` override, or ``AUDIT_HARNESS_DISABLE=1``),
    which prints ``[]`` and exits 0; obtain the profile; keep the gates whose
    dimension is in ``SCAN_DIMENSIONS`` and whose enforcement is not
    ``disabled``; dispatch each to its strategy; print the rows as a JSON
    array on stdout and a one-line summary on stderr. Exits 1 if any row is
    ``FAIL``, otherwise 0.
    """
    parser = argparse.ArgumentParser(description="Security/hygiene/skill-quality gate-runner -> gate-result/v1")
    parser.add_argument("repo", nargs="?", default=".")
    parser.add_argument("--strict", action="store_true", help="treat a finding/gap as FAIL (exit 1)")
    parser.add_argument("--registry", default=C.DEFAULT_REGISTRY)
    parser.add_argument("--profile", default=None, help="pinned audit-profile/v1 (PATH or '-')")
    parser.add_argument("--jrig-verdict", default=None, help="path to a j-rig Evidence Bundle verdict to consume")
    args = parser.parse_args()

    repo = os.path.abspath(args.repo)
    runner = f"audit-harness@{C.harness_version()}"

    # Kill switch: per-repo override file or environment variable.
    override_path = os.path.join(repo, ".audit-harness.yml")
    if os.path.isfile(override_path):
        override = C.parse_override(override_path)
    else:
        override = {"disable": False}
    if override.get("disable") or os.environ.get("AUDIT_HARNESS_DISABLE") == "1":
        sys.stderr.write("audit-harness: KILL-SWITCH active — scan skipped (no rows emitted)\n")
        print("[]")
        sys.exit(0)

    profile = compute_profile(repo, os.path.abspath(args.registry), args.profile)
    # Prefer the commit recorded in the profile; otherwise ask the classify helper.
    commit_sha = profile.get("subject", {}).get("commit_sha") or C.git_short_sha(repo)

    def _evaluate(gate):
        """Route one gate to the strategy that produces its row."""
        suffix = gate_suffix(gate["gate_id"])
        tool = gate.get("tool")
        if suffix == "hygiene-readme":
            return run_readme(repo, gate, commit_sha, runner, args.strict)
        if tool == "j-rig":
            return consume_jrig(repo, gate, commit_sha, runner, args.strict, args.jrig_verdict)
        if tool:
            return run_tool(tool, repo, gate, commit_sha, runner, args.strict)
        return indeterminate(gate, commit_sha, runner,
                             f"gate '{suffix}' has no tool wired in this harness version",
                             f"scan:{suffix}")

    # Select first, run second, so no tool is launched before filtering is done.
    selected = [
        gate for gate in profile.get("gates", [])
        if gate.get("dimension") in SCAN_DIMENSIONS and gate.get("enforcement") != "disabled"
    ]
    rows = [_evaluate(gate) for gate in selected]

    print(json.dumps(rows, indent=2))

    tally = {"PASS": 0, "ADVISORY": 0, "FAIL": 0}
    for entry in rows:
        if entry["result"] in tally:
            tally[entry["result"]] += 1
    n_pass, n_adv, n_fail = tally["PASS"], tally["ADVISORY"], tally["FAIL"]
    sys.stderr.write(f"audit-harness scan: {n_pass} PASS, {n_adv} ADVISORY, {n_fail} FAIL "
                     f"across {len(rows)} gate(s)\n")
    sys.exit(1 if n_fail else 0)


if __name__ == "__main__":
    main()
|