@intentsolutions/audit-harness 0.1.0 → 1.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,118 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ audit-harness currency — advisory upstream-currency report (PP-PLAN-040 Phase 5 / E7).
4
+
5
+ Currency depends on upstream state, which is non-deterministic and network-bound, so
6
+ it is deliberately the WEAKEST kind of check: an advisory REPORT with **no exit-code
7
+ authority, no auto-fix, and no live-fetch**. It reads the per-upstream-identity pin
8
+ relation (schemas/currency/pins.v1.json) — where each upstream carries its own
9
+ pinned_version + the date it was last verified (checked_at) + a staleness window —
10
+ and reports which pins are themselves STALE (checked_at older than the window), i.e.
11
+ which pins a human should re-verify against upstream.
12
+
13
+ This models the pin's OWN staleness as detectable, rather than one opaque
14
+ ".schema-version" scalar. The /sync-testing-harness skill consumes this report to
15
+ open advisory bump PRs; a generated report never reddens a build (it always exits 0 — a non-zero exit means no report could be produced, e.g. the pins file is unreadable).
16
+
17
+ Stdlib only. No network. No filesystem mutation.
18
+ """
19
+ import argparse
20
+ import json
21
+ import os
22
+ import sys
23
+ from datetime import datetime, timezone
24
+
25
# Directory that contains this script.
HERE = os.path.split(os.path.abspath(__file__))[0]
# Default pin relation: ../schemas/currency/pins.v1.json relative to this script.
DEFAULT_PINS = os.path.join(HERE, os.pardir, "schemas", "currency", "pins.v1.json")
27
+
28
+
29
def parse_date(s):
    """Parse a ``YYYY-MM-DD`` string into a ``datetime.date``.

    Returns ``None`` when *s* is not a string or does not match the format, so
    callers can treat a missing or garbled date as "unknown" rather than crash.
    """
    try:
        return datetime.strptime(s, "%Y-%m-%d").date()
    except (TypeError, ValueError):
        # TypeError: s is not a str (e.g. None or a number from the JSON).
        # ValueError: s is a str that does not match the format.
        return None
34
+
35
+
36
def build_report(pins_doc, today):
    """Build one report row per pin in *pins_doc*.

    Args:
        pins_doc: the decoded pins document (a dict). Reads the optional keys
            ``default_staleness_window_days`` (falls back to 90) and ``pins``.
        today: the ``datetime.date`` that pin ages are measured against.

    Returns:
        A list of dicts with the keys ``identity``, ``pinned_version``,
        ``checked_at``, ``age_days``, ``window_days``, ``status``, ``source``
        and ``notes``. ``status`` is ``"stale"`` when the pin's age in days is
        strictly greater than its window, ``"current"`` otherwise, and
        ``"unknown-checked_at"`` (with ``age_days`` None) when ``checked_at``
        cannot be parsed.
    """
    def _window(value, fallback):
        # A null / string / bool window cannot be compared with an age in days;
        # use the fallback instead of raising TypeError on `age > window`.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return fallback
        return value

    default_window = _window(pins_doc.get("default_staleness_window_days"), 90)
    out = []
    # `"pins": null` is treated like a missing key.
    for pin in pins_doc.get("pins") or []:
        if not isinstance(pin, dict):
            # A malformed entry is still reported (as unknown) rather than
            # crashing the advisory report or being dropped silently.
            pin = {}
        checked = parse_date(pin.get("checked_at", ""))
        window = _window(pin.get("staleness_window_days"), default_window)
        if checked is None:
            age, status = None, "unknown-checked_at"
        else:
            age = (today - checked).days
            status = "stale" if age > window else "current"
        out.append({
            "identity": pin.get("identity"),
            "pinned_version": pin.get("pinned_version"),
            "checked_at": pin.get("checked_at"),
            "age_days": age,
            "window_days": window,
            "status": status,
            "source": pin.get("source"),
            "notes": pin.get("notes"),
        })
    return out
58
+
59
+
60
def main():
    """Command-line entry point: load the pins file and print the advisory report.

    Flags: ``--pins`` (path to the pins JSON), ``--json`` (machine-readable
    output) and ``--today`` (a YYYY-MM-DD override for reproducible runs).

    Exit status: 0 whenever a report is produced, whether or not pins are
    stale; 2 when no report can be produced (unreadable or non-object pins
    file, or an unparseable ``--today``).
    """
    ap = argparse.ArgumentParser(description="Advisory upstream-currency report (no exit authority)")
    ap.add_argument("--pins", default=DEFAULT_PINS, help="path to the pin relation datum")
    ap.add_argument("--json", action="store_true", help="emit JSON report")
    ap.add_argument("--today", default=None, help="override 'today' (YYYY-MM-DD) for reproducible reports/tests")
    args = ap.parse_args()

    if args.today:
        today = parse_date(args.today)
        if today is None:
            # Without this check a bad override crashed later with a traceback.
            ap.error(f"--today must be YYYY-MM-DD, got: {args.today!r}")
    else:
        today = datetime.now(timezone.utc).date()

    pins_path = os.path.abspath(args.pins)
    try:
        with open(pins_path, "r", encoding="utf-8") as f:
            pins_doc = json.load(f)
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file. ValueError covers invalid JSON
        # (json.JSONDecodeError) and invalid UTF-8 (UnicodeDecodeError).
        sys.stderr.write(f"currency: cannot read pins at {pins_path}: {e}\n")
        sys.exit(2)
    if not isinstance(pins_doc, dict):
        sys.stderr.write(f"currency: cannot read pins at {pins_path}: top-level JSON value must be an object\n")
        sys.exit(2)

    report = build_report(pins_doc, today)
    stale = [r for r in report if r["status"] == "stale"]
    unknown = [r for r in report if r["status"] == "unknown-checked_at"]

    if args.json:
        print(json.dumps({
            "report": "currency/v1",
            "generated_for": today.strftime("%Y-%m-%d"),
            "pins": report,
            "stale_count": len(stale),
            "advisory": True,
        }, indent=2))
    else:
        print(f"Upstream currency (advisory) — as of {today.strftime('%Y-%m-%d')}")
        print(f"{'identity':<24} {'pinned':<14} {'checked_at':<12} {'age':>5} {'win':>4} status")
        for r in report:
            age = "—" if r["age_days"] is None else str(r["age_days"]) + "d"
            if r["status"] == "stale":
                mark = "⚠ STALE"
            elif r["status"] == "current":
                mark = "current"
            else:
                mark = "? " + r["status"]
            # str(...) keeps the column formatting safe if the JSON holds a
            # non-string value (e.g. a numeric pinned_version).
            print(f"{str(r['identity'] or ''):<24} {str(r['pinned_version'] or ''):<14} "
                  f"{str(r['checked_at'] or ''):<12} {age:>5} {r['window_days']:>4} {mark}")
        print()
        if stale:
            print(f"{len(stale)} pin(s) past their staleness window — re-verify against upstream, "
                  f"then bump pinned_version + checked_at in schemas/currency/pins.v1.json:")
            for r in stale:
                print(f"  - {r['identity']}: last checked {r['checked_at']} "
                      f"({r['age_days']}d ago > {r['window_days']}d)")
        else:
            print("All pins within their staleness window.")
        if unknown:
            print(f"{len(unknown)} pin(s) have an unparseable checked_at — fix the date format (YYYY-MM-DD).")

    # Advisory ONLY: a produced report never has exit-code authority.
    sys.exit(0)


if __name__ == "__main__":
    main()
@@ -0,0 +1,256 @@
1
+ #!/usr/bin/env bash
2
+ # emit-evidence.sh — wrap a gate-result JSON envelope in an in-toto Statement v1.
3
+ #
4
+ # Reads a gate-result envelope JSON document from stdin (or --input), augments it
5
+ # with the fields the runner knows (timestamp, runner version, commit_sha), and
6
+ # emits a complete in-toto Statement v1 to stdout. Optionally signs the Statement
7
+ # via `cosign attest-blob` and, when --rekor-url is given, uploads it to the Rekor transparency log.
8
+ #
9
+ # Per intent-eval-lab/specs/evidence-bundle/v0.1.0-draft/SPEC.md the emitted
10
+ # Statement carries predicateType https://evals.intentsolutions.io/gate-result/v1.
11
+ #
12
+ # Usage:
13
+ # <gate> --json | bash emit-evidence.sh # unsigned, prints Statement
14
+ # bash emit-evidence.sh --input gate.json # read from file
15
+ # bash emit-evidence.sh --sign --key cosign.key < gate.json # cosign key-based sign
16
+ # bash emit-evidence.sh --sign --keyless < gate.json # cosign keyless (Fulcio OIDC)
17
+ # bash emit-evidence.sh --sign --rekor-url https://rekor.sigstore.dev < gate.json
18
+ # bash emit-evidence.sh --output bundle/row.json < gate.json
19
+ #
20
+ # Flags:
21
+ # --input PATH Read gate-result JSON from PATH instead of stdin
22
+ # --output PATH Write Statement (DSSE envelope if --sign) to PATH instead of stdout
23
+ # --sign Sign the Statement via cosign. Default: --keyless.
24
+ # --keyless Force cosign keyless signing (OIDC). Default when --sign and no --key.
25
+ # --key PATH Cosign keyref. Use instead of --keyless.
26
+ # --rekor-url URL Push the signed attestation to Rekor at URL. Implies --sign.
27
+ # Default Rekor URL when present without value: https://rekor.sigstore.dev
28
+ # --no-sign Explicitly skip signing (default behavior; documents the choice)
29
+ # --runner-version V Override the runner version string (default: from package.json)
30
+ # --commit-sha SHA Override the commit SHA (default: git rev-parse HEAD)
31
+ # --help, -h Print help
32
+ #
33
+ # Exit codes:
34
+ # 0 — Statement emitted successfully
35
+ # 1 — input JSON malformed or missing required fields
36
+ # 2 — signing requested but cosign not available
37
+ # 3 — cosign signing failed (this includes a failed Rekor upload when --rekor-url is set)
38
+ #
39
+ # CISO gate (per ISEDC v1 Q1, 2026-05-10): pushing to a public transparency log
40
+ # (Rekor) against the predicate URI https://evals.intentsolutions.io/gate-result/v1
41
+ # is BLOCKED until DNSSEC + CAA records are verified on the namespace. The script
42
+ # does NOT enforce this — that is operator discipline. See bead `iel-4zr` in
43
+ # intent-eval-platform/intent-eval-lab/.beads/.
44
+
45
set -euo pipefail

# Defaults; each can be overridden by a flag below.
INPUT="-"          # "-" means: read the gate-result JSON from stdin
OUTPUT=""          # empty means: write the result to stdout
SIGN=0
KEYLESS=0
KEYREF=""
REKOR_URL=""
RUNNER_VERSION_OVERRIDE=""
COMMIT_SHA_OVERRIDE=""
PREDICATE_URI="https://evals.intentsolutions.io/gate-result/v1"
STATEMENT_TYPE="https://in-toto.io/Statement/v1"

# require_value FLAG ARGC — exit 1 with a usage message when FLAG is the last
# argument. Without this, `set -u` aborts on the unset "$2" with an opaque
# "unbound variable" error.
require_value() {
  if [[ "$2" -lt 2 ]]; then
    echo "emit-evidence: $1 requires a value" >&2
    exit 1
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --input)          require_value "$1" "$#"; INPUT="$2"; shift 2 ;;
    --output)         require_value "$1" "$#"; OUTPUT="$2"; shift 2 ;;
    --sign)           SIGN=1; shift ;;
    --keyless)        SIGN=1; KEYLESS=1; shift ;;
    --key)            require_value "$1" "$#"; SIGN=1; KEYREF="$2"; shift 2 ;;
    --rekor-url)
      SIGN=1
      # The URL is optional: a following flag (or nothing) selects the default.
      if [[ "${2:-}" =~ ^-- ]] || [[ -z "${2:-}" ]]; then
        REKOR_URL="https://rekor.sigstore.dev"
        shift
      else
        REKOR_URL="$2"
        shift 2
      fi
      ;;
    --no-sign)        SIGN=0; shift ;;
    --runner-version) require_value "$1" "$#"; RUNNER_VERSION_OVERRIDE="$2"; shift 2 ;;
    --commit-sha)     require_value "$1" "$#"; COMMIT_SHA_OVERRIDE="$2"; shift 2 ;;
    # The header comment spans lines 2-43 of this script; print all of it.
    --help|-h)        sed -n '2,43p' "$0"; exit 0 ;;
    *)                echo "emit-evidence: unknown flag $1" >&2; exit 1 ;;
  esac
done
82
+
83
# --- Read input ---
# The gate-result JSON comes from the file named by --input, or from stdin
# when INPUT is still its default "-".
if [[ "$INPUT" != "-" ]]; then
  if ! [[ -r "$INPUT" ]]; then
    echo "emit-evidence: cannot read $INPUT" >&2
    exit 1
  fi
  GATE_JSON=$(cat "$INPUT")
else
  GATE_JSON=$(cat)
fi

# An empty document can never satisfy the required-keys check; stop early.
if ! [[ -n "$GATE_JSON" ]]; then
  echo "emit-evidence: empty input" >&2
  exit 1
fi
98
+
99
# --- Resolve runner + commit metadata ---
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PKG_JSON="${SCRIPT_DIR}/../package.json"

# Runner string: an explicit override wins; otherwise the version is read from
# the package.json next to this script's parent directory; otherwise "unknown".
RUNNER="audit-harness@unknown"
if [[ -n "$RUNNER_VERSION_OVERRIDE" ]]; then
  RUNNER="$RUNNER_VERSION_OVERRIDE"
elif [[ -f "$PKG_JSON" ]]; then
  # Pass PKG_JSON via argv so paths with quotes/spaces/specials don't break the python source.
  VER=$(python3 -c "import json, sys; print(json.load(open(sys.argv[1]))['version'])" "$PKG_JSON" 2>/dev/null || echo "unknown")
  RUNNER="audit-harness@${VER}"
fi

# Commit SHA: an explicit override wins; otherwise ask git, and fall back to a
# placeholder when git is unavailable or this is not a repository.
COMMIT_SHA="$COMMIT_SHA_OVERRIDE"
if [[ -z "$COMMIT_SHA" ]]; then
  COMMIT_SHA=$(git rev-parse HEAD 2>/dev/null || echo "0000000")
fi

# UTC timestamp, second precision.
TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
120
+
121
# --- Compose the Statement via python (deterministic JSON shape, escaping handled) ---
# All values reach python through the environment, never through the source
# text, so no shell quoting can corrupt the program. Any validation failure
# exits 1; under `set -e` that aborts the whole script with status 1.
STATEMENT=$(GATE_JSON="$GATE_JSON" PREDICATE_URI="$PREDICATE_URI" STATEMENT_TYPE="$STATEMENT_TYPE" \
  RUNNER="$RUNNER" COMMIT_SHA="$COMMIT_SHA" TIMESTAMP="$TIMESTAMP" \
  python3 - <<'PY'
import json, os, sys


def fail(msg):
    """Print a diagnostic in the script's own format and exit 1."""
    sys.stderr.write(f"emit-evidence: {msg}\n")
    sys.exit(1)


try:
    gate = json.loads(os.environ["GATE_JSON"])
except ValueError as exc:
    fail(f"input is not valid JSON: {exc}")

if not isinstance(gate, dict):
    fail(f"gate-result must be a JSON object, got: {type(gate).__name__}")

required = ["gate_id", "result", "input_hash", "policy_hash"]
missing = [k for k in required if k not in gate]
if missing:
    fail(f"gate-result missing required keys: {missing}")

# Augment predicate with runner-supplied fields
predicate = {
    "gate_id": gate["gate_id"],
    "result": gate["result"],
    "policy_hash": gate["policy_hash"],
    "input_hash": gate["input_hash"],
    "timestamp": os.environ["TIMESTAMP"],
    "runner": os.environ["RUNNER"],
    "commit_sha": os.environ["COMMIT_SHA"],
}

# Carry forward optional fields if present
for opt in ("metadata", "failure_mode", "advisory_severity"):
    if opt in gate:
        predicate[opt] = gate[opt]

# Subject naming: subject.name MUST equal predicate.gate_id (SPEC § 6 R8)
# Subject digest: subject.digest.sha256 MUST equal predicate.input_hash (SPEC § 6 R9)
input_hash = gate["input_hash"]
# Check the type first: a non-string value has no .startswith and would
# otherwise end in a traceback instead of a diagnostic.
if not isinstance(input_hash, str) or not input_hash.startswith("sha256:"):
    fail(f"input_hash must be sha256:-prefixed, got: {input_hash}")
digest_hex = input_hash[len("sha256:"):]

statement = {
    "_type": os.environ["STATEMENT_TYPE"],
    "subject": [{
        "name": gate["gate_id"],
        "digest": {"sha256": digest_hex},
    }],
    "predicateType": os.environ["PREDICATE_URI"],
    "predicate": predicate,
}

print(json.dumps(statement))
PY
)

if [[ -z "$STATEMENT" ]]; then
  echo "emit-evidence: failed to compose Statement" >&2
  exit 1
fi
177
+
178
# --- OTel event (best-effort no-op if collector absent) ---
# Fire agent.rollout.gate.evaluated per intent-eval-lab/000-docs/001-DR-RFC-...md.
# We emit a single OTLP-shaped JSON line to stderr when AUDIT_HARNESS_OTEL=1
# OR an OTEL_EXPORTER_OTLP_ENDPOINT is set. Real exporter wiring is consumer-side;
# we emit a structured signal that any collector can scrape via stderr capture.
#
# The line is serialized with json.dumps rather than printf so that quotes,
# backslashes or control characters in gate_id/result/runner/commit cannot
# produce invalid JSON. `|| true` keeps the event best-effort: a failure here
# must not abort evidence emission.
if [[ "${AUDIT_HARNESS_OTEL:-0}" == "1" ]] || [[ -n "${OTEL_EXPORTER_OTLP_ENDPOINT:-}" ]]; then
  GATE_JSON="$GATE_JSON" RUNNER="$RUNNER" COMMIT_SHA="$COMMIT_SHA" TIMESTAMP="$TIMESTAMP" \
    python3 -c '
import json, os, sys

try:
    gate = json.loads(os.environ["GATE_JSON"])
except ValueError:
    gate = {}
if not isinstance(gate, dict):
    gate = {}

event = {
    "name": "agent.rollout.gate.evaluated",
    "attributes": {
        "gate.id": str(gate.get("gate_id", "")),
        "gate.result": str(gate.get("result", "")),
        "gate.runner": os.environ["RUNNER"],
        "gate.commit_sha": os.environ["COMMIT_SHA"],
    },
    "timestamp": os.environ["TIMESTAMP"],
}
sys.stderr.write("[OTEL] " + json.dumps(event, separators=(",", ":")) + "\n")
' || true
fi
189
+
190
+ # --- Sign + emit ---
191
# emit CONTENT — write CONTENT plus a trailing newline to the --output path
# (creating its parent directory and logging the path to stderr), or to
# stdout when no --output was given.
emit() {
  local payload="$1"
  if [[ -z "$OUTPUT" ]]; then
    printf '%s\n' "$payload"
    return
  fi
  mkdir -p "$(dirname "$OUTPUT")"
  printf '%s\n' "$payload" > "$OUTPUT"
  echo "emit-evidence: wrote $OUTPUT" >&2
}
201
+
202
# Unsigned path: print the bare Statement and stop.
if [[ "$SIGN" -eq 0 ]]; then
  emit "$STATEMENT"
  exit 0
fi

# Signing shells out to `cosign attest-blob`. No other signing path is
# implemented: if cosign is missing the script stops with exit 2.
if ! command -v cosign >/dev/null 2>&1; then
  echo "emit-evidence: --sign requested but cosign is not installed (https://docs.sigstore.dev/cosign/installation/)" >&2
  exit 2
fi

# Stage the Statement to a temp file for cosign to consume
TMP=$(mktemp -d)
trap 'rm -rf "$TMP"' EXIT
STATEMENT_FILE="$TMP/statement.json"
printf '%s\n' "$STATEMENT" > "$STATEMENT_FILE"
ENVELOPE_FILE="$TMP/envelope.dsse.json"

# NOTE(review): the complete Statement is passed as --predicate. cosign
# presumably wraps whatever --predicate points at in a Statement of its own,
# which would nest this one — confirm against the cosign attest-blob reference
# (newer releases may offer a flag for a pre-made statement).
COSIGN_ARGS=("attest-blob" "--predicate" "$STATEMENT_FILE" "--type" "$PREDICATE_URI")
if [[ -n "$KEYREF" ]]; then
  COSIGN_ARGS+=("--key" "$KEYREF")
else
  # No key given: keyless is the only remaining mode, whether or not
  # --keyless was spelled out.
  COSIGN_ARGS+=("--yes")  # accept Fulcio OIDC keyless
fi
if [[ -n "$REKOR_URL" ]]; then
  COSIGN_ARGS+=("--rekor-url" "$REKOR_URL")
  COSIGN_ARGS+=("--tlog-upload=true")
else
  COSIGN_ARGS+=("--tlog-upload=false")
fi
COSIGN_ARGS+=("--output-signature" "$ENVELOPE_FILE")

# `cosign attest-blob` needs a "blob" — the input the predicate attests to.
# The blob file is named after the Statement's subject name (the gate_id).
ARTIFACT_NAME="$(echo "$STATEMENT" | python3 -c "import json,sys; print(json.load(sys.stdin)['subject'][0]['name'])")"

# gate_id comes from the input document. A "/" in it would make the blob path
# leave $TMP (e.g. "../x") or point into a directory that does not exist, so
# reject it with a clear message instead of a failing cp.
if [[ "$ARTIFACT_NAME" == */* ]]; then
  echo "emit-evidence: gate_id must not contain '/' when signing (got: $ARTIFACT_NAME)" >&2
  exit 1
fi

# The blob's CONTENT is a copy of the Statement, not the artifact the gate
# evaluated, so the digest cosign computes over it is NOT the declared
# input_hash. For v0.x this round-trip-by-construction is accepted.
BLOB_FILE="$TMP/$ARTIFACT_NAME.blob"
cp "$STATEMENT_FILE" "$BLOB_FILE"

# Exit 3 covers every cosign failure, including a failed Rekor upload.
if ! cosign "${COSIGN_ARGS[@]}" "$BLOB_FILE" >&2; then
  echo "emit-evidence: cosign signing failed" >&2
  exit 3
fi

emit "$(cat "$ENVELOPE_FILE")"
echo "emit-evidence: signed envelope emitted${REKOR_URL:+ (Rekor: $REKOR_URL)}" >&2
exit 0
@@ -19,14 +19,32 @@
19
19
  # bash escape-scan.sh path/to/change.patch
20
20
  # bash escape-scan.sh --staged # git diff --cached
21
21
  # bash escape-scan.sh --range HEAD~1..HEAD
22
+ # bash escape-scan.sh --staged --json # machine-readable JSON to stdout
23
+ #
24
+ # JSON mode:
25
+ # stdout = single JSON object suitable for piping to `audit-harness emit-evidence`
26
+ # stderr = unchanged human-readable [SEVERITY] notes (preserves backward-compat)
27
+ # exit codes unchanged
22
28
 
23
29
  set -euo pipefail
24
30
 
25
31
  DIFF_SRC=""
26
32
  VERIFY_HASH=1
33
+ JSON_OUT=0
27
34
  ROOT="${ROOT:-$(pwd)}"
28
35
  HASH_SCRIPT="$(dirname "$0")/harness-hash.sh"
29
36
 
37
# First-pass arg parse: remove --json wherever it appears on the command line
# and remember that it was seen, so the primary arg parsing below is unchanged.
_filtered_args=()
for arg in "$@"; do
  if [[ "$arg" == "--json" ]]; then
    JSON_OUT=1
  else
    _filtered_args+=("$arg")
  fi
done
# Re-install the remaining arguments as the positional parameters.
set -- "${_filtered_args[@]+"${_filtered_args[@]}"}"
47
+
30
48
  if [[ "$#" -eq 0 ]]; then
31
49
  echo "escape-scan: pass a diff source (- for stdin, --staged, --range, or a patch file)" >&2
32
50
  exit 2
@@ -34,11 +52,20 @@ fi
34
52
 
35
53
# Resolve the diff source named by the first argument into DIFF_SRC, a path
# that can be read more than once.
case "$1" in
  -)
    # Buffer stdin into a temp file: a pipe can be read only once, but the
    # --json summary hashes DIFF_SRC after the scan has already read it.
    DIFF_SRC=$(mktemp)
    trap 'rm -f "$DIFF_SRC"' EXIT
    cat > "$DIFF_SRC"
    ;;
  --staged)
    DIFF_SRC=$(mktemp)
    trap 'rm -f "$DIFF_SRC"' EXIT
    git diff --cached > "$DIFF_SRC"
    ;;
  --range)
    # Without this check `set -u` aborts on the unset "$2" with an opaque
    # "unbound variable" error.
    if [[ "$#" -lt 2 ]]; then
      echo "escape-scan: --range requires a revision range (e.g. HEAD~1..HEAD)" >&2
      exit 2
    fi
    DIFF_SRC=$(mktemp)
    trap 'rm -f "$DIFF_SRC"' EXIT
    git diff "$2" > "$DIFF_SRC"
    shift
    ;;
  --no-hash) VERIFY_HASH=0; shift; DIFF_SRC="$1" ;;
  --help|-h)
    # The usage header now ends on line 27 ("exit codes unchanged").
    sed -n '2,27p' "$0"; exit 0 ;;
  *) DIFF_SRC="$1" ;;
esac
44
71
 
@@ -159,7 +186,34 @@ if echo "$added_lines" | grep -Eq 'toBeDefined\(\)|\.is not None'; then
159
186
  fi
160
187
 
161
188
  # --- Summary & exit ---
162
- echo "escape-scan: REFUSE=$REFUSE CHALLENGE=$CHALLENGE FLAG=$FLAG"
189
if [[ "$JSON_OUT" -eq 1 ]]; then
  # Result mapping (per intent-eval-lab evidence-bundle SPEC § 5 R6):
  #   any REFUSE                → FAIL
  #   any CHALLENGE (no REFUSE) → FAIL (exit 1 = blocking, requires human)
  #   only FLAG                 → ADVISORY (exit 0 — informational)
  #   none                      → PASS
  result="PASS"
  if [[ "$REFUSE" -gt 0 || "$CHALLENGE" -gt 0 ]]; then
    result="FAIL"
  elif [[ "$FLAG" -gt 0 ]]; then
    result="ADVISORY"
  fi

  # _escape_scan_sha256 FILE — print "sha256:<hex>" for FILE. Falls back to
  # `shasum -a 256` where GNU sha256sum is not installed; under
  # `set -euo pipefail` a missing sha256sum would otherwise abort the script
  # before any JSON is printed.
  _escape_scan_sha256() {
    if command -v sha256sum >/dev/null 2>&1; then
      sha256sum "$1" | awk '{print "sha256:"$1}'
    else
      shasum -a 256 "$1" | awk '{print "sha256:"$1}'
    fi
  }

  # NOTE(review): if DIFF_SRC is /dev/stdin and the scan above has already
  # read it, this hashes whatever is left of the stream — confirm.
  input_hash=$(_escape_scan_sha256 "$DIFF_SRC")
  # All-zero placeholder when the policy file is absent.
  policy_hash="sha256:0000000000000000000000000000000000000000000000000000000000000000"
  if [[ -f "$TESTING_MD" ]]; then
    policy_hash=$(_escape_scan_sha256 "$TESTING_MD")
  fi

  # The object is printed in pieces so advisory_severity can be appended only
  # for ADVISORY results before the closing brace.
  printf '{"gate_id":"audit-harness:%s:escape-scan","result":"%s","input_hash":"%s","policy_hash":"%s","metadata":{"refuse":%d,"challenge":%d,"flag":%d,"coverage_line_floor":%d,"coverage_branch_floor":%d,"mutation_floor":%d}' \
    "${AUDIT_HARNESS_SIDE:-ci}" "$result" "$input_hash" "$policy_hash" "$REFUSE" "$CHALLENGE" "$FLAG" \
    "$COVERAGE_LINE_FLOOR" "$COVERAGE_BRANCH_FLOOR" "$MUTATION_FLOOR"
  if [[ "$result" == "ADVISORY" ]]; then
    printf ',"advisory_severity":"info"'
  fi
  printf '}\n'
  # In JSON mode stdout carries only the JSON object; the summary goes to stderr.
  echo "escape-scan: REFUSE=$REFUSE CHALLENGE=$CHALLENGE FLAG=$FLAG" >&2
else
  echo "escape-scan: REFUSE=$REFUSE CHALLENGE=$CHALLENGE FLAG=$FLAG"
fi
163
217
  if [[ "$REFUSE" -gt 0 ]]; then
164
218
  echo "escape-scan: pipeline halted (REFUSE)" >&2
165
219
  exit 2
@@ -0,0 +1,145 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ audit-harness fp-rate — measure a gate's false-positive / false-negative rate over
4
+ a labeled corpus.
5
+
6
+ A new gate ships `enforcement: advisory`. It earns promotion to `blocking` only
7
+ once its measured false-positive rate (clean inputs it wrongly flags) sits below a
8
+ stated bar on a labeled corpus. This harness produces that measurement — the
9
+ evidence an engineer cites when they pin `enforcement: blocking` in a repo's
10
+ `tests/TESTING.md` (PP-PLAN-040 Phase 0, bead c2e; rule: docs/gate-promotion.md).
11
+
12
+ Labeled corpus layout (default `tests/fixtures/conform`):
13
+ <corpus>/valid/<fixture>/... → every gate that fires here SHOULD be clean
14
+ <corpus>/malformed/<fixture>/... → every gate that fires here SHOULD flag
15
+
16
+ Per row the gate emits on a fixture, the verdict is bucketed:
17
+ clean = PASS | NOT_APPLICABLE
18
+ flag = FAIL | ADVISORY(advisory_severity=error)
19
+ skip = ADVISORY indeterminate (tool/schema absent) — unmeasurable, excluded
20
+
21
+ false positive (FP) = a `valid` fixture the gate flagged
22
+ false negative (FN) = a `malformed` fixture the gate left clean
23
+
24
+ Stdlib only. Read-only. Default exit 0 (report); `--max-fp-rate X` exits 1 if any
25
+ gate exceeds the bar (use in CI when promoting a gate).
26
+ """
27
+ import argparse
28
+ import json
29
+ import os
30
+ import subprocess
31
+ import sys
32
+
33
# Directory that contains this script.
HERE = os.path.split(os.path.abspath(__file__))[0]
# The conform runner that sits next to this script.
CONFORM = os.path.join(HERE, "conform.py")
# Default labeled corpus: ../tests/fixtures/conform relative to this script.
DEFAULT_CORPUS = os.path.join(HERE, os.pardir, "tests", "fixtures", "conform")
36
+
37
+
38
def verdict_bucket(row):
    """Classify one gate-result row as ``"clean"``, ``"flag"`` or ``"skip"``.

    * ``PASS`` / ``NOT_APPLICABLE`` -> ``"clean"``
    * ``FAIL`` -> ``"flag"``
    * ``ADVISORY`` -> ``"skip"`` when ``metadata.indeterminate`` is truthy,
      else ``"flag"`` when ``advisory_severity == "error"``, else ``"skip"``
    * any other result -> ``"skip"``

    Args:
        row: a dict decoded from the gate's JSON output.
    """
    r = row.get("result")
    if r in ("PASS", "NOT_APPLICABLE"):
        return "clean"
    if r == "FAIL":
        return "flag"
    if r == "ADVISORY":
        # "metadata" may be present but null (or another non-object); treat
        # that like missing metadata instead of raising AttributeError.
        metadata = row.get("metadata")
        if isinstance(metadata, dict) and metadata.get("indeterminate"):
            return "skip"
        if row.get("advisory_severity") == "error":
            return "flag"
    return "skip"
51
+
52
+
53
def run_conform(fixture):
    """Run the conform script on *fixture* and return its rows.

    The script at ``CONFORM`` is executed with the current interpreter and its
    stdout is parsed as JSON.

    Returns:
        The decoded list of rows. An empty list is returned, with a warning on
        stderr, when stdout is not valid JSON or is not a JSON array, so a
        broken run is visible instead of looking like "no verdicts".
    """
    out = subprocess.run([sys.executable, CONFORM, fixture], capture_output=True, text=True)
    try:
        rows = json.loads(out.stdout)
    except ValueError:
        # json.JSONDecodeError is a ValueError; this also covers empty stdout.
        sys.stderr.write(
            f"fp-rate: conform produced no parseable JSON for {fixture} "
            f"(exit {out.returncode}); treating as no verdicts\n"
        )
        return []
    if not isinstance(rows, list):
        sys.stderr.write(
            f"fp-rate: conform output for {fixture} is not a JSON array; "
            f"treating as no verdicts\n"
        )
        return []
    return rows
59
+
60
+
61
def measure(corpus):
    """Run the gate over every fixture in *corpus* and tally verdicts per gate.

    Looks at ``<corpus>/valid`` and ``<corpus>/malformed``; a missing label
    directory is skipped, as is any entry that is not a directory. Fixtures
    are visited in sorted name order.

    Returns:
        A dict mapping gate_id to counters:
        ``valid`` / ``malformed`` — measurable rows seen under that label,
        ``fp`` — rows flagged on a ``valid`` fixture,
        ``fn`` — rows left clean on a ``malformed`` fixture,
        ``skipped`` — rows bucketed as ``"skip"`` (excluded from the totals).
    """
    stats = {}

    def bump(gid, key):
        s = stats.setdefault(gid, {"valid": 0, "fp": 0, "malformed": 0, "fn": 0, "skipped": 0})
        s[key] += 1

    for label in ("valid", "malformed"):
        base = os.path.join(corpus, label)
        if not os.path.isdir(base):
            continue
        for name in sorted(os.listdir(base)):
            fixture = os.path.join(base, name)
            if not os.path.isdir(fixture):
                continue
            for row in run_conform(fixture):
                gid = row.get("gate_id") if isinstance(row, dict) else None
                if not isinstance(gid, str):
                    # A row without a usable gate_id cannot be attributed to a
                    # gate; report it and keep measuring instead of crashing.
                    sys.stderr.write(f"fp-rate: ignoring row without a string gate_id in {fixture}\n")
                    continue
                bucket = verdict_bucket(row)
                if bucket == "skip":
                    bump(gid, "skipped")
                    continue
                if label == "valid":
                    bump(gid, "valid")
                    if bucket == "flag":
                        bump(gid, "fp")
                else:
                    bump(gid, "malformed")
                    if bucket == "clean":
                        bump(gid, "fn")
    return stats
92
+
93
+
94
def rate(n, d):
    """Return ``n / d``, or ``0.0`` when the denominator *d* is zero/falsy."""
    if not d:
        return 0.0
    return n / d
96
+
97
+
98
def main():
    """Command-line entry point: measure the corpus and print the FP/FN report.

    Flags: ``--corpus`` (labeled corpus root), ``--json`` (JSON report on
    stdout) and ``--max-fp-rate`` (promotion bar).

    Exit status: 0 by default; 1 when ``--max-fp-rate`` is given and at least
    one gate's FP-rate exceeds it. Diagnostics go to stderr, so stdout stays a
    clean report.
    """
    ap = argparse.ArgumentParser(description="Measure gate FP/FN rate over a labeled corpus")
    ap.add_argument("--corpus", default=DEFAULT_CORPUS, help="labeled corpus root (valid/ + malformed/)")
    ap.add_argument("--json", action="store_true", help="emit JSON report to stdout")
    ap.add_argument("--max-fp-rate", type=float, default=None,
                    help="exit 1 if any measured gate's FP-rate exceeds this (promotion gate)")
    args = ap.parse_args()

    corpus = os.path.abspath(args.corpus)
    if not os.path.isdir(corpus):
        # A mistyped path would otherwise give a silent empty report.
        sys.stderr.write(f"fp-rate: corpus directory not found: {corpus} (nothing will be measured)\n")
    stats = measure(corpus)

    report = {}
    for gid, s in sorted(stats.items()):
        report[gid] = {
            "valid_samples": s["valid"],
            "false_positives": s["fp"],
            "fp_rate": round(rate(s["fp"], s["valid"]), 4),
            "malformed_samples": s["malformed"],
            "false_negatives": s["fn"],
            "fn_rate": round(rate(s["fn"], s["malformed"]), 4),
            "skipped_indeterminate": s["skipped"],
        }

    # Shown relative to the package root. os.path.relpath raises ValueError on
    # Windows when the two paths are on different drives; show the absolute
    # path in that case.
    try:
        shown_corpus = os.path.relpath(corpus, os.path.join(HERE, ".."))
    except ValueError:
        shown_corpus = corpus

    if args.json:
        print(json.dumps({"corpus": shown_corpus, "gates": report}, indent=2))
    else:
        print(f"FP/FN rate over corpus: {shown_corpus}")
        print(f"{'gate_id':<42} {'valid':>5} {'FP':>3} {'FP%':>6} {'malf':>4} {'FN':>3} {'FN%':>6}")
        for gid, r in report.items():
            print(f"{gid:<42} {r['valid_samples']:>5} {r['false_positives']:>3} "
                  f"{r['fp_rate']*100:>5.1f}% {r['malformed_samples']:>4} "
                  f"{r['false_negatives']:>3} {r['fn_rate']*100:>5.1f}%")
        if not report:
            print("  (no measurable gate verdicts in corpus)")

    if args.max_fp_rate is not None:
        # A gate with zero valid samples reports fp_rate 0.0 and so clears any
        # bar without being measured; say so rather than pass silently.
        unmeasured = [g for g, r in report.items() if r["valid_samples"] == 0]
        if unmeasured:
            sys.stderr.write(
                f"\nfp-rate: {len(unmeasured)} gate(s) have no valid samples; "
                f"their FP-rate is unmeasured, not zero:\n"
            )
            for g in unmeasured:
                sys.stderr.write(f"  {g}\n")
        over = {g: r["fp_rate"] for g, r in report.items() if r["fp_rate"] > args.max_fp_rate}
        if over:
            sys.stderr.write(f"\nfp-rate: {len(over)} gate(s) exceed --max-fp-rate={args.max_fp_rate}:\n")
            for g, fr in over.items():
                sys.stderr.write(f"  {g}: {fr:.4f}\n")
            sys.exit(1)
    sys.exit(0)


if __name__ == "__main__":
    main()