npm - @intentsolutions/audit-harness - Versions diffs - 0.1.0 → 1.1.6 - Mend

@intentsolutions/audit-harness 0.1.0 → 1.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

package/CHANGELOG.md +435 -0
package/LICENSE +202 -21
package/NOTICE +15 -0
package/README.md +36 -4
package/bin/audit-harness.js +108 -8
package/docs/gate-promotion.md +45 -0
package/package.json +13 -9
package/schemas/audit-profile/layer-applicability.md +146 -0
package/schemas/audit-profile/registry.v1.json +87 -0
package/schemas/audit-profile/v1.schema.json +294 -0
package/schemas/conform/v1/agent-frontmatter.schema.json +24 -0
package/schemas/conform/v1/mcp-config.schema.json +31 -0
package/schemas/conform/v1/plugin-manifest.schema.json +26 -0
package/schemas/conform/v1/skillmd-frontmatter.schema.json +40 -0
package/schemas/currency/pins.v1.json +55 -0
package/scripts/arch-check.sh +25 -1
package/scripts/audit.py +386 -0
package/scripts/bias-count.sh +50 -4
package/scripts/classify.py +403 -0
package/scripts/conform.py +481 -0
package/scripts/crap-score.py +65 -5
package/scripts/currency.py +118 -0
package/scripts/emit-evidence.sh +256 -0
package/scripts/escape-scan.sh +58 -4
package/scripts/fp-rate.py +145 -0
package/scripts/gen-layer-applicability.py +157 -0
package/scripts/gherkin-lint.sh +53 -9
package/scripts/harness-hash.sh +78 -5
package/scripts/scan.py +228 -0

package/scripts/currency.py ADDED Viewed

@@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+"""
+audit-harness currency — advisory upstream-currency report (PP-PLAN-040 Phase 5 / E7).
+Currency depends on upstream state, which is non-deterministic and network-bound, so
+it is deliberately the WEAKEST kind of check: an advisory REPORT with **no exit-code
+authority, no auto-fix, and no live-fetch**. It reads the per-upstream-identity pin
+relation (schemas/currency/pins.v1.json) — where each upstream carries its own
+pinned_version + the date it was last verified (checked_at) + a staleness window —
+and reports which pins are themselves STALE (checked_at older than the window), i.e.
+which pins a human should re-verify against upstream.
+This models the pin's OWN staleness as detectable, rather than one opaque
+".schema-version" scalar. The /sync-testing-harness skill consumes this report to
+open advisory bump PRs; the report never reddens a build (always exit 0).
+Stdlib only. No network. No filesystem mutation.
+"""
+import argparse
+import json
+import os
+import sys
+from datetime import datetime, timezone
+HERE = os.path.dirname(os.path.abspath(__file__))
+DEFAULT_PINS = os.path.join(HERE, "..", "schemas", "currency", "pins.v1.json")
+def parse_date(s):
+    """Parse a ``YYYY-MM-DD`` string into a ``datetime.date``.
+
+    Returns ``None`` instead of raising when *s* is not a string or does not
+    match the format, so callers can report an unparseable date as data.
+    """
+    try:
+        return datetime.strptime(s, "%Y-%m-%d").date()
+    except (TypeError, ValueError):
+        # TypeError: s is not a str (e.g. None or a number).
+        # ValueError: s is a str that does not match YYYY-MM-DD.
+        return None
+def build_report(pins_doc, today):
+    """Compute one staleness row per pin in *pins_doc*.
+
+    Args:
+        pins_doc: parsed pin relation (a dict). Reads the optional keys
+            ``default_staleness_window_days`` (90 when absent) and ``pins``.
+        today: ``datetime.date`` that pin ages are measured against.
+
+    Returns:
+        A list of dicts, in pin order, with the keys ``identity``,
+        ``pinned_version``, ``checked_at``, ``age_days``, ``window_days``,
+        ``status``, ``source`` and ``notes``. ``status`` is ``"stale"`` when
+        the pin's age in days exceeds its window, ``"current"`` otherwise,
+        and ``"unknown-checked_at"`` (with ``age_days`` None) when
+        ``checked_at`` cannot be parsed.
+    """
+    default_window = pins_doc.get("default_staleness_window_days", 90)
+    out = []
+    # `or []` also covers an explicit JSON null for "pins".
+    for pin in pins_doc.get("pins") or []:
+        checked = parse_date(pin.get("checked_at", ""))
+        window = pin.get("staleness_window_days")
+        if window is None:
+            # Missing key *or* explicit JSON null: use the document default
+            # rather than crash on `age > None` below.
+            window = default_window
+        if checked is None:
+            age, status = None, "unknown-checked_at"
+        else:
+            age = (today - checked).days
+            status = "stale" if age > window else "current"
+        out.append({
+            "identity": pin.get("identity"),
+            "pinned_version": pin.get("pinned_version"),
+            "checked_at": pin.get("checked_at"),
+            "age_days": age,
+            "window_days": window,
+            "status": status,
+            "source": pin.get("source"),
+            "notes": pin.get("notes"),
+        })
+    return out
+def main():
+    """CLI entry point: print the advisory currency report and exit.
+
+    Flags: ``--pins`` (path to the pin datum), ``--json`` (emit a JSON report
+    instead of the text table) and ``--today`` (YYYY-MM-DD override of the
+    reference date).
+
+    Exit status is 0 whenever a report is produced, whether or not any pin is
+    stale. It is 2 when the report cannot be produced at all: the pin datum is
+    unreadable / not a JSON object, or ``--today`` is not a valid date.
+    """
+    ap = argparse.ArgumentParser(description="Advisory upstream-currency report (no exit authority)")
+    ap.add_argument("--pins", default=DEFAULT_PINS, help="path to the pin relation datum")
+    ap.add_argument("--json", action="store_true", help="emit JSON report")
+    ap.add_argument("--today", default=None, help="override 'today' (YYYY-MM-DD) for reproducible reports/tests")
+    args = ap.parse_args()
+    pins_path = os.path.abspath(args.pins)
+    try:
+        with open(pins_path, "r", encoding="utf-8") as f:
+            pins_doc = json.load(f)
+    except (OSError, ValueError) as e:
+        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
+        sys.stderr.write(f"currency: cannot read pins at {pins_path}: {e}\n")
+        sys.exit(2)
+    if not isinstance(pins_doc, dict):
+        sys.stderr.write(f"currency: cannot read pins at {pins_path}: top-level JSON value is not an object\n")
+        sys.exit(2)
+    if args.today:
+        today = parse_date(args.today)
+        if today is None:
+            # Without this check a bad --today surfaced later as a traceback
+            # from date arithmetic on None.
+            sys.stderr.write(f"currency: invalid --today {args.today!r} (expected YYYY-MM-DD)\n")
+            sys.exit(2)
+    else:
+        today = datetime.now(timezone.utc).date()
+    report = build_report(pins_doc, today)
+    stale = [r for r in report if r["status"] == "stale"]
+    unknown = [r for r in report if r["status"] == "unknown-checked_at"]
+    if args.json:
+        print(json.dumps({
+            "report": "currency/v1",
+            "generated_for": today.strftime("%Y-%m-%d"),
+            "pins": report,
+            "stale_count": len(stale),
+            "advisory": True,
+        }, indent=2))
+    else:
+        print(f"Upstream currency (advisory) — as of {today.strftime('%Y-%m-%d')}")
+        print(f"{'identity':<24} {'pinned':<14} {'checked_at':<12} {'age':>5} {'win':>4}  status")
+        for r in report:
+            age = "—" if r["age_days"] is None else str(r["age_days"]) + "d"
+            if r["status"] == "stale":
+                mark = "⚠ STALE"
+            elif r["status"] == "current":
+                mark = "current"
+            else:
+                mark = "? " + r["status"]
+            print(f"{(r['identity'] or ''):<24} {(r['pinned_version'] or ''):<14} "
+                  f"{(r['checked_at'] or ''):<12} {age:>5} {r['window_days']:>4}  {mark}")
+        print()
+        if stale:
+            print(f"{len(stale)} pin(s) past their staleness window — re-verify against upstream, "
+                  f"then bump pinned_version + checked_at in schemas/currency/pins.v1.json:")
+            for r in stale:
+                print(f"  - {r['identity']}: last checked {r['checked_at']} "
+                      f"({r['age_days']}d ago > {r['window_days']}d)")
+        else:
+            print("All pins within their staleness window.")
+        if unknown:
+            print(f"{len(unknown)} pin(s) have an unparseable checked_at — fix the date format (YYYY-MM-DD).")
+    # Advisory ONLY: stale or unknown pins never change the exit code.
+    sys.exit(0)
+if __name__ == "__main__":
+    main()

package/scripts/emit-evidence.sh ADDED Viewed

@@ -0,0 +1,256 @@
+#!/usr/bin/env bash
+# emit-evidence.sh — wrap a gate-result JSON envelope in an in-toto Statement v1.
+#
+# Reads a gate-result envelope JSON document from stdin (or --input), augments it
+# with the fields the runner knows (timestamp, runner version, commit_sha), and
+# emits a complete in-toto Statement v1 to stdout. Optionally signs the Statement
+# via `cosign sign-blob` and/or pushes to the Rekor transparency log.
+#
+# Per intent-eval-lab/specs/evidence-bundle/v0.1.0-draft/SPEC.md the emitted
+# Statement carries predicateType https://evals.intentsolutions.io/gate-result/v1.
+#
+# Usage:
+#   <gate> --json | bash emit-evidence.sh                          # unsigned, prints Statement
+#   bash emit-evidence.sh --input gate.json                        # read from file
+#   bash emit-evidence.sh --sign --key cosign.key < gate.json      # cosign key-based sign
+#   bash emit-evidence.sh --sign --keyless < gate.json             # cosign keyless (Fulcio OIDC)
+#   bash emit-evidence.sh --sign --rekor-url https://rekor.sigstore.dev < gate.json
+#   bash emit-evidence.sh --output bundle/row.json < gate.json
+#
+# Flags:
+#   --input PATH       Read gate-result JSON from PATH instead of stdin
+#   --output PATH      Write Statement (DSSE envelope if --sign) to PATH instead of stdout
+#   --sign             Sign the Statement via cosign. Default: --keyless.
+#   --keyless          Force cosign keyless signing (OIDC). Default when --sign and no --key.
+#   --key PATH         Cosign keyref. Use instead of --keyless.
+#   --rekor-url URL    Push the signed attestation to Rekor at URL. Implies --sign.
+#                      Default Rekor URL when present without value: https://rekor.sigstore.dev
+#   --no-sign          Explicitly skip signing (default behavior; documents the choice)
+#   --runner-version V Override the runner version string (default: from package.json)
+#   --commit-sha SHA   Override the commit SHA (default: git rev-parse HEAD)
+#   --help, -h         Print help
+#
+# Exit codes:
+#   0 — Statement emitted successfully
+#   1 — input JSON malformed or missing required fields
+#   2 — signing requested but cosign not available
+#   3 — Rekor push requested but failed
+#
+# CISO gate (per ISEDC v1 Q1, 2026-05-10): pushing to a public transparency log
+# (Rekor) against the predicate URI https://evals.intentsolutions.io/gate-result/v1
+# is BLOCKED until DNSSEC + CAA records are verified on the namespace. The script
+# does NOT enforce this — that is operator discipline. See bead `iel-4zr` in
+# intent-eval-platform/intent-eval-lab/.beads/.
+set -euo pipefail
+INPUT="-"
+OUTPUT=""
+SIGN=0
+KEYLESS=0
+KEYREF=""
+REKOR_URL=""
+RUNNER_VERSION_OVERRIDE=""
+COMMIT_SHA_OVERRIDE=""
+PREDICATE_URI="https://evals.intentsolutions.io/gate-result/v1"
+STATEMENT_TYPE="https://in-toto.io/Statement/v1"
+# require_value FLAG ARGC — exit 1 with a readable message when FLAG is the last
+# argument. Without this, `set -u` aborts on "$2" with a bare "unbound variable".
+require_value() {
+  if [[ "$2" -lt 2 ]]; then
+    echo "emit-evidence: $1 requires a value" >&2
+    exit 1
+  fi
+}
+# Flags are applied left to right; a later flag overrides an earlier one
+# (e.g. --no-sign after --sign turns signing back off).
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --input)       require_value "$1" "$#"; INPUT="$2"; shift 2 ;;
+    --output)      require_value "$1" "$#"; OUTPUT="$2"; shift 2 ;;
+    --sign)        SIGN=1; shift ;;
+    --keyless)     SIGN=1; KEYLESS=1; shift ;;
+    --key)         require_value "$1" "$#"; SIGN=1; KEYREF="$2"; shift 2 ;;
+    --rekor-url)
+                   SIGN=1
+                   # The URL is optional: if the next word is absent, empty, or
+                   # another --flag, fall back to the public Rekor instance.
+                   if [[ "${2:-}" =~ ^-- ]] || [[ -z "${2:-}" ]]; then
+                     REKOR_URL="https://rekor.sigstore.dev"
+                     shift
+                   else
+                     REKOR_URL="$2"
+                     shift 2
+                   fi
+                   ;;
+    --no-sign)     SIGN=0; shift ;;
+    --runner-version) require_value "$1" "$#"; RUNNER_VERSION_OVERRIDE="$2"; shift 2 ;;
+    --commit-sha)  require_value "$1" "$#"; COMMIT_SHA_OVERRIDE="$2"; shift 2 ;;
+    --help|-h)     sed -n '2,40p' "$0"; exit 0 ;;
+    *) echo "emit-evidence: unknown flag $1" >&2; exit 1 ;;
+  esac
+done
+# --- Read input ---
+# "-" means stdin; anything else must be a readable file.
+if [[ "$INPUT" != "-" && ! -r "$INPUT" ]]; then
+  echo "emit-evidence: cannot read $INPUT" >&2
+  exit 1
+fi
+if [[ "$INPUT" == "-" ]]; then
+  GATE_JSON=$(cat)
+else
+  GATE_JSON=$(cat "$INPUT")
+fi
+[[ -n "$GATE_JSON" ]] || { echo "emit-evidence: empty input" >&2; exit 1; }
+# --- Resolve runner + commit metadata ---
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PKG_JSON="${SCRIPT_DIR}/../package.json"
+# Precedence: explicit override, then the version in package.json, then "unknown".
+RUNNER="audit-harness@unknown"
+if [[ -n "$RUNNER_VERSION_OVERRIDE" ]]; then
+  RUNNER="$RUNNER_VERSION_OVERRIDE"
+elif [[ -f "$PKG_JSON" ]]; then
+  # Pass PKG_JSON via argv so paths with quotes/spaces/specials don't break the python source.
+  VER=$(python3 -c "import json, sys; print(json.load(open(sys.argv[1]))['version'])" "$PKG_JSON" 2>/dev/null || echo "unknown")
+  RUNNER="audit-harness@${VER}"
+fi
+# An explicit --commit-sha wins; otherwise ask git, with an all-zero stand-in
+# when git is unavailable or this is not a repository.
+COMMIT_SHA="$COMMIT_SHA_OVERRIDE"
+if [[ -z "$COMMIT_SHA" ]]; then
+  COMMIT_SHA=$(git rev-parse HEAD 2>/dev/null || echo "0000000")
+fi
+TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
+# --- Compose the Statement via python (deterministic JSON shape, escaping handled) ---
+# Inputs travel through the environment so no shell value is spliced into the
+# python source. Any validation failure prints one `emit-evidence:` line and
+# exits 1; under `set -e` that aborts the script with the same status.
+STATEMENT=$(GATE_JSON="$GATE_JSON" PREDICATE_URI="$PREDICATE_URI" STATEMENT_TYPE="$STATEMENT_TYPE" \
+  RUNNER="$RUNNER" COMMIT_SHA="$COMMIT_SHA" TIMESTAMP="$TIMESTAMP" \
+  python3 - <<'PY'
+import json, os, sys
+try:
+    gate = json.loads(os.environ["GATE_JSON"])
+except ValueError as e:
+    # Malformed input is a documented exit-1 condition; report it without a traceback.
+    sys.stderr.write(f"emit-evidence: input is not valid JSON: {e}\n")
+    sys.exit(1)
+if not isinstance(gate, dict):
+    sys.stderr.write("emit-evidence: gate-result must be a JSON object\n")
+    sys.exit(1)
+required = ["gate_id", "result", "input_hash", "policy_hash"]
+missing = [k for k in required if k not in gate]
+if missing:
+    sys.stderr.write(f"emit-evidence: gate-result missing required keys: {missing}\n")
+    sys.exit(1)
+# Augment predicate with runner-supplied fields
+predicate = {
+    "gate_id":     gate["gate_id"],
+    "result":      gate["result"],
+    "policy_hash": gate["policy_hash"],
+    "input_hash":  gate["input_hash"],
+    "timestamp":   os.environ["TIMESTAMP"],
+    "runner":      os.environ["RUNNER"],
+    "commit_sha":  os.environ["COMMIT_SHA"],
+}
+# Carry forward optional fields if present
+for opt in ("metadata", "failure_mode", "advisory_severity"):
+    if opt in gate:
+        predicate[opt] = gate[opt]
+# Subject naming: subject.name MUST equal predicate.gate_id (SPEC § 6 R8)
+# Subject digest: subject.digest.sha256 MUST equal predicate.input_hash (SPEC § 6 R9)
+input_hash = gate["input_hash"]
+# The isinstance check keeps a non-string input_hash (number, null, object)
+# on the same clean error path as a wrongly-prefixed string.
+if not isinstance(input_hash, str) or not input_hash.startswith("sha256:"):
+    sys.stderr.write(f"emit-evidence: input_hash must be sha256:-prefixed, got: {input_hash}\n")
+    sys.exit(1)
+digest_hex = input_hash[len("sha256:"):]
+statement = {
+    "_type":         os.environ["STATEMENT_TYPE"],
+    "subject":       [{
+        "name":   gate["gate_id"],
+        "digest": {"sha256": digest_hex},
+    }],
+    "predicateType": os.environ["PREDICATE_URI"],
+    "predicate":     predicate,
+}
+print(json.dumps(statement))
+PY
+)
+if [[ -z "$STATEMENT" ]]; then
+  echo "emit-evidence: failed to compose Statement" >&2
+  exit 1
+fi
+# --- OTel event (best-effort no-op if collector absent) ---
+# Fire agent.rollout.gate.evaluated per intent-eval-lab/000-docs/001-DR-RFC-...md.
+# We emit a single OTLP-shaped JSON line to stderr when AUDIT_HARNESS_OTEL=1
+# OR an OTEL_EXPORTER_OTLP_ENDPOINT is set. Real exporter wiring is consumer-side;
+# we emit a structured signal that any collector can scrape via stderr capture.
+# The JSON is built with json.dumps so quotes or backslashes in gate_id, result,
+# runner or commit_sha cannot produce an invalid line.
+if [[ "${AUDIT_HARNESS_OTEL:-0}" == "1" ]] || [[ -n "${OTEL_EXPORTER_OTLP_ENDPOINT:-}" ]]; then
+  OTEL_LINE=$(GATE_JSON="$GATE_JSON" RUNNER="$RUNNER" COMMIT_SHA="$COMMIT_SHA" TIMESTAMP="$TIMESTAMP" \
+    python3 - 2>/dev/null <<'PY' || true
+import json, os
+try:
+    gate = json.loads(os.environ["GATE_JSON"])
+except ValueError:
+    gate = {}
+if not isinstance(gate, dict):
+    gate = {}
+event = {
+    "name": "agent.rollout.gate.evaluated",
+    "attributes": {
+        "gate.id": str(gate.get("gate_id", "")),
+        "gate.result": str(gate.get("result", "")),
+        "gate.runner": os.environ["RUNNER"],
+        "gate.commit_sha": os.environ["COMMIT_SHA"],
+    },
+    "timestamp": os.environ["TIMESTAMP"],
+}
+print(json.dumps(event, separators=(",", ":")))
+PY
+  )
+  # Best-effort: if the helper produced nothing, skip the event instead of failing.
+  if [[ -n "$OTEL_LINE" ]]; then
+    printf '[OTEL] %s\n' "$OTEL_LINE" >&2
+  fi
+fi
+# --- Sign + emit ---
+# emit CONTENT — print CONTENT plus a trailing newline to stdout, or, when
+# --output was given, write it to $OUTPUT (creating the parent directory) and
+# note the path on stderr.
+emit() {
+  local payload="$1"
+  if [[ -z "$OUTPUT" ]]; then
+    printf '%s\n' "$payload"
+    return
+  fi
+  mkdir -p "$(dirname "$OUTPUT")"
+  printf '%s\n' "$payload" > "$OUTPUT"
+  echo "emit-evidence: wrote $OUTPUT" >&2
+}
+# Unsigned path: print the bare Statement and stop.
+if [[ "$SIGN" -eq 0 ]]; then
+  emit "$STATEMENT"
+  exit 0
+fi
+# Signing requires cosign and always goes through `cosign attest-blob`; no
+# `cosign sign-blob` fallback is implemented here.
+if ! command -v cosign >/dev/null 2>&1; then
+  echo "emit-evidence: --sign requested but cosign is not installed (https://docs.sigstore.dev/cosign/installation/)" >&2
+  exit 2
+fi
+# Stage the Statement to a temp file for cosign to consume
+TMP=$(mktemp -d)
+trap 'rm -rf "$TMP"' EXIT
+STATEMENT_FILE="$TMP/statement.json"
+printf '%s\n' "$STATEMENT" > "$STATEMENT_FILE"
+ENVELOPE_FILE="$TMP/envelope.dsse.json"
+# NOTE(review): --predicate is handed the complete Statement, not just its
+# predicate — confirm against the cosign docs that attest-blob does not wrap it
+# in a second Statement.
+COSIGN_ARGS=("attest-blob" "--predicate" "$STATEMENT_FILE" "--type" "$PREDICATE_URI")
+if [[ -n "$KEYREF" ]]; then
+  COSIGN_ARGS+=("--key" "$KEYREF")
+else
+  # No key given: keyless is the only remaining mode, with or without --keyless.
+  COSIGN_ARGS+=("--yes")   # accept Fulcio OIDC keyless
+fi
+if [[ -n "$REKOR_URL" ]]; then
+  COSIGN_ARGS+=("--rekor-url" "$REKOR_URL")
+  COSIGN_ARGS+=("--tlog-upload=true")
+else
+  COSIGN_ARGS+=("--tlog-upload=false")
+fi
+COSIGN_ARGS+=("--output-signature" "$ENVELOPE_FILE")
+# `cosign attest-blob` needs a "blob" — the input the predicate attests to.
+# The blob file is named after the Statement's subject (the gate_id).
+ARTIFACT_NAME="$(echo "$STATEMENT" | python3 -c "import json,sys; print(json.load(sys.stdin)['subject'][0]['name'])")"
+# gate_id comes from the input document: replace "/" so it can neither point at
+# a missing subdirectory nor escape $TMP.
+BLOB_FILE="$TMP/${ARTIFACT_NAME//\//_}.blob"
+# The blob is a copy of the Statement itself, so its digest is the Statement's
+# and NOT the gate's declared input_hash.
+# NOTE(review): earlier commentary described a blob whose sha256 equals
+# input_hash; confirm which subject digest is intended.
+cp "$STATEMENT_FILE" "$BLOB_FILE"
+# Any cosign failure (signing or transparency-log upload) is reported as exit 3.
+if ! cosign "${COSIGN_ARGS[@]}" "$BLOB_FILE" >&2; then
+  echo "emit-evidence: cosign signing failed" >&2
+  exit 3
+fi
+emit "$(cat "$ENVELOPE_FILE")"
+echo "emit-evidence: signed envelope emitted${REKOR_URL:+ (Rekor: $REKOR_URL)}" >&2
+exit 0

package/scripts/escape-scan.sh CHANGED Viewed

@@ -19,14 +19,32 @@
 #   bash escape-scan.sh path/to/change.patch
 #   bash escape-scan.sh --staged          # git diff --cached
 #   bash escape-scan.sh --range HEAD~1..HEAD
+#   bash escape-scan.sh --staged --json   # machine-readable JSON to stdout
+#
+# JSON mode:
+#   stdout = single JSON object suitable for piping to `audit-harness emit-evidence`
+#   stderr = unchanged human-readable [SEVERITY] notes (preserves backward-compat)
+#   exit codes unchanged
 set -euo pipefail
 DIFF_SRC=""
 VERIFY_HASH=1
+JSON_OUT=0
 ROOT="${ROOT:-$(pwd)}"
 HASH_SCRIPT="$(dirname "$0")/harness-hash.sh"
+# First-pass arg parse: peel --json off the tail (any position) so primary
+# arg parsing below is unchanged.
+_filtered_args=()
+for arg in "$@"; do
+  case "$arg" in
+    --json) JSON_OUT=1 ;;
+    *) _filtered_args+=("$arg") ;;
+  esac
+done
+set -- "${_filtered_args[@]+"${_filtered_args[@]}"}"
 if [[ "$#" -eq 0 ]]; then
   echo "escape-scan: pass a diff source (- for stdin, --staged, --range, or a patch file)" >&2
   exit 2
@@ -34,11 +52,20 @@ fi
 case "$1" in
   -) DIFF_SRC="/dev/stdin" ;;
-  --staged) DIFF_SRC=$(mktemp); git diff --cached > "$DIFF_SRC" ;;
-  --range) DIFF_SRC=$(mktemp); git diff "$2" > "$DIFF_SRC"; shift ;;
+  --staged)
+    DIFF_SRC=$(mktemp)
+    trap 'rm -f "$DIFF_SRC"' EXIT
+    git diff --cached > "$DIFF_SRC"
+    ;;
+  --range)
+    DIFF_SRC=$(mktemp)
+    trap 'rm -f "$DIFF_SRC"' EXIT
+    git diff "$2" > "$DIFF_SRC"
+    shift
+    ;;
   --no-hash) VERIFY_HASH=0; shift; DIFF_SRC="$1" ;;
   --help|-h)
-    sed -n '2,22p' "$0"; exit 0 ;;
+    sed -n '2,26p' "$0"; exit 0 ;;
   *) DIFF_SRC="$1" ;;
 esac
@@ -159,7 +186,34 @@ if echo "$added_lines" | grep -Eq 'toBeDefined\(\)|\.is not None'; then
 fi
 # --- Summary & exit ---
-echo "escape-scan: REFUSE=$REFUSE CHALLENGE=$CHALLENGE FLAG=$FLAG"
+if [[ "$JSON_OUT" -eq 1 ]]; then
+  # Result mapping (per intent-eval-lab evidence-bundle SPEC § 5 R6):
+  #   any REFUSE → FAIL
+  #   any CHALLENGE (no REFUSE) → FAIL  (exit 1 = blocking, requires human)
+  #   only FLAG → ADVISORY (exit 0 — informational)
+  #   none → PASS
+  result="PASS"
+  if [[ "$REFUSE" -gt 0 || "$CHALLENGE" -gt 0 ]]; then
+    result="FAIL"
+  elif [[ "$FLAG" -gt 0 ]]; then
+    result="ADVISORY"
+  fi
+  input_hash=$(sha256sum "$DIFF_SRC" | awk '{print "sha256:"$1}')
+  policy_hash="sha256:0000000000000000000000000000000000000000000000000000000000000000"
+  if [[ -f "$TESTING_MD" ]]; then
+    policy_hash=$(sha256sum "$TESTING_MD" | awk '{print "sha256:"$1}')
+  fi
+  printf '{"gate_id":"audit-harness:%s:escape-scan","result":"%s","input_hash":"%s","policy_hash":"%s","metadata":{"refuse":%d,"challenge":%d,"flag":%d,"coverage_line_floor":%d,"coverage_branch_floor":%d,"mutation_floor":%d}' \
+    "${AUDIT_HARNESS_SIDE:-ci}" "$result" "$input_hash" "$policy_hash" "$REFUSE" "$CHALLENGE" "$FLAG" \
+    "$COVERAGE_LINE_FLOOR" "$COVERAGE_BRANCH_FLOOR" "$MUTATION_FLOOR"
+  if [[ "$result" == "ADVISORY" ]]; then
+    printf ',"advisory_severity":"info"'
+  fi
+  printf '}\n'
+  echo "escape-scan: REFUSE=$REFUSE CHALLENGE=$CHALLENGE FLAG=$FLAG" >&2
+else
+  echo "escape-scan: REFUSE=$REFUSE CHALLENGE=$CHALLENGE FLAG=$FLAG"
+fi
 if [[ "$REFUSE" -gt 0 ]]; then
   echo "escape-scan: pipeline halted (REFUSE)" >&2
   exit 2

package/scripts/fp-rate.py ADDED Viewed

@@ -0,0 +1,145 @@
+#!/usr/bin/env python3
+"""
+audit-harness fp-rate — measure a gate's false-positive / false-negative rate over
+a labeled corpus.
+A new gate ships `enforcement: advisory`. It earns promotion to `blocking` only
+once its measured false-positive rate (clean inputs it wrongly flags) sits below a
+stated bar on a labeled corpus. This harness produces that measurement — the
+evidence an engineer cites when they pin `enforcement: blocking` in a repo's
+`tests/TESTING.md` (PP-PLAN-040 Phase 0, bead c2e; rule: docs/gate-promotion.md).
+Labeled corpus layout (default `tests/fixtures/conform`):
+    <corpus>/valid/<fixture>/...      → every gate that fires here SHOULD be clean
+    <corpus>/malformed/<fixture>/...  → every gate that fires here SHOULD flag
+Per row the gate emits on a fixture, the verdict is bucketed:
+    clean  = PASS | NOT_APPLICABLE
+    flag   = FAIL | ADVISORY(advisory_severity=error)
+    skip   = ADVISORY indeterminate (tool/schema absent) — unmeasurable, excluded
+    false positive (FP) = a `valid` fixture the gate flagged
+    false negative (FN) = a `malformed` fixture the gate left clean
+Stdlib only. Read-only. Default exit 0 (report); `--max-fp-rate X` exits 1 if any
+gate exceeds the bar (use in CI when promoting a gate).
+"""
+import argparse
+import json
+import os
+import subprocess
+import sys
+HERE = os.path.dirname(os.path.abspath(__file__))
+CONFORM = os.path.join(HERE, "conform.py")
+DEFAULT_CORPUS = os.path.join(HERE, "..", "tests", "fixtures", "conform")
+def verdict_bucket(row):
+    """Classify one gate-result row as ``"clean"``, ``"flag"`` or ``"skip"``.
+
+    - ``PASS`` / ``NOT_APPLICABLE`` -> ``"clean"``
+    - ``FAIL`` -> ``"flag"``
+    - ``ADVISORY`` -> ``"skip"`` when ``metadata.indeterminate`` is truthy,
+      else ``"flag"`` when ``advisory_severity == "error"``, else ``"skip"``
+    - any other or missing ``result`` -> ``"skip"``
+    """
+    r = row.get("result")
+    if r in ("PASS", "NOT_APPLICABLE"):
+        return "clean"
+    if r == "FAIL":
+        return "flag"
+    if r == "ADVISORY":
+        meta = row.get("metadata")
+        # metadata may be absent, JSON null, or not an object; only a dict can
+        # carry the indeterminate marker.
+        if isinstance(meta, dict) and meta.get("indeterminate"):
+            return "skip"
+        if row.get("advisory_severity") == "error":
+            return "flag"
+    return "skip"
+def run_conform(fixture):
+    """Run ``conform.py`` on *fixture* and return the rows it printed.
+
+    Returns the JSON list parsed from the child's stdout. When stdout is not
+    parseable JSON, or is JSON but not a list, returns ``[]`` and writes a
+    warning to stderr so the fixture's absence from the measurement is
+    visible rather than silent.
+    """
+    out = subprocess.run([sys.executable, CONFORM, fixture], capture_output=True, text=True)
+    try:
+        rows = json.loads(out.stdout)
+    except ValueError as e:
+        # json.JSONDecodeError is a ValueError; stdout is str because text=True.
+        sys.stderr.write(
+            f"fp-rate: no parseable JSON from conform for {fixture} "
+            f"(exit {out.returncode}): {e}\n"
+        )
+        return []
+    if not isinstance(rows, list):
+        sys.stderr.write(
+            f"fp-rate: conform output for {fixture} is not a JSON list; ignoring\n"
+        )
+        return []
+    return rows
+def measure(corpus):
+    """Tally per-gate verdict counts over the labeled *corpus*.
+
+    Walks ``<corpus>/valid`` then ``<corpus>/malformed`` (skipping either if it
+    is not a directory), runs conform on each fixture directory in sorted
+    order, and returns ``{gate_id: {"valid", "fp", "malformed", "fn",
+    "skipped"}}``. A "skip" verdict only increments ``skipped``; otherwise the
+    row counts toward its label's total, plus ``fp`` for a flagged valid
+    fixture or ``fn`` for a clean malformed one.
+    """
+    tallies = {}
+    for label in ("valid", "malformed"):
+        label_dir = os.path.join(corpus, label)
+        if not os.path.isdir(label_dir):
+            continue
+        for entry in sorted(os.listdir(label_dir)):
+            fixture = os.path.join(label_dir, entry)
+            if not os.path.isdir(fixture):
+                continue
+            for row in run_conform(fixture):
+                gate = row["gate_id"]
+                verdict = verdict_bucket(row)
+                counts = tallies.setdefault(
+                    gate, {"valid": 0, "fp": 0, "malformed": 0, "fn": 0, "skipped": 0}
+                )
+                if verdict == "skip":
+                    counts["skipped"] += 1
+                elif label == "valid":
+                    counts["valid"] += 1
+                    if verdict == "flag":
+                        counts["fp"] += 1
+                else:
+                    counts["malformed"] += 1
+                    if verdict == "clean":
+                        counts["fn"] += 1
+    return tallies
+def rate(n, d):
+    """Return ``n / d``, or ``0.0`` when the denominator *d* is falsy (zero)."""
+    if not d:
+        return 0.0
+    return n / d
+def main():
+    """CLI entry point: measure the corpus, print the report, apply the FP bar.
+
+    Prints either a JSON document (``--json``) or a text table of per-gate
+    FP/FN counts and rates. Exits 1 when ``--max-fp-rate`` is given and at
+    least one gate's reported ``fp_rate`` exceeds it; exits 0 otherwise.
+    """
+    parser = argparse.ArgumentParser(description="Measure gate FP/FN rate over a labeled corpus")
+    parser.add_argument("--corpus", default=DEFAULT_CORPUS, help="labeled corpus root (valid/ + malformed/)")
+    parser.add_argument("--json", action="store_true", help="emit JSON report to stdout")
+    parser.add_argument("--max-fp-rate", type=float, default=None,
+                        help="exit 1 if any measured gate's FP-rate exceeds this (promotion gate)")
+    args = parser.parse_args()
+    corpus = os.path.abspath(args.corpus)
+    # One summary row per gate, in sorted gate_id order; rates rounded to 4 places.
+    report = {
+        gate: {
+            "valid_samples": counts["valid"],
+            "false_positives": counts["fp"],
+            "fp_rate": round(rate(counts["fp"], counts["valid"]), 4),
+            "malformed_samples": counts["malformed"],
+            "false_negatives": counts["fn"],
+            "fn_rate": round(rate(counts["fn"], counts["malformed"]), 4),
+            "skipped_indeterminate": counts["skipped"],
+        }
+        for gate, counts in sorted(measure(corpus).items())
+    }
+    # The corpus is shown relative to the directory above this script.
+    shown_corpus = os.path.relpath(corpus, os.path.join(HERE, ".."))
+    if args.json:
+        print(json.dumps({"corpus": shown_corpus, "gates": report}, indent=2))
+    else:
+        print(f"FP/FN rate over corpus: {shown_corpus}")
+        print(f"{'gate_id':<42} {'valid':>5} {'FP':>3} {'FP%':>6} {'malf':>4} {'FN':>3} {'FN%':>6}")
+        for gate, row in report.items():
+            fp_pct = row["fp_rate"] * 100
+            fn_pct = row["fn_rate"] * 100
+            print(f"{gate:<42} {row['valid_samples']:>5} {row['false_positives']:>3} "
+                  f"{fp_pct:>5.1f}% {row['malformed_samples']:>4} "
+                  f"{row['false_negatives']:>3} {fn_pct:>5.1f}%")
+        if not report:
+            print("  (no measurable gate verdicts in corpus)")
+    threshold = args.max_fp_rate
+    if threshold is not None:
+        # Promotion gate: any gate over the bar fails the run.
+        offenders = {gate: row["fp_rate"] for gate, row in report.items() if row["fp_rate"] > threshold}
+        if offenders:
+            sys.stderr.write(f"\nfp-rate: {len(offenders)} gate(s) exceed --max-fp-rate={threshold}:\n")
+            for gate, fp_rate in offenders.items():
+                sys.stderr.write(f"  {gate}: {fp_rate:.4f}\n")
+            sys.exit(1)
+    sys.exit(0)
+if __name__ == "__main__":
+    main()