npm - @intentsolutions/audit-harness - Versions diffs - 1.1.7 → 1.2.0 - Mend

@intentsolutions/audit-harness 1.1.7 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/CHANGELOG.md +65 -0
package/bin/audit-harness.js +17 -6
package/docs/cred-gate.md +131 -0
package/package.json +8 -1
package/schemas/currency/pins.v1.json +164 -22
package/scripts/arch-check.sh +20 -2
package/scripts/bias-count.sh +18 -1
package/scripts/caa-check.sh +143 -0
package/scripts/check-wrapper-sync.sh +120 -0
package/scripts/crap-score.py +57 -6
package/scripts/cred-gate.sh +238 -0
package/scripts/currency.py +70 -25
package/scripts/dnssec-check.sh +158 -0
package/scripts/emit-evidence.sh +186 -14
package/scripts/escape-scan.sh +28 -3
package/scripts/gherkin-lint.sh +5 -0
package/scripts/harness-hash.sh +5 -0
package/scripts/kernel-shadow-check.sh +132 -0

package/scripts/currency.py CHANGED Viewed

@@ -1,18 +1,26 @@
 #!/usr/bin/env python3
 """
-audit-harness currency — advisory upstream-currency report (PP-PLAN-040 Phase 5 / E7).
+audit-harness currency — advisory poll-freshness report (PP-PLAN-040 Phase 5 / E7).
 Currency depends on upstream state, which is non-deterministic and network-bound, so
 it is deliberately the WEAKEST kind of check: an advisory REPORT with **no exit-code
 authority, no auto-fix, and no live-fetch**. It reads the per-upstream-identity pin
 relation (schemas/currency/pins.v1.json) — where each upstream carries its own
-pinned_version + the date it was last verified (checked_at) + a staleness window —
-and reports which pins are themselves STALE (checked_at older than the window), i.e.
-which pins a human should re-verify against upstream.
+pinned_version + the date it was last verified (checked_at) + an advisory
+poll-freshness SLA — and reports which pins are themselves PAST their SLA
+(checked_at older than the SLA window), i.e. which pins a human should re-verify
+against upstream. The SLA gates NOTHING except human attention.
 This models the pin's OWN staleness as detectable, rather than one opaque
-".schema-version" scalar. The /sync-testing-harness skill consumes this report to
-open advisory bump PRs; the report never reddens a build (always exit 0).
+".schema-version" scalar. Pins are grouped by class (spec-page / schema-file /
+release-feed / internal-contract); SLA resolution order is: explicit per-pin
+staleness_window_days > the pin's class SLA > default_staleness_window_days.
+The /sync-testing-harness skill consumes this report to open advisory bump PRs;
+the report never reddens a build (always exit 0).
+Follow-up (deliberately NOT wired here, [9k5h.10]): the intent-eval-lab
+detector-health surface will consume the --json output; that cross-repo
+integration is tracked separately.
 Stdlib only. No network. No filesystem mutation.
 """
@@ -25,6 +33,8 @@ from datetime import datetime, timezone
 HERE = os.path.dirname(os.path.abspath(__file__))
 DEFAULT_PINS = os.path.join(HERE, "..", "schemas", "currency", "pins.v1.json")
+UNCLASSED = "(unclassed)"
 def parse_date(s):
     try:
@@ -33,12 +43,23 @@ def parse_date(s):
         return None
+def resolve_window(pin, classes, default_window):
+    """SLA resolution: explicit per-pin window > class SLA > file default."""
+    if pin.get("staleness_window_days") is not None:
+        return pin["staleness_window_days"]
+    cls = classes.get(pin.get("class") or "", {})
+    if cls.get("staleness_window_days") is not None:
+        return cls["staleness_window_days"]
+    return default_window
 def build_report(pins_doc, today):
     default_window = pins_doc.get("default_staleness_window_days", 90)
+    classes = pins_doc.get("staleness_classes", {})
     out = []
     for pin in pins_doc.get("pins", []):
         checked = parse_date(pin.get("checked_at", ""))
-        window = pin.get("staleness_window_days", default_window)
+        window = resolve_window(pin, classes, default_window)
         if checked is None:
             age, status = None, "unknown-checked_at"
         else:
@@ -46,6 +67,7 @@ def build_report(pins_doc, today):
             status = "stale" if age > window else "current"
         out.append({
             "identity": pin.get("identity"),
+            "class": pin.get("class") or UNCLASSED,
             "pinned_version": pin.get("pinned_version"),
             "checked_at": pin.get("checked_at"),
             "age_days": age,
@@ -57,8 +79,18 @@ def build_report(pins_doc, today):
     return out
+def group_by_class(report):
+    """Ordered {class: [rows]} grouping, classes sorted, (unclassed) last."""
+    grouped = {}
+    for r in report:
+        grouped.setdefault(r["class"], []).append(r)
+    ordered = sorted(grouped, key=lambda c: (c == UNCLASSED, c))
+    return {c: grouped[c] for c in ordered}
 def main():
-    ap = argparse.ArgumentParser(description="Advisory upstream-currency report (no exit authority)")
+    ap = argparse.ArgumentParser(
+        description="Advisory poll-freshness report (no exit authority — the SLA gates nothing but human attention)")
     ap.add_argument("--pins", default=DEFAULT_PINS, help="path to the pin relation datum")
     ap.add_argument("--json", action="store_true", help="emit JSON report")
     ap.add_argument("--today", default=None, help="override 'today' (YYYY-MM-DD) for reproducible reports/tests")
@@ -74,39 +106,52 @@ def main():
     today = parse_date(args.today) if args.today else datetime.now(timezone.utc).date()
     report = build_report(pins_doc, today)
+    grouped = group_by_class(report)
     stale = [r for r in report if r["status"] == "stale"]
     unknown = [r for r in report if r["status"] == "unknown-checked_at"]
     if args.json:
+        by_class = {}
+        for cls, rows in grouped.items():
+            by_class[cls] = {
+                "total": len(rows),
+                "stale": sum(1 for r in rows if r["status"] == "stale"),
+                "current": sum(1 for r in rows if r["status"] == "current"),
+                "unknown": sum(1 for r in rows if r["status"] == "unknown-checked_at"),
+            }
         print(json.dumps({
             "report": "currency/v1",
             "generated_for": today.strftime("%Y-%m-%d"),
             "pins": report,
+            "by_class": by_class,
             "stale_count": len(stale),
             "advisory": True,
         }, indent=2))
     else:
-        print(f"Upstream currency (advisory) — as of {today.strftime('%Y-%m-%d')}")
-        print(f"{'identity':<24} {'pinned':<14} {'checked_at':<12} {'age':>5} {'win':>4}  status")
-        for r in report:
-            age = "—" if r["age_days"] is None else str(r["age_days"]) + "d"
-            if r["status"] == "stale":
-                mark = "⚠ STALE"
-            elif r["status"] == "current":
-                mark = "current"
-            else:
-                mark = "? " + r["status"]
-            print(f"{(r['identity'] or ''):<24} {(r['pinned_version'] or ''):<14} "
-                  f"{(r['checked_at'] or ''):<12} {age:>5} {r['window_days']:>4}  {mark}")
+        print(f"Upstream currency — advisory poll-freshness SLA report — as of {today.strftime('%Y-%m-%d')}")
+        print(f"{'identity':<26} {'pinned':<18} {'checked_at':<12} {'age':>5} {'sla':>4}  status")
+        for cls, rows in grouped.items():
+            print(f"[{cls}] — {len(rows)} pin(s)")
+            for r in rows:
+                age = "—" if r["age_days"] is None else str(r["age_days"]) + "d"
+                if r["status"] == "stale":
+                    mark = "⚠ PAST SLA"
+                elif r["status"] == "current":
+                    mark = "current"
+                else:
+                    mark = "? " + r["status"]
+                print(f"  {(r['identity'] or ''):<24} {(r['pinned_version'] or ''):<18} "
+                      f"{(r['checked_at'] or ''):<12} {age:>5} {r['window_days']:>4}  {mark}")
         print()
         if stale:
-            print(f"{len(stale)} pin(s) past their staleness window — re-verify against upstream, "
-                  f"then bump pinned_version + checked_at in schemas/currency/pins.v1.json:")
+            print(f"{len(stale)} pin(s) past their poll-freshness SLA — the SLA gates nothing but human "
+                  f"attention: re-verify against upstream, then bump pinned_version + checked_at in "
+                  f"schemas/currency/pins.v1.json:")
             for r in stale:
-                print(f"  - {r['identity']}: last checked {r['checked_at']} "
-                      f"({r['age_days']}d ago > {r['window_days']}d)")
+                print(f"  - {r['identity']} [{r['class']}]: last checked {r['checked_at']} "
+                      f"({r['age_days']}d ago > {r['window_days']}d SLA)")
         else:
-            print("All pins within their staleness window.")
+            print("All pins within their poll-freshness SLA.")
         if unknown:
             print(f"{len(unknown)} pin(s) have an unparseable checked_at — fix the date format (YYYY-MM-DD).")

package/scripts/dnssec-check.sh ADDED Viewed

@@ -0,0 +1,158 @@
+#!/usr/bin/env bash
+# dnssec-check.sh — verify a namespace is DNSSEC-signed before a production
+# signed attestation is anchored against it.
+#
+# WHY THIS EXISTS (CISO binding, DR-010 Q5 / ISEDC v1 Q1 2026-05-10):
+#   Predicate URIs for the Evidence Bundle live ONLY at evals.intentsolutions.io.
+#   Pushing a signed in-toto Statement to a PUBLIC transparency log (Rekor)
+#   against an unsigned namespace is irreversible and lets an attacker who can
+#   spoof the zone mint look-alike attestations. DNSSEC must be verified on the
+#   namespace BEFORE the first production attestation. This script is that gate.
+#   It anchors NOTHING — it is a read-only verification that can only make
+#   signing MORE conservative (fail-closed).
+#
+# WHY IT QUERIES AN EXPLICIT RESOLVER (the bug this version fixes):
+#   Querying the LOCAL STUB RESOLVER (plain `dig`, no `@server`) FALSE-NEGATIVES
+#   on hosts whose stub resolver strips DNSSEC records or never sets the AD bit
+#   (systemd-resolved, most CI runners, dev boxes behind a caching forwarder).
+#   On such a host a correctly DNSSEC-signed zone looks unsigned. For a
+#   fail-closed gate that is the WRONG failure mode: it blocks legitimate
+#   production signing, and because a genuinely-unsigned zone is blocked too,
+#   the check loses all discriminating power. The fix is to query a
+#   TRUSTED VALIDATING resolver and require the resolver to assert validation
+#   (`delv` full-chain "fully validated", or `dig`'s AD bit + an RRSIG). The
+#   gate stays fail-closed: PASS only on positive confirmation from a trusted
+#   resolver; UNKNOWN / unreachable / no-tool => non-zero.
+#
+# Usage:
+#   bash scripts/dnssec-check.sh [DOMAIN]
+#   DNSSEC_CHECK_DOMAIN=evals.intentsolutions.io bash scripts/dnssec-check.sh
+#
+# Resolution order for the domain:
+#   1. $1 (positional)
+#   2. $DNSSEC_CHECK_DOMAIN
+#   3. default: evals.intentsolutions.io
+#
+# Behavior:
+#   - Queries each resolver in $DNSSEC_CHECK_RESOLVERS (default 1.1.1.1 8.8.8.8),
+#     in order, and PASSES on the FIRST that confirms DNSSEC validation.
+#   - For each resolver: prefers `delv @<resolver>` (full DNSSEC chain validation
+#     against the IANA trust anchor; "fully validated" => PASS). Falls back to
+#     `dig @<resolver> +dnssec` and requires BOTH the AD (Authenticated Data)
+#     header flag AND the presence of an RRSIG record (a non-validating answer,
+#     i.e. RRSIG but no AD, does NOT pass — a malicious/forwarding resolver that
+#     returns records without validating the chain cannot trivially pass).
+#   - If NO resolver confirms validation (every resolver says unsigned, or is
+#     unreachable), exits 1 (fail-closed).
+#   - If NEITHER delv NOR dig is installed, emits a typed UNKNOWN/UNREACHABLE
+#     result and exits 2 (fail-closed for production).
+#
+# Exit codes:
+#   0 — DNSSEC verified (a trusted resolver fully validated, or set AD + RRSIG)
+#   1 — DNSSEC NOT verified (no trusted resolver confirmed; zone unsigned /
+#       validation failed / all resolvers unreachable)
+#   2 — UNKNOWN/UNREACHABLE (no resolver tool installed at all)
+#
+# Override knobs:
+#   DNSSEC_CHECK_RESOLVERS — space-separated list of validating/public resolvers
+#                            to query in order (default: "1.1.1.1 8.8.8.8").
+#   DNSSEC_CHECK_DELV_CMD   — command used in place of `delv` (default: delv)
+#   DNSSEC_CHECK_DIG_CMD    — command used in place of `dig`  (default: dig)
+set -euo pipefail
+DOMAIN="${1:-${DNSSEC_CHECK_DOMAIN:-evals.intentsolutions.io}}"
+DELV_CMD="${DNSSEC_CHECK_DELV_CMD:-delv}"
+DIG_CMD="${DNSSEC_CHECK_DIG_CMD:-dig}"
+# Trusted validating/public resolvers, queried in order. Cloudflare (1.1.1.1)
+# and Google (8.8.8.8) both perform DNSSEC validation and set the AD bit.
+RESOLVERS="${DNSSEC_CHECK_RESOLVERS:-1.1.1.1 8.8.8.8}"
+log() { printf 'dnssec-check: %s\n' "$1" >&2; }
+if [[ "$DOMAIN" == "-h" || "$DOMAIN" == "--help" ]]; then
+  sed -n '2,60p' "$0"
+  exit 0
+fi
+have() { command -v "$1" >/dev/null 2>&1; }
+have_delv=0
+have_dig=0
+have "$DELV_CMD" && have_delv=1
+have "$DIG_CMD" && have_dig=1
+# --- No resolver tool at all -> typed UNKNOWN, fail-closed (exit 2) ---
+if [[ "$have_delv" -eq 0 && "$have_dig" -eq 0 ]]; then
+  log "UNKNOWN/UNREACHABLE — neither '$DELV_CMD' nor '$DIG_CMD' is installed"
+  log "  cannot verify DNSSEC for '$DOMAIN'; failing closed (production must not sign on UNKNOWN)"
+  log "  remediation: install bind9-dnsutils (provides dig + delv) on the signing host"
+  exit 2
+fi
+# delv_validates RESOLVER -> 0 if delv reports the chain fully validated.
+delv_validates() {
+  local resolver="$1" out
+  # delv prints "; fully validated" on each validated RRset when the chain of
+  # trust holds; "; unsigned answer" / "resolution failed" otherwise. delv
+  # validates LOCALLY against the IANA trust anchor regardless of which resolver
+  # serves the records, so a non-validating @resolver cannot fake a pass.
+  out="$("$DELV_CMD" "$DOMAIN" "@$resolver" 2>&1 || true)"
+  printf '%s\n' "$out" | grep -q "fully validated"
+}
+# dig_validates RESOLVER -> 0 if the resolver set the AD bit AND an RRSIG is
+# present. BOTH are required: AD alone could be spoofed by a lying resolver
+# without signatures, RRSIG alone proves the zone publishes signatures but not
+# that the chain validated. Requiring AD means a non-validating resolver's
+# answer (RRSIG copied through, AD never set) does NOT pass.
+dig_validates() {
+  local resolver="$1" out ad_flag=0 rrsig=0
+  out="$("$DIG_CMD" "@$resolver" +dnssec +multiline "$DOMAIN" 2>&1 || true)"
+  if printf '%s\n' "$out" | grep -qE '^;; flags:[^;]*\bad\b'; then
+    ad_flag=1
+  fi
+  if printf '%s\n' "$out" | grep -qE '[[:space:]]RRSIG[[:space:]]'; then
+    rrsig=1
+  fi
+  [[ "$ad_flag" -eq 1 && "$rrsig" -eq 1 ]]
+}
+saw_unsigned=0  # at least one resolver answered, and said NOT validated
+for resolver in $RESOLVERS; do
+  # --- Path 1: delv @resolver (authoritative DNSSEC chain validation) ---
+  if [[ "$have_delv" -eq 1 ]]; then
+    log "validating DNSSEC for '$DOMAIN' via $DELV_CMD @$resolver"
+    if delv_validates "$resolver"; then
+      log "VERIFIED — '$DOMAIN' is DNSSEC-signed (delv @$resolver: fully validated)"
+      exit 0
+    fi
+    saw_unsigned=1
+    log "delv @$resolver did not confirm validation; trying dig @$resolver"
+  fi
+  # --- Path 2: dig @resolver +dnssec (AD bit + RRSIG presence) ---
+  if [[ "$have_dig" -eq 1 ]]; then
+    log "checking DNSSEC for '$DOMAIN' via $DIG_CMD @$resolver +dnssec"
+    if dig_validates "$resolver"; then
+      log "VERIFIED — '$DOMAIN' is DNSSEC-signed (dig @$resolver: AD bit set + RRSIG present)"
+      exit 0
+    fi
+    saw_unsigned=1
+    log "dig @$resolver did not confirm validation (no AD+RRSIG) for '$DOMAIN'"
+  fi
+done
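+# No resolver confirmed validation; both outcomes below fail closed (exit 1).
+# NOTE: saw_unsigned is set after ANY unconfirmed query — unsigned zone,
+# failed validation, or unreachable resolver alike — so the "could not reach"
+# message is printed only when the resolver list is empty.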
+if [[ "$saw_unsigned" -eq 1 ]]; then
+  log "NOT VERIFIED — no trusted resolver confirmed DNSSEC for '$DOMAIN' (zone appears unsigned / chain not validated)"
+  log "  resolvers tried: $RESOLVERS"
+  log "  remediation: sign the zone (DNSSEC) at the registrar/DNS host, then re-run"
+else
+  log "NOT VERIFIED — could not reach any resolver to validate DNSSEC for '$DOMAIN'"
+  log "  resolvers tried: $RESOLVERS"
+  log "  failing closed (production must not sign without positive confirmation)"
+fi
+exit 1

package/scripts/emit-evidence.sh CHANGED Viewed

@@ -35,15 +35,28 @@
 #   1 — input JSON malformed or missing required fields
 #   2 — signing requested but cosign not available
 #   3 — Rekor push requested but failed
+#   4 — production DNSSEC/CAA pre-flight FAILED (fail-closed; nothing was signed)
 #
-# CISO gate (per ISEDC v1 Q1, 2026-05-10): pushing to a public transparency log
-# (Rekor) against the predicate URI https://evals.intentsolutions.io/gate-result/v1
-# is BLOCKED until DNSSEC + CAA records are verified on the namespace. The script
-# does NOT enforce this — that is operator discipline. See bead `iel-4zr` in
-# intent-eval-platform/intent-eval-lab/.beads/.
+# CISO gate (per DR-010 Q5 / ISEDC v1 Q1, 2026-05-10): pushing to a PUBLIC
+# transparency log (Rekor) against the predicate URI
+# https://evals.intentsolutions.io/gate-result/v1 is BLOCKED until DNSSEC + CAA
+# records are verified on the namespace. This script ENFORCES that: when a
+# production Rekor push is requested (--rekor-url / non-empty REKOR_URL), it runs
+# scripts/dnssec-check.sh then scripts/caa-check.sh against the predicate
+# namespace and REFUSES to sign (exit 4) if either fails. The gate is read-only —
+# it anchors nothing and can only make signing MORE conservative.
+#
+# Opt-out (NON-PRODUCTION / staging ONLY): EVIDENCE_SKIP_DNS_PREFLIGHT=1 skips the
+# pre-flight. It is honored ONLY when no production Rekor push is requested; a
+# real Rekor push can NEVER be silently skipped.
 set -euo pipefail
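+# Bash version floor: these gates rely on bash 4+ features. Refuse early with a
+# clear message on bash 3.x (e.g. macOS system bash) instead of failing later
+# with a cryptic syntax error (jcgw).
+# NOTE(review): exit 3 is also this script's documented "Rekor push requested
+# but failed" code, so callers cannot tell the two apart — confirm the intended code.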
+[ "${BASH_VERSINFO:-0}" -ge 4 ] || { echo 'audit-harness requires bash >= 4' >&2; exit 3; }
 INPUT="-"
 OUTPUT=""
 SIGN=0
@@ -54,6 +67,9 @@ RUNNER_VERSION_OVERRIDE=""
 COMMIT_SHA_OVERRIDE=""
 PREDICATE_URI="https://evals.intentsolutions.io/gate-result/v1"
 STATEMENT_TYPE="https://in-toto.io/Statement/v1"
+# The namespace whose DNSSEC + CAA posture gates production attestations. Derived
+# from the predicate URI host; overridable for testing via EVIDENCE_PREDICATE_DOMAIN.
+PREDICATE_DOMAIN="${EVIDENCE_PREDICATE_DOMAIN:-evals.intentsolutions.io}"
 while [[ $# -gt 0 ]]; do
   case "$1" in
@@ -175,16 +191,138 @@ if [[ -z "$STATEMENT" ]]; then
   exit 1
 fi
-# --- OTel event (best-effort no-op if collector absent) ---
-# Fire agent.rollout.gate.evaluated per intent-eval-lab/000-docs/001-DR-RFC-...md.
-# We emit a single OTLP-shaped JSON line to stderr when AUDIT_HARNESS_OTEL=1
-# OR an OTEL_EXPORTER_OTLP_ENDPOINT is set. Real exporter wiring is consumer-side;
-# we emit a structured signal that any collector can scrape via stderr capture.
+# --- OTel events (best-effort no-op if collector absent) ---
+# The gate-decision event fires per the NORMATIVE runtime event taxonomy
+# intent-eval-lab/000-docs/067-AT-SPEC-runtime-event-taxonomy-2026-06-12.md § 2.2
+# (GOVERNANCE events, `gate.*`):
+#
+#   1. agent.rollout.gate.evaluated — observability signal fired at the
+#      start/observation of a gate evaluation. NON-NORMATIVE: 067-AT-SPEC closes
+#      the `gate.*` category and does NOT define a gate-evaluated event, so this
+#      carries the legacy raw gate identity + result for collectors that already
+#      scrape it. It is NOT a 067-pinned name and a future taxonomy extension may
+#      retire or rename it; nothing should pin to it. The normative signal is (2).
+#   2. gate.decision.emitted (iah-E07b) — fired at the END of the gate
+#      evaluation. This is the NORMATIVE name from 067-AT-SPEC § 2.2: "a
+#      RolloutGate decision row is emitted under gate-result/v1". Payload per
+#      § 2.2: gate.name (string), gate.decision (enum pass|fail|advisory|error),
+#      gate.policy_ref (string). This is the one a ship-gate dashboard alerts on.
+#
+# ATTRIBUTE-SPELLING AUTHORITY (do NOT redefine here): the canonical attribute
+# names are pinned by the kernel at
+# intent-eval-core/schemas/v1/otel-attributes.yaml — OTel-idiomatic dotted
+# lowercase (e.g. gate.decision). We spell every attribute to match that file.
+# 067-AT-SPEC § 2.2 is the EVENT-NAME authority for gate.decision.emitted and its
+# payload schema; the gate.decision enum {pass, fail, advisory, error} is the
+# closed gate-result/v1 verdict enum (Blueprint B § 7.4 / kernel gate-result
+# schema) — NOT the RolloutGateDecision ship/no_ship vocabulary.
+#
+# We emit OTLP-shaped JSON lines to stderr when AUDIT_HARNESS_OTEL=1 OR an
+# OTEL_EXPORTER_OTLP_ENDPOINT is set. Real exporter wiring is consumer-side; we
+# emit a structured signal any collector can scrape via stderr capture. The path
+# is fully best-effort: a collector being absent is the no-op default, and a
+# python failure (||) degrades to an empty line that is simply not printed —
+# the gate's own exit status is never affected by OTel emission (iah-E07c).
 if [[ "${AUDIT_HARNESS_OTEL:-0}" == "1" ]] || [[ -n "${OTEL_EXPORTER_OTLP_ENDPOINT:-}" ]]; then
-  GATE_ID=$(echo "$GATE_JSON" | python3 -c "import json,sys; print(json.load(sys.stdin).get('gate_id',''))" 2>/dev/null || echo "")
-  RESULT=$(echo "$GATE_JSON" | python3 -c "import json,sys; print(json.load(sys.stdin).get('result',''))" 2>/dev/null || echo "")
-  printf '[OTEL] {"name":"agent.rollout.gate.evaluated","attributes":{"gate.id":"%s","gate.result":"%s","gate.runner":"%s","gate.commit_sha":"%s"},"timestamp":"%s"}\n' \
-    "$GATE_ID" "$RESULT" "$RUNNER" "$COMMIT_SHA" "$TIMESTAMP" >&2
+  # Compose the JSON via python so every attribute value is JSON-escaped.
+  # printf-interpolating gate_id/result/runner into a JSON format string
+  # emitted structurally invalid JSON whenever a value carried a double quote
+  # (e.g. AUDIT_HARNESS_SIDE='ci"injection' flowing into gate_id).
+  OTEL_LINES=$(GATE_JSON="$GATE_JSON" RUNNER="$RUNNER" COMMIT_SHA="$COMMIT_SHA" TIMESTAMP="$TIMESTAMP" \
+    python3 - <<'PY' 2>/dev/null || echo ""
+import json, os
+try:
+    gate = json.loads(os.environ["GATE_JSON"])
+except (json.JSONDecodeError, ValueError):
+    gate = {}
+runner = os.environ["RUNNER"]
+commit_sha = os.environ["COMMIT_SHA"]
+timestamp = os.environ["TIMESTAMP"]
+gate_id = str(gate.get("gate_id", ""))
+# The canonical gate-result/v1 verdict field is gate_decision (lowercase enum,
+# Blueprint B § 7.4); the legacy draft envelope used `result` (UPPERCASE). Read
+# the canonical field first, fall back to the legacy field.
+gate_decision_raw = str(gate.get("gate_decision", gate.get("result", "")))
+# gate.name / gate.policy_ref per 067-AT-SPEC § 2.2 payload schema. The canonical
+# envelope carries gate_name (kebab-case) + policy_ref; fall back to gate_id /
+# policy_hash for legacy draft envelopes that predate Blueprint B § 7.4.
+gate_name = str(gate.get("gate_name", gate_id))
+policy_ref = str(gate.get("policy_ref", gate.get("policy_hash", "")))
+# Map the inbound verdict to the closed gate.decision enum {pass, fail,
+# advisory, error} (gate-result/v1 / kernel gate-result schema). This is the
+# 067-AT-SPEC § 2.2 enum — NOT the RolloutGateDecision ship/no_ship vocabulary.
+# Canonical lowercase values pass straight through; legacy UPPERCASE results map
+# down; an unrecognized/missing verdict is `error` (the gate could not affirm a
+# decision — an error condition, not a clean `fail`).
+_DECISION_MAP = {
+    "pass": "pass",
+    "fail": "fail",
+    "advisory": "advisory",
+    "error": "error",
+}
+decision = _DECISION_MAP.get(gate_decision_raw.strip().lower(), "error")
+# An advisory_severity hint on a non-fail/non-error row signals an advisory row
+# even when the legacy `result` field only said PASS.
+if decision in ("pass",) and gate.get("advisory_severity"):
+    decision = "advisory"
+reasons = []
+if decision == "pass":
+    reasons.append(f"gate '{gate_id}' decision: pass")
+else:
+    reasons.append(
+        f"gate '{gate_id}' decision: {decision} "
+        f"(verdict={gate_decision_raw or 'NO_VERDICT'})"
+    )
+fm = gate.get("failure_mode")
+if fm:
+    reasons.append(f"failure_mode: {fm}")
+# Event 1: agent.rollout.gate.evaluated (NON-NORMATIVE observability signal;
+# unchanged shape — not a 067-AT-SPEC-pinned name, see header note).
+evaluated = {
+    "name": "agent.rollout.gate.evaluated",
+    "attributes": {
+        "gate.id": gate_id,
+        "gate.result": gate_decision_raw,
+        "gate.runner": runner,
+        "gate.commit_sha": commit_sha,
+    },
+    "timestamp": timestamp,
+}
+# Event 2: gate.decision.emitted (iah-E07b) — NORMATIVE per 067-AT-SPEC § 2.2.
+# Payload: gate.name (string) + gate.decision (enum pass|fail|advisory|error) +
+# gate.policy_ref (string). The reasons / runner / commit_sha are additive
+# diagnostic attributes carried for dashboards; they do not contradict the
+# § 2.2 required payload.
+decision_event = {
+    "name": "gate.decision.emitted",
+    "attributes": {
+        "gate.name": gate_name,
+        "gate.decision": decision,
+        "gate.policy_ref": policy_ref,
+        "gate.id": gate_id,
+        "gate.reasons": reasons,
+        "gate.runner": runner,
+        "gate.commit_sha": commit_sha,
+    },
+    "timestamp": timestamp,
+}
+for ev in (evaluated, decision_event):
+    print(json.dumps(ev, separators=(",", ":")))
+PY
+)
+  # Print each emitted OTLP line with the [OTEL] marker the collector scrapes.
+  if [[ -n "$OTEL_LINES" ]]; then
+    while IFS= read -r _otel_line; do
+      [[ -n "$_otel_line" ]] && printf '[OTEL] %s\n' "$_otel_line" >&2
+    done <<< "$OTEL_LINES"
+  fi
 fi
 # --- Sign + emit ---
@@ -212,6 +350,40 @@ if ! command -v cosign >/dev/null 2>&1; then
   exit 2
 fi
+# --- Production DNSSEC + CAA pre-flight gate (CISO binding DR-010 Q5) ----------
+# A "production" signing event is one that pushes a signed Statement to a PUBLIC
+# transparency log (Rekor) — i.e. REKOR_URL is non-empty. Before that irreversible
+# anchor, the predicate namespace MUST be DNSSEC-signed AND CAA-pinned. We run the
+# two read-only checks; if EITHER fails we REFUSE to sign and exit 4.
+#
+# The opt-out EVIDENCE_SKIP_DNS_PREFLIGHT=1 is honored ONLY for non-production
+# (no Rekor push). A real Rekor push can never be silently skipped.
+if [[ -n "$REKOR_URL" ]]; then
+  PREFLIGHT_DIR="$(cd "$(dirname "$0")" && pwd)"
+  if [[ "${EVIDENCE_SKIP_DNS_PREFLIGHT:-0}" == "1" ]]; then
+    echo "emit-evidence: IGNORING EVIDENCE_SKIP_DNS_PREFLIGHT=1 — a Rekor push (REKOR_URL=$REKOR_URL) is a production attestation and CANNOT skip the DNSSEC/CAA pre-flight." >&2
+  fi
+  echo "emit-evidence: production Rekor push requested — running DNSSEC + CAA pre-flight on '$PREDICATE_DOMAIN'" >&2
+  if ! bash "$PREFLIGHT_DIR/dnssec-check.sh" "$PREDICATE_DOMAIN" >&2; then
+    echo "emit-evidence: REFUSING TO SIGN — DNSSEC pre-flight FAILED for '$PREDICATE_DOMAIN'." >&2
+    echo "emit-evidence: remediation: pin DNSSEC + CAA on $PREDICATE_DOMAIN before any production attestation." >&2
+    echo "emit-evidence:   see intent-eval-platform/intent-eval-lab/000-docs (DR-010 Q5 CISO binding) + the iah-E06 runbook." >&2
+    exit 4
+  fi
+  if ! bash "$PREFLIGHT_DIR/caa-check.sh" "$PREDICATE_DOMAIN" >&2; then
+    echo "emit-evidence: REFUSING TO SIGN — CAA pre-flight FAILED for '$PREDICATE_DOMAIN'." >&2
+    echo "emit-evidence: remediation: pin DNSSEC + CAA on $PREDICATE_DOMAIN before any production attestation." >&2
+    echo "emit-evidence:   set EXPECTED_CAA_ISSUER to the published CA, then publish a CAA record pinning it." >&2
+    exit 4
+  fi
+  echo "emit-evidence: DNSSEC + CAA pre-flight PASSED for '$PREDICATE_DOMAIN' — proceeding to sign." >&2
+elif [[ "${EVIDENCE_SKIP_DNS_PREFLIGHT:-0}" == "1" ]]; then
+  # Non-production sign (no Rekor push) with the explicit opt-out set: keep
+  # existing staging flows green without running the network-bound checks.
+  echo "emit-evidence: non-production sign (no Rekor push); DNSSEC/CAA pre-flight skipped per EVIDENCE_SKIP_DNS_PREFLIGHT=1." >&2
+fi
 # Stage the Statement to a temp file for cosign to consume
 TMP=$(mktemp -d)
 trap 'rm -rf "$TMP"' EXIT

package/scripts/escape-scan.sh CHANGED Viewed

@@ -28,6 +28,23 @@
 set -euo pipefail
+# Bash version floor: these gates rely on bash 4+ features. Refuse early with a
+# clear message on bash 3.x (e.g. macOS system bash) instead of failing later
+# with a cryptic syntax error (jcgw).
+[ "${BASH_VERSINFO:-0}" -ge 4 ] || { echo 'audit-harness requires bash >= 4' >&2; exit 3; }
+# Cross-platform SHA-256: `sha256sum` ships with GNU coreutils (Linux);
+# macOS only has `shasum -a 256`. Both produce identical `<hash>  <file>`
+# output, so downstream awk parsing is unchanged. Mirrors harness-hash.sh.
+if command -v sha256sum >/dev/null 2>&1; then
+  SHA256_CMD=(sha256sum)
+elif command -v shasum >/dev/null 2>&1; then
+  SHA256_CMD=(shasum -a 256)
+else
+  echo "escape-scan: neither sha256sum nor shasum found in PATH" >&2
+  exit 2
+fi
 DIFF_SRC=""
 VERIFY_HASH=1
 JSON_OUT=0
@@ -51,7 +68,15 @@ if [[ "$#" -eq 0 ]]; then
 fi
 case "$1" in
-  -) DIFF_SRC="/dev/stdin" ;;
+  -)
+    # Buffer stdin into a temp file so the diff can be read multiple times.
+    # /dev/stdin is drained by the first grep, which would leave later reads
+    # (notably the input_hash sha256) seeing an empty stream — emitting the
+    # SHA-256 of "" instead of the real diff hash.
+    DIFF_SRC=$(mktemp)
+    trap 'rm -f "$DIFF_SRC"' EXIT
+    cat > "$DIFF_SRC"
+    ;;
   --staged)
     DIFF_SRC=$(mktemp)
     trap 'rm -f "$DIFF_SRC"' EXIT
@@ -198,10 +223,10 @@ if [[ "$JSON_OUT" -eq 1 ]]; then
   elif [[ "$FLAG" -gt 0 ]]; then
     result="ADVISORY"
   fi
-  input_hash=$(sha256sum "$DIFF_SRC" | awk '{print "sha256:"$1}')
+  input_hash=$("${SHA256_CMD[@]}" "$DIFF_SRC" | awk '{print "sha256:"$1}')
   policy_hash="sha256:0000000000000000000000000000000000000000000000000000000000000000"
   if [[ -f "$TESTING_MD" ]]; then
-    policy_hash=$(sha256sum "$TESTING_MD" | awk '{print "sha256:"$1}')
+    policy_hash=$("${SHA256_CMD[@]}" "$TESTING_MD" | awk '{print "sha256:"$1}')
   fi
   printf '{"gate_id":"audit-harness:%s:escape-scan","result":"%s","input_hash":"%s","policy_hash":"%s","metadata":{"refuse":%d,"challenge":%d,"flag":%d,"coverage_line_floor":%d,"coverage_branch_floor":%d,"mutation_floor":%d}' \
     "${AUDIT_HARNESS_SIDE:-ci}" "$result" "$input_hash" "$policy_hash" "$REFUSE" "$CHALLENGE" "$FLAG" \

package/scripts/gherkin-lint.sh CHANGED Viewed

@@ -13,6 +13,11 @@
 set -euo pipefail
+# Bash version floor: these gates rely on bash 4+ features. Refuse early with a
+# clear message on bash 3.x (e.g. macOS system bash) instead of failing later
+# with a cryptic syntax error (jcgw).
+[ "${BASH_VERSINFO:-0}" -ge 4 ] || { echo 'audit-harness requires bash >= 4' >&2; exit 3; }
 PATH_ARG="features/"
 STRICT=0
 JSON_OUT=0

package/scripts/harness-hash.sh CHANGED Viewed

@@ -23,6 +23,11 @@
 set -euo pipefail
+# Bash version floor: these gates rely on bash 4+ features. Refuse early with a
+# clear message on bash 3.x (e.g. macOS system bash) instead of failing later
+# with a cryptic syntax error (jcgw).
+[ "${BASH_VERSINFO:-0}" -ge 4 ] || { echo 'audit-harness requires bash >= 4' >&2; exit 3; }
 # Cross-platform SHA-256: `sha256sum` ships with GNU coreutils (Linux);
 # macOS only has `shasum -a 256`. Both produce identical `<hash>  <file>`
 # output, so downstream awk parsing is unchanged.