npm - @intentsolutions/audit-harness - Versions diffs - 1.1.6 → 1.1.8 - Mend

@intentsolutions/audit-harness 1.1.6 → 1.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/CHANGELOG.md +40 -0
package/bin/audit-harness.js +6 -5
package/package.json +8 -1
package/schemas/currency/pins.v1.json +164 -22
package/scripts/arch-check.sh +20 -2
package/scripts/bias-count.sh +18 -1
package/scripts/caa-check.sh +143 -0
package/scripts/crap-score.py +57 -6
package/scripts/currency.py +70 -25
package/scripts/dnssec-check.sh +158 -0
package/scripts/emit-evidence.sh +79 -9
package/scripts/escape-scan.sh +28 -3
package/scripts/gherkin-lint.sh +5 -0
package/scripts/harness-hash.sh +5 -0
package/scripts/kernel-shadow-check.sh +132 -0

package/scripts/crap-score.py CHANGED Viewed

@@ -50,6 +50,22 @@ EXCLUDED_DIRS = {
 }
+def is_excluded_dir(name: str) -> bool:
+    """Single exclusion predicate shared by the candidate-discovery walk and
+    the --json input-hash walk.
+    Both walks MUST agree on which directories they descend into; otherwise the
+    set of files that feed the CRAP score can diverge from the set that feeds
+    the input_hash, and the score/hash desync (a hash that claims to cover
+    files the score never saw, or vice versa). The rule is: skip any dot-dir
+    (e.g. `.idea`, `.svn`, `.git`) OR any explicitly-named build/vendor dir in
+    EXCLUDED_DIRS. Previously discovery dropped all dot-dirs while the hash walk
+    dropped only the named subset, so a dot-dir not in EXCLUDED_DIRS was hashed
+    but never scored.
+    """
+    return name.startswith(".") or name in EXCLUDED_DIRS
 def crap(complexity: int, coverage_pct: float) -> float:
     cov = max(0.0, min(100.0, coverage_pct)) / 100.0
     return (complexity ** 2) * ((1.0 - cov) ** 3) + complexity
@@ -98,8 +114,7 @@ def score_python(root: Path, kind: str) -> list[MethodScore]:
             scanned = [
                 p.name for p in root.iterdir()
                 if p.is_dir()
-                and not p.name.startswith(".")
-                and p.name not in EXCLUDED_DIRS
+                and not is_excluded_dir(p.name)
                 and p.name not in test_dirs
                 and any(p.rglob("*.py"))
             ]
@@ -165,7 +180,15 @@ def score_go(root: Path, kind: str) -> list[MethodScore]:
         print("[crap-score] gocyclo not installed", file=sys.stderr)
         return []
-    rc, out, _ = run(["gocyclo", "-ignore", "_test.go" if kind == "src" else ".*\\.go$", "."], root)
+    # For kind="src", ignore *_test.go at the gocyclo level. For kind="test",
+    # do NOT pass -ignore: a pattern like `.*\.go$` matches every analyzable
+    # file (gocyclo only reads .go files), which silenced all test-kind output.
+    # The include-filter below keeps only *_test.go rows for kind="test".
+    gocyclo_cmd = ["gocyclo"]
+    if kind == "src":
+        gocyclo_cmd += ["-ignore", "_test.go"]
+    gocyclo_cmd.append(".")
+    rc, out, _ = run(gocyclo_cmd, root)
     complexity: list[tuple[str, str, int]] = []
     for line in out.splitlines():
         parts = line.strip().split()
@@ -187,11 +210,28 @@ def score_go(root: Path, kind: str) -> list[MethodScore]:
     if not cov_out.is_file() and which_or_none("go"):
         run(["go", "test", "-coverprofile=coverage.out", "-covermode=atomic", "./..."], root)
     if cov_out.is_file() and which_or_none("go"):
+        # `go tool cover -func` reports module-qualified paths
+        # (github.com/user/repo/pkg/file.go) while gocyclo reports repo-relative
+        # paths (pkg/file.go). Strip the module prefix read from go.mod so the
+        # coverage keys join the complexity keys.
+        module_prefix = ""
+        go_mod = root / "go.mod"
+        if go_mod.is_file():
+            try:
+                for mod_line in go_mod.read_text().splitlines():
+                    mod_line = mod_line.strip()
+                    if mod_line.startswith("module ") or mod_line.startswith("module\t"):
+                        module_prefix = mod_line.split(None, 1)[1].strip() + "/"
+                        break
+            except OSError:
+                pass
         rc, out, _ = run(["go", "tool", "cover", "-func=coverage.out"], root)
         for line in out.splitlines():
             parts = line.split()
             if len(parts) >= 3 and parts[-1].endswith("%"):
                 fpath = parts[0].split(":", 1)[0]
+                if module_prefix and fpath.startswith(module_prefix):
+                    fpath = fpath[len(module_prefix):]
                 try:
                     pct = float(parts[-1].rstrip("%"))
                 except ValueError:
@@ -228,6 +268,17 @@ def score_js(root: Path, kind: str) -> list[MethodScore]:
     except json.JSONDecodeError:
         return []
+    # c8/istanbul's json-summary reporter keys files by ABSOLUTE path while
+    # complexity-report (run with a repo-relative target) reports repo-relative
+    # paths. Normalize both sides to repo-relative so the coverage join works.
+    def _rel_to_root(p: str) -> str:
+        if os.path.isabs(p):
+            try:
+                return os.path.relpath(p, str(root))
+            except ValueError:
+                return p  # e.g. different drive on Windows — keep as-is
+        return p
     cov_path = root / "coverage" / "coverage-summary.json"
     coverage: dict[str, float] = {}
     if cov_path.is_file():
@@ -237,14 +288,14 @@ def score_js(root: Path, kind: str) -> list[MethodScore]:
                 if fpath == "total":
                     continue
                 lines_pct = summary.get("lines", {}).get("pct", 0.0)
-                coverage[fpath] = float(lines_pct)
+                coverage[_rel_to_root(fpath)] = float(lines_pct)
         except (OSError, json.JSONDecodeError):
             pass
     scores: list[MethodScore] = []
     for report in data.get("reports", []):
         fpath = report.get("path", "")
-        cov = coverage.get(fpath, 0.0)
+        cov = coverage.get(_rel_to_root(fpath), 0.0)
         for func in report.get("functions", []):
             c = int(func.get("cyclomatic", 1))
             scores.append(
@@ -403,7 +454,7 @@ def main() -> int:
         exts = (".py", ".ts", ".tsx", ".js", ".jsx", ".go", ".rs", ".java", ".kt", ".cs", ".php", ".rb")
         collected: list[Path] = []
         for dirpath, dirs, files in os.walk(root):
-            dirs[:] = [d for d in dirs if d not in EXCLUDED_DIRS]
+            dirs[:] = [d for d in dirs if not is_excluded_dir(d)]
             for fn in files:
                 if fn.endswith(exts):
                     collected.append(Path(dirpath) / fn)

package/scripts/currency.py CHANGED Viewed

@@ -1,18 +1,26 @@
 #!/usr/bin/env python3
 """
-audit-harness currency — advisory upstream-currency report (PP-PLAN-040 Phase 5 / E7).
+audit-harness currency — advisory poll-freshness report (PP-PLAN-040 Phase 5 / E7).
 Currency depends on upstream state, which is non-deterministic and network-bound, so
 it is deliberately the WEAKEST kind of check: an advisory REPORT with **no exit-code
 authority, no auto-fix, and no live-fetch**. It reads the per-upstream-identity pin
 relation (schemas/currency/pins.v1.json) — where each upstream carries its own
-pinned_version + the date it was last verified (checked_at) + a staleness window —
-and reports which pins are themselves STALE (checked_at older than the window), i.e.
-which pins a human should re-verify against upstream.
+pinned_version + the date it was last verified (checked_at) + an advisory
+poll-freshness SLA — and reports which pins are themselves PAST their SLA
+(checked_at older than the SLA window), i.e. which pins a human should re-verify
+against upstream. The SLA gates NOTHING except human attention.
 This models the pin's OWN staleness as detectable, rather than one opaque
-".schema-version" scalar. The /sync-testing-harness skill consumes this report to
-open advisory bump PRs; the report never reddens a build (always exit 0).
+".schema-version" scalar. Pins are grouped by class (spec-page / schema-file /
+release-feed / internal-contract); SLA resolution order is: explicit per-pin
+staleness_window_days > the pin's class SLA > default_staleness_window_days.
+The /sync-testing-harness skill consumes this report to open advisory bump PRs;
+the report never reddens a build (always exit 0).
+Follow-up (deliberately NOT wired here, [9k5h.10]): the intent-eval-lab
+detector-health surface will consume the --json output; that cross-repo
+integration is tracked separately.
 Stdlib only. No network. No filesystem mutation.
 """
@@ -25,6 +33,8 @@ from datetime import datetime, timezone
 HERE = os.path.dirname(os.path.abspath(__file__))
 DEFAULT_PINS = os.path.join(HERE, "..", "schemas", "currency", "pins.v1.json")
+UNCLASSED = "(unclassed)"
 def parse_date(s):
     try:
@@ -33,12 +43,23 @@ def parse_date(s):
         return None
+def resolve_window(pin, classes, default_window):
+    """SLA resolution: explicit per-pin window > class SLA > file default."""
+    if pin.get("staleness_window_days") is not None:
+        return pin["staleness_window_days"]
+    cls = classes.get(pin.get("class") or "", {})
+    if cls.get("staleness_window_days") is not None:
+        return cls["staleness_window_days"]
+    return default_window
 def build_report(pins_doc, today):
     default_window = pins_doc.get("default_staleness_window_days", 90)
+    classes = pins_doc.get("staleness_classes", {})
     out = []
     for pin in pins_doc.get("pins", []):
         checked = parse_date(pin.get("checked_at", ""))
-        window = pin.get("staleness_window_days", default_window)
+        window = resolve_window(pin, classes, default_window)
         if checked is None:
             age, status = None, "unknown-checked_at"
         else:
@@ -46,6 +67,7 @@ def build_report(pins_doc, today):
             status = "stale" if age > window else "current"
         out.append({
             "identity": pin.get("identity"),
+            "class": pin.get("class") or UNCLASSED,
             "pinned_version": pin.get("pinned_version"),
             "checked_at": pin.get("checked_at"),
             "age_days": age,
@@ -57,8 +79,18 @@ def build_report(pins_doc, today):
     return out
+def group_by_class(report):
+    """Ordered {class: [rows]} grouping, classes sorted, (unclassed) last."""
+    grouped = {}
+    for r in report:
+        grouped.setdefault(r["class"], []).append(r)
+    ordered = sorted(grouped, key=lambda c: (c == UNCLASSED, c))
+    return {c: grouped[c] for c in ordered}
 def main():
-    ap = argparse.ArgumentParser(description="Advisory upstream-currency report (no exit authority)")
+    ap = argparse.ArgumentParser(
+        description="Advisory poll-freshness report (no exit authority — the SLA gates nothing but human attention)")
     ap.add_argument("--pins", default=DEFAULT_PINS, help="path to the pin relation datum")
     ap.add_argument("--json", action="store_true", help="emit JSON report")
     ap.add_argument("--today", default=None, help="override 'today' (YYYY-MM-DD) for reproducible reports/tests")
@@ -74,39 +106,52 @@ def main():
     today = parse_date(args.today) if args.today else datetime.now(timezone.utc).date()
     report = build_report(pins_doc, today)
+    grouped = group_by_class(report)
     stale = [r for r in report if r["status"] == "stale"]
     unknown = [r for r in report if r["status"] == "unknown-checked_at"]
     if args.json:
+        by_class = {}
+        for cls, rows in grouped.items():
+            by_class[cls] = {
+                "total": len(rows),
+                "stale": sum(1 for r in rows if r["status"] == "stale"),
+                "current": sum(1 for r in rows if r["status"] == "current"),
+                "unknown": sum(1 for r in rows if r["status"] == "unknown-checked_at"),
+            }
         print(json.dumps({
             "report": "currency/v1",
             "generated_for": today.strftime("%Y-%m-%d"),
             "pins": report,
+            "by_class": by_class,
             "stale_count": len(stale),
             "advisory": True,
         }, indent=2))
     else:
-        print(f"Upstream currency (advisory) — as of {today.strftime('%Y-%m-%d')}")
-        print(f"{'identity':<24} {'pinned':<14} {'checked_at':<12} {'age':>5} {'win':>4}  status")
-        for r in report:
-            age = "—" if r["age_days"] is None else str(r["age_days"]) + "d"
-            if r["status"] == "stale":
-                mark = "⚠ STALE"
-            elif r["status"] == "current":
-                mark = "current"
-            else:
-                mark = "? " + r["status"]
-            print(f"{(r['identity'] or ''):<24} {(r['pinned_version'] or ''):<14} "
-                  f"{(r['checked_at'] or ''):<12} {age:>5} {r['window_days']:>4}  {mark}")
+        print(f"Upstream currency — advisory poll-freshness SLA report — as of {today.strftime('%Y-%m-%d')}")
+        print(f"{'identity':<26} {'pinned':<18} {'checked_at':<12} {'age':>5} {'sla':>4}  status")
+        for cls, rows in grouped.items():
+            print(f"[{cls}] — {len(rows)} pin(s)")
+            for r in rows:
+                age = "—" if r["age_days"] is None else str(r["age_days"]) + "d"
+                if r["status"] == "stale":
+                    mark = "⚠ PAST SLA"
+                elif r["status"] == "current":
+                    mark = "current"
+                else:
+                    mark = "? " + r["status"]
+                print(f"  {(r['identity'] or ''):<24} {(r['pinned_version'] or ''):<18} "
+                      f"{(r['checked_at'] or ''):<12} {age:>5} {r['window_days']:>4}  {mark}")
         print()
         if stale:
-            print(f"{len(stale)} pin(s) past their staleness window — re-verify against upstream, "
-                  f"then bump pinned_version + checked_at in schemas/currency/pins.v1.json:")
+            print(f"{len(stale)} pin(s) past their poll-freshness SLA — the SLA gates nothing but human "
+                  f"attention: re-verify against upstream, then bump pinned_version + checked_at in "
+                  f"schemas/currency/pins.v1.json:")
             for r in stale:
-                print(f"  - {r['identity']}: last checked {r['checked_at']} "
-                      f"({r['age_days']}d ago > {r['window_days']}d)")
+                print(f"  - {r['identity']} [{r['class']}]: last checked {r['checked_at']} "
+                      f"({r['age_days']}d ago > {r['window_days']}d SLA)")
         else:
-            print("All pins within their staleness window.")
+            print("All pins within their poll-freshness SLA.")
         if unknown:
             print(f"{len(unknown)} pin(s) have an unparseable checked_at — fix the date format (YYYY-MM-DD).")

package/scripts/dnssec-check.sh ADDED Viewed

@@ -0,0 +1,158 @@
+#!/usr/bin/env bash
+# dnssec-check.sh — verify a namespace is DNSSEC-signed before a production
+# signed attestation is anchored against it.
+#
+# WHY THIS EXISTS (CISO binding, DR-010 Q5 / ISEDC v1 Q1 2026-05-10):
+#   Predicate URIs for the Evidence Bundle live ONLY at evals.intentsolutions.io.
+#   Pushing a signed in-toto Statement to a PUBLIC transparency log (Rekor)
+#   against an unsigned namespace is irreversible and lets an attacker who can
+#   spoof the zone mint look-alike attestations. DNSSEC must be verified on the
+#   namespace BEFORE the first production attestation. This script is that gate.
+#   It anchors NOTHING — it is a read-only verification that can only make
+#   signing MORE conservative (fail-closed).
+#
+# WHY IT QUERIES AN EXPLICIT RESOLVER (the bug this version fixes):
+#   Querying the LOCAL STUB RESOLVER (plain `dig`, no `@server`) FALSE-NEGATIVES
+#   on hosts whose stub resolver strips DNSSEC records or never sets the AD bit
+#   (systemd-resolved, most CI runners, dev boxes behind a caching forwarder).
+#   On such a host a correctly DNSSEC-signed zone looks unsigned. For a
+#   fail-closed gate that is the WRONG failure mode: a legitimately signed zone
+#   is blocked exactly as a genuinely-unsigned zone would be, so the check
+#   loses all discriminating power. The fix is to query a
+#   TRUSTED VALIDATING resolver and require the resolver to assert validation
+#   (`delv` full-chain "fully validated", or `dig`'s AD bit + an RRSIG). The
+#   gate stays fail-closed: PASS only on positive confirmation from a trusted
+#   resolver; UNKNOWN / unreachable / no-tool => non-zero.
+#
+# Usage:
+#   bash scripts/dnssec-check.sh [DOMAIN]
+#   DNSSEC_CHECK_DOMAIN=evals.intentsolutions.io bash scripts/dnssec-check.sh
+#
+# Resolution order for the domain:
+#   1. $1 (positional)
+#   2. $DNSSEC_CHECK_DOMAIN
+#   3. default: evals.intentsolutions.io
+#
+# Behavior:
+#   - Queries each resolver in $DNSSEC_CHECK_RESOLVERS (default 1.1.1.1 8.8.8.8),
+#     in order, and PASSES on the FIRST that confirms DNSSEC validation.
+#   - For each resolver: prefers `delv @<resolver>` (full DNSSEC chain validation
+#     against the IANA trust anchor; "fully validated" => PASS). Falls back to
+#     `dig @<resolver> +dnssec` and requires BOTH the AD (Authenticated Data)
+#     header flag AND the presence of an RRSIG record (a non-validating answer,
+#     i.e. RRSIG but no AD, does NOT pass — a malicious/forwarding resolver that
+#     returns records without validating the chain cannot trivially pass).
+#   - If NO resolver confirms validation (every resolver says unsigned, or is
+#     unreachable), exits 1 (fail-closed).
+#   - If NEITHER delv NOR dig is installed, emits a typed UNKNOWN/UNREACHABLE
+#     result and exits 2 (fail-closed for production).
+#
+# Exit codes:
+#   0 — DNSSEC verified (a trusted resolver fully validated, or set AD + RRSIG)
+#   1 — DNSSEC NOT verified (no trusted resolver confirmed; zone unsigned /
+#       validation failed / all resolvers unreachable)
+#   2 — UNKNOWN/UNREACHABLE (no resolver tool installed at all)
+#
+# Override knobs:
+#   DNSSEC_CHECK_RESOLVERS — space-separated list of validating/public resolvers
+#                            to query in order (default: "1.1.1.1 8.8.8.8").
+#   DNSSEC_CHECK_DELV_CMD   — command used in place of `delv` (default: delv)
+#   DNSSEC_CHECK_DIG_CMD    — command used in place of `dig`  (default: dig)
+set -euo pipefail
+DOMAIN="${1:-${DNSSEC_CHECK_DOMAIN:-evals.intentsolutions.io}}"
+DELV_CMD="${DNSSEC_CHECK_DELV_CMD:-delv}"
+DIG_CMD="${DNSSEC_CHECK_DIG_CMD:-dig}"
+# Trusted validating/public resolvers, queried in order. Cloudflare (1.1.1.1)
+# and Google (8.8.8.8) both perform DNSSEC validation and set the AD bit.
+RESOLVERS="${DNSSEC_CHECK_RESOLVERS:-1.1.1.1 8.8.8.8}"
+log() { printf 'dnssec-check: %s\n' "$1" >&2; }
+if [[ "$DOMAIN" == "-h" || "$DOMAIN" == "--help" ]]; then
+  sed -n '2,60p' "$0"
+  exit 0
+fi
+have() { command -v "$1" >/dev/null 2>&1; }
+have_delv=0
+have_dig=0
+have "$DELV_CMD" && have_delv=1
+have "$DIG_CMD" && have_dig=1
+# --- No resolver tool at all -> typed UNKNOWN, fail-closed (exit 2) ---
+if [[ "$have_delv" -eq 0 && "$have_dig" -eq 0 ]]; then
+  log "UNKNOWN/UNREACHABLE — neither '$DELV_CMD' nor '$DIG_CMD' is installed"
+  log "  cannot verify DNSSEC for '$DOMAIN'; failing closed (production must not sign on UNKNOWN)"
+  log "  remediation: install bind9-dnsutils (provides dig + delv) on the signing host"
+  exit 2
+fi
+# delv_validates RESOLVER -> 0 if delv reports the chain fully validated.
+delv_validates() {
+  local resolver="$1" out
+  # delv prints "; fully validated" on each validated RRset when the chain of
+  # trust holds; "; unsigned answer" / "resolution failed" otherwise. delv
+  # validates LOCALLY against the IANA trust anchor regardless of which resolver
+  # serves the records, so a non-validating @resolver cannot fake a pass.
+  out="$("$DELV_CMD" "$DOMAIN" "@$resolver" 2>&1 || true)"
+  printf '%s\n' "$out" | grep -q "fully validated"
+}
+# dig_validates RESOLVER -> 0 if the resolver set the AD bit AND an RRSIG is
+# present. BOTH are required: AD alone could be spoofed by a lying resolver
+# without signatures, RRSIG alone proves the zone publishes signatures but not
+# that the chain validated. Requiring AD means a non-validating resolver's
+# answer (RRSIG copied through, AD never set) does NOT pass.
+dig_validates() {
+  local resolver="$1" out ad_flag=0 rrsig=0
+  out="$("$DIG_CMD" "@$resolver" +dnssec +multiline "$DOMAIN" 2>&1 || true)"
+  if printf '%s\n' "$out" | grep -qE '^;; flags:[^;]*\bad\b'; then
+    ad_flag=1
+  fi
+  if printf '%s\n' "$out" | grep -qE '[[:space:]]RRSIG[[:space:]]'; then
+    rrsig=1
+  fi
+  [[ "$ad_flag" -eq 1 && "$rrsig" -eq 1 ]]
+}
+saw_unsigned=0  # set to 1 after any delv/dig attempt that did not confirm validation (unsigned OR unreachable)
+for resolver in $RESOLVERS; do
+  # --- Path 1: delv @resolver (authoritative DNSSEC chain validation) ---
+  if [[ "$have_delv" -eq 1 ]]; then
+    log "validating DNSSEC for '$DOMAIN' via $DELV_CMD @$resolver"
+    if delv_validates "$resolver"; then
+      log "VERIFIED — '$DOMAIN' is DNSSEC-signed (delv @$resolver: fully validated)"
+      exit 0
+    fi
+    saw_unsigned=1
+    log "delv @$resolver did not confirm validation; trying dig @$resolver"
+  fi
+  # --- Path 2: dig @resolver +dnssec (AD bit + RRSIG presence) ---
+  if [[ "$have_dig" -eq 1 ]]; then
+    log "checking DNSSEC for '$DOMAIN' via $DIG_CMD @$resolver +dnssec"
+    if dig_validates "$resolver"; then
+      log "VERIFIED — '$DOMAIN' is DNSSEC-signed (dig @$resolver: AD bit set + RRSIG present)"
+      exit 0
+    fi
+    saw_unsigned=1
+    log "dig @$resolver did not confirm validation (no AD+RRSIG) for '$DOMAIN'"
+  fi
+done
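+# No resolver confirmed validation; both branches below fail closed (exit 1)
+# and differ only in the operator message. NOTE: saw_unsigned is set after ANY
+# delv/dig attempt that did not confirm validation — including an unreachable
+# resolver — so the "could not reach any resolver" branch is taken only when
+# no attempt was made at all (the resolver list yielded no entries).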
+if [[ "$saw_unsigned" -eq 1 ]]; then
+  log "NOT VERIFIED — no trusted resolver confirmed DNSSEC for '$DOMAIN' (zone appears unsigned / chain not validated)"
+  log "  resolvers tried: $RESOLVERS"
+  log "  remediation: sign the zone (DNSSEC) at the registrar/DNS host, then re-run"
+else
+  log "NOT VERIFIED — could not reach any resolver to validate DNSSEC for '$DOMAIN'"
+  log "  resolvers tried: $RESOLVERS"
+  log "  failing closed (production must not sign without positive confirmation)"
+fi
+exit 1

package/scripts/emit-evidence.sh CHANGED Viewed

@@ -35,15 +35,28 @@
 #   1 — input JSON malformed or missing required fields
 #   2 — signing requested but cosign not available
 #   3 — Rekor push requested but failed; also returned when bash < 4 (version-floor check)
+#   4 — production DNSSEC/CAA pre-flight FAILED (fail-closed; nothing was signed)
 #
-# CISO gate (per ISEDC v1 Q1, 2026-05-10): pushing to a public transparency log
-# (Rekor) against the predicate URI https://evals.intentsolutions.io/gate-result/v1
-# is BLOCKED until DNSSEC + CAA records are verified on the namespace. The script
-# does NOT enforce this — that is operator discipline. See bead `iel-4zr` in
-# intent-eval-platform/intent-eval-lab/.beads/.
+# CISO gate (per DR-010 Q5 / ISEDC v1 Q1, 2026-05-10): pushing to a PUBLIC
+# transparency log (Rekor) against the predicate URI
+# https://evals.intentsolutions.io/gate-result/v1 is BLOCKED until DNSSEC + CAA
+# records are verified on the namespace. This script ENFORCES that: when a
+# production Rekor push is requested (--rekor-url / non-empty REKOR_URL), it runs
+# scripts/dnssec-check.sh then scripts/caa-check.sh against the predicate
+# namespace and REFUSES to sign (exit 4) if either fails. The gate is read-only —
+# it anchors nothing and can only make signing MORE conservative.
+#
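+# Opt-out (NON-PRODUCTION / staging ONLY): EVIDENCE_SKIP_DNS_PREFLIGHT=1 is
+# honored ONLY when no production Rekor push is requested. The pre-flight runs
+# only for a Rekor push, so in that non-production case the flag merely logs
+# that the pre-flight was skipped. For a real Rekor push the flag is ignored
+# (with a warning): the DNSSEC/CAA pre-flight can NEVER be skipped.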
 set -euo pipefail
+# Bash version floor: these gates rely on bash 4+ features. Refuse early with a
+# clear message on bash 3.x (e.g. macOS system bash) instead of failing later
+# with a cryptic syntax error (jcgw).
+[ "${BASH_VERSINFO:-0}" -ge 4 ] || { echo 'audit-harness requires bash >= 4' >&2; exit 3; }
 INPUT="-"
 OUTPUT=""
 SIGN=0
@@ -54,6 +67,9 @@ RUNNER_VERSION_OVERRIDE=""
 COMMIT_SHA_OVERRIDE=""
 PREDICATE_URI="https://evals.intentsolutions.io/gate-result/v1"
 STATEMENT_TYPE="https://in-toto.io/Statement/v1"
+# The namespace whose DNSSEC + CAA posture gates production attestations. Derived
+# from the predicate URI host; overridable for testing via EVIDENCE_PREDICATE_DOMAIN.
+PREDICATE_DOMAIN="${EVIDENCE_PREDICATE_DOMAIN:-evals.intentsolutions.io}"
 while [[ $# -gt 0 ]]; do
   case "$1" in
@@ -181,10 +197,30 @@ fi
 # OR an OTEL_EXPORTER_OTLP_ENDPOINT is set. Real exporter wiring is consumer-side;
 # we emit a structured signal that any collector can scrape via stderr capture.
 if [[ "${AUDIT_HARNESS_OTEL:-0}" == "1" ]] || [[ -n "${OTEL_EXPORTER_OTLP_ENDPOINT:-}" ]]; then
-  GATE_ID=$(echo "$GATE_JSON" | python3 -c "import json,sys; print(json.load(sys.stdin).get('gate_id',''))" 2>/dev/null || echo "")
-  RESULT=$(echo "$GATE_JSON" | python3 -c "import json,sys; print(json.load(sys.stdin).get('result',''))" 2>/dev/null || echo "")
-  printf '[OTEL] {"name":"agent.rollout.gate.evaluated","attributes":{"gate.id":"%s","gate.result":"%s","gate.runner":"%s","gate.commit_sha":"%s"},"timestamp":"%s"}\n' \
-    "$GATE_ID" "$RESULT" "$RUNNER" "$COMMIT_SHA" "$TIMESTAMP" >&2
+  # Compose the JSON via python so every attribute value is JSON-escaped.
+  # printf-interpolating gate_id/result/runner into a JSON format string
+  # emitted structurally invalid JSON whenever a value carried a double quote
+  # (e.g. AUDIT_HARNESS_SIDE='ci"injection' flowing into gate_id).
+  OTEL_LINE=$(GATE_JSON="$GATE_JSON" RUNNER="$RUNNER" COMMIT_SHA="$COMMIT_SHA" TIMESTAMP="$TIMESTAMP" \
+    python3 - <<'PY' 2>/dev/null || echo ""
+import json, os
+try:
+    gate = json.loads(os.environ["GATE_JSON"])
+except (json.JSONDecodeError, ValueError):
+    gate = {}
+print(json.dumps({
+    "name": "agent.rollout.gate.evaluated",
+    "attributes": {
+        "gate.id": str(gate.get("gate_id", "")),
+        "gate.result": str(gate.get("result", "")),
+        "gate.runner": os.environ["RUNNER"],
+        "gate.commit_sha": os.environ["COMMIT_SHA"],
+    },
+    "timestamp": os.environ["TIMESTAMP"],
+}, separators=(",", ":")))
+PY
+)
+  [[ -n "$OTEL_LINE" ]] && printf '[OTEL] %s\n' "$OTEL_LINE" >&2
 fi
 # --- Sign + emit ---
@@ -212,6 +248,40 @@ if ! command -v cosign >/dev/null 2>&1; then
   exit 2
 fi
+# --- Production DNSSEC + CAA pre-flight gate (CISO binding DR-010 Q5) ----------
+# A "production" signing event is one that pushes a signed Statement to a PUBLIC
+# transparency log (Rekor) — i.e. REKOR_URL is non-empty. Before that irreversible
+# anchor, the predicate namespace MUST be DNSSEC-signed AND CAA-pinned. We run the
+# two read-only checks; if EITHER fails we REFUSE to sign and exit 4.
+#
+# The opt-out EVIDENCE_SKIP_DNS_PREFLIGHT=1 is honored ONLY for non-production
+# (no Rekor push). A real Rekor push can never be silently skipped.
+if [[ -n "$REKOR_URL" ]]; then
+  PREFLIGHT_DIR="$(cd "$(dirname "$0")" && pwd)"
+  if [[ "${EVIDENCE_SKIP_DNS_PREFLIGHT:-0}" == "1" ]]; then
+    echo "emit-evidence: IGNORING EVIDENCE_SKIP_DNS_PREFLIGHT=1 — a Rekor push (REKOR_URL=$REKOR_URL) is a production attestation and CANNOT skip the DNSSEC/CAA pre-flight." >&2
+  fi
+  echo "emit-evidence: production Rekor push requested — running DNSSEC + CAA pre-flight on '$PREDICATE_DOMAIN'" >&2
+  if ! bash "$PREFLIGHT_DIR/dnssec-check.sh" "$PREDICATE_DOMAIN" >&2; then
+    echo "emit-evidence: REFUSING TO SIGN — DNSSEC pre-flight FAILED for '$PREDICATE_DOMAIN'." >&2
+    echo "emit-evidence: remediation: pin DNSSEC + CAA on $PREDICATE_DOMAIN before any production attestation." >&2
+    echo "emit-evidence:   see intent-eval-platform/intent-eval-lab/000-docs (DR-010 Q5 CISO binding) + the iah-E06 runbook." >&2
+    exit 4
+  fi
+  if ! bash "$PREFLIGHT_DIR/caa-check.sh" "$PREDICATE_DOMAIN" >&2; then
+    echo "emit-evidence: REFUSING TO SIGN — CAA pre-flight FAILED for '$PREDICATE_DOMAIN'." >&2
+    echo "emit-evidence: remediation: pin DNSSEC + CAA on $PREDICATE_DOMAIN before any production attestation." >&2
+    echo "emit-evidence:   set EXPECTED_CAA_ISSUER to the published CA, then publish a CAA record pinning it." >&2
+    exit 4
+  fi
+  echo "emit-evidence: DNSSEC + CAA pre-flight PASSED for '$PREDICATE_DOMAIN' — proceeding to sign." >&2
+elif [[ "${EVIDENCE_SKIP_DNS_PREFLIGHT:-0}" == "1" ]]; then
+  # Non-production sign (no Rekor push) with the explicit opt-out set: keep
+  # existing staging flows green without running the network-bound checks.
+  echo "emit-evidence: non-production sign (no Rekor push); DNSSEC/CAA pre-flight skipped per EVIDENCE_SKIP_DNS_PREFLIGHT=1." >&2
+fi
 # Stage the Statement to a temp file for cosign to consume
 TMP=$(mktemp -d)
 trap 'rm -rf "$TMP"' EXIT

package/scripts/escape-scan.sh CHANGED Viewed

@@ -28,6 +28,23 @@
 set -euo pipefail
+# Bash version floor: these gates rely on bash 4+ features. Refuse early with a
+# clear message on bash 3.x (e.g. macOS system bash) instead of failing later
+# with a cryptic syntax error (jcgw).
+[ "${BASH_VERSINFO:-0}" -ge 4 ] || { echo 'audit-harness requires bash >= 4' >&2; exit 3; }
+# Cross-platform SHA-256: `sha256sum` ships with GNU coreutils (Linux);
+# macOS only has `shasum -a 256`. Both produce identical `<hash>  <file>`
+# output, so downstream awk parsing is unchanged. Mirrors harness-hash.sh.
+if command -v sha256sum >/dev/null 2>&1; then
+  SHA256_CMD=(sha256sum)
+elif command -v shasum >/dev/null 2>&1; then
+  SHA256_CMD=(shasum -a 256)
+else
+  echo "escape-scan: neither sha256sum nor shasum found in PATH" >&2
+  exit 2
+fi
 DIFF_SRC=""
 VERIFY_HASH=1
 JSON_OUT=0
@@ -51,7 +68,15 @@ if [[ "$#" -eq 0 ]]; then
 fi
 case "$1" in
-  -) DIFF_SRC="/dev/stdin" ;;
+  -)
+    # Buffer stdin into a temp file so the diff can be read multiple times.
+    # /dev/stdin is drained by the first grep, which would leave later reads
+    # (notably the input_hash sha256) seeing an empty stream — emitting the
+    # SHA-256 of "" instead of the real diff hash.
+    DIFF_SRC=$(mktemp)
+    trap 'rm -f "$DIFF_SRC"' EXIT
+    cat > "$DIFF_SRC"
+    ;;
   --staged)
     DIFF_SRC=$(mktemp)
     trap 'rm -f "$DIFF_SRC"' EXIT
@@ -198,10 +223,10 @@ if [[ "$JSON_OUT" -eq 1 ]]; then
   elif [[ "$FLAG" -gt 0 ]]; then
     result="ADVISORY"
   fi
-  input_hash=$(sha256sum "$DIFF_SRC" | awk '{print "sha256:"$1}')
+  input_hash=$("${SHA256_CMD[@]}" "$DIFF_SRC" | awk '{print "sha256:"$1}')
   policy_hash="sha256:0000000000000000000000000000000000000000000000000000000000000000"
   if [[ -f "$TESTING_MD" ]]; then
-    policy_hash=$(sha256sum "$TESTING_MD" | awk '{print "sha256:"$1}')
+    policy_hash=$("${SHA256_CMD[@]}" "$TESTING_MD" | awk '{print "sha256:"$1}')
   fi
   printf '{"gate_id":"audit-harness:%s:escape-scan","result":"%s","input_hash":"%s","policy_hash":"%s","metadata":{"refuse":%d,"challenge":%d,"flag":%d,"coverage_line_floor":%d,"coverage_branch_floor":%d,"mutation_floor":%d}' \
     "${AUDIT_HARNESS_SIDE:-ci}" "$result" "$input_hash" "$policy_hash" "$REFUSE" "$CHALLENGE" "$FLAG" \

package/scripts/gherkin-lint.sh CHANGED Viewed

@@ -13,6 +13,11 @@
 set -euo pipefail
+# Bash version floor: these gates rely on bash 4+ features. Refuse early with a
+# clear message on bash 3.x (e.g. macOS system bash) instead of failing later
+# with a cryptic syntax error (jcgw).
+[ "${BASH_VERSINFO:-0}" -ge 4 ] || { echo 'audit-harness requires bash >= 4' >&2; exit 3; }
 PATH_ARG="features/"
 STRICT=0
 JSON_OUT=0

package/scripts/harness-hash.sh CHANGED Viewed

@@ -23,6 +23,11 @@
 set -euo pipefail
+# Bash version floor: these gates rely on bash 4+ features. Refuse early with a
+# clear message on bash 3.x (e.g. macOS system bash) instead of failing later
+# with a cryptic syntax error (jcgw).
+[ "${BASH_VERSINFO:-0}" -ge 4 ] || { echo 'audit-harness requires bash >= 4' >&2; exit 3; }
 # Cross-platform SHA-256: `sha256sum` ships with GNU coreutils (Linux);
 # macOS only has `shasum -a 256`. Both produce identical `<hash>  <file>`
 # output, so downstream awk parsing is unchanged.