@intentsolutions/audit-harness 1.1.7 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,18 +1,26 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- audit-harness currency — advisory upstream-currency report (PP-PLAN-040 Phase 5 / E7).
3
+ audit-harness currency — advisory poll-freshness report (PP-PLAN-040 Phase 5 / E7).
4
4
 
5
5
  Currency depends on upstream state, which is non-deterministic and network-bound, so
6
6
  it is deliberately the WEAKEST kind of check: an advisory REPORT with **no exit-code
7
7
  authority, no auto-fix, and no live-fetch**. It reads the per-upstream-identity pin
8
8
  relation (schemas/currency/pins.v1.json) — where each upstream carries its own
9
- pinned_version + the date it was last verified (checked_at) + a staleness window —
10
- and reports which pins are themselves STALE (checked_at older than the window), i.e.
11
- which pins a human should re-verify against upstream.
9
+ pinned_version + the date it was last verified (checked_at) + an advisory
10
+ poll-freshness SLA — and reports which pins are themselves PAST their SLA
11
+ (checked_at older than the SLA window), i.e. which pins a human should re-verify
12
+ against upstream. The SLA gates NOTHING except human attention.
12
13
 
13
14
  This models the pin's OWN staleness as detectable, rather than one opaque
14
- ".schema-version" scalar. The /sync-testing-harness skill consumes this report to
15
- open advisory bump PRs; the report never reddens a build (always exit 0).
15
+ ".schema-version" scalar. Pins are grouped by class (spec-page / schema-file /
16
+ release-feed / internal-contract); SLA resolution order is: explicit per-pin
17
+ staleness_window_days > the pin's class SLA > default_staleness_window_days.
18
+ The /sync-testing-harness skill consumes this report to open advisory bump PRs;
19
+ the report never reddens a build (always exit 0).
20
+
21
+ Follow-up (deliberately NOT wired here, [9k5h.10]): the intent-eval-lab
22
+ detector-health surface will consume the --json output; that cross-repo
23
+ integration is tracked separately.
16
24
 
17
25
  Stdlib only. No network. No filesystem mutation.
18
26
  """
@@ -25,6 +33,8 @@ from datetime import datetime, timezone
25
33
  HERE = os.path.dirname(os.path.abspath(__file__))
26
34
  DEFAULT_PINS = os.path.join(HERE, "..", "schemas", "currency", "pins.v1.json")
27
35
 
36
+ UNCLASSED = "(unclassed)"
37
+
28
38
 
29
39
  def parse_date(s):
30
40
  try:
@@ -33,12 +43,23 @@ def parse_date(s):
33
43
  return None
34
44
 
35
45
 
46
+ def resolve_window(pin, classes, default_window):
47
+ """SLA resolution: explicit per-pin window > class SLA > file default."""
48
+ if pin.get("staleness_window_days") is not None:
49
+ return pin["staleness_window_days"]
50
+ cls = classes.get(pin.get("class") or "", {})
51
+ if cls.get("staleness_window_days") is not None:
52
+ return cls["staleness_window_days"]
53
+ return default_window
54
+
55
+
36
56
  def build_report(pins_doc, today):
37
57
  default_window = pins_doc.get("default_staleness_window_days", 90)
58
+ classes = pins_doc.get("staleness_classes", {})
38
59
  out = []
39
60
  for pin in pins_doc.get("pins", []):
40
61
  checked = parse_date(pin.get("checked_at", ""))
41
- window = pin.get("staleness_window_days", default_window)
62
+ window = resolve_window(pin, classes, default_window)
42
63
  if checked is None:
43
64
  age, status = None, "unknown-checked_at"
44
65
  else:
@@ -46,6 +67,7 @@ def build_report(pins_doc, today):
46
67
  status = "stale" if age > window else "current"
47
68
  out.append({
48
69
  "identity": pin.get("identity"),
70
+ "class": pin.get("class") or UNCLASSED,
49
71
  "pinned_version": pin.get("pinned_version"),
50
72
  "checked_at": pin.get("checked_at"),
51
73
  "age_days": age,
@@ -57,8 +79,18 @@ def build_report(pins_doc, today):
57
79
  return out
58
80
 
59
81
 
82
+ def group_by_class(report):
83
+ """Ordered {class: [rows]} grouping, classes sorted, (unclassed) last."""
84
+ grouped = {}
85
+ for r in report:
86
+ grouped.setdefault(r["class"], []).append(r)
87
+ ordered = sorted(grouped, key=lambda c: (c == UNCLASSED, c))
88
+ return {c: grouped[c] for c in ordered}
89
+
90
+
60
91
  def main():
61
- ap = argparse.ArgumentParser(description="Advisory upstream-currency report (no exit authority)")
92
+ ap = argparse.ArgumentParser(
93
+ description="Advisory poll-freshness report (no exit authority — the SLA gates nothing but human attention)")
62
94
  ap.add_argument("--pins", default=DEFAULT_PINS, help="path to the pin relation datum")
63
95
  ap.add_argument("--json", action="store_true", help="emit JSON report")
64
96
  ap.add_argument("--today", default=None, help="override 'today' (YYYY-MM-DD) for reproducible reports/tests")
@@ -74,39 +106,52 @@ def main():
74
106
 
75
107
  today = parse_date(args.today) if args.today else datetime.now(timezone.utc).date()
76
108
  report = build_report(pins_doc, today)
109
+ grouped = group_by_class(report)
77
110
  stale = [r for r in report if r["status"] == "stale"]
78
111
  unknown = [r for r in report if r["status"] == "unknown-checked_at"]
79
112
 
80
113
  if args.json:
114
+ by_class = {}
115
+ for cls, rows in grouped.items():
116
+ by_class[cls] = {
117
+ "total": len(rows),
118
+ "stale": sum(1 for r in rows if r["status"] == "stale"),
119
+ "current": sum(1 for r in rows if r["status"] == "current"),
120
+ "unknown": sum(1 for r in rows if r["status"] == "unknown-checked_at"),
121
+ }
81
122
  print(json.dumps({
82
123
  "report": "currency/v1",
83
124
  "generated_for": today.strftime("%Y-%m-%d"),
84
125
  "pins": report,
126
+ "by_class": by_class,
85
127
  "stale_count": len(stale),
86
128
  "advisory": True,
87
129
  }, indent=2))
88
130
  else:
89
- print(f"Upstream currency (advisory) — as of {today.strftime('%Y-%m-%d')}")
90
- print(f"{'identity':<24} {'pinned':<14} {'checked_at':<12} {'age':>5} {'win':>4} status")
91
- for r in report:
92
- age = "—" if r["age_days"] is None else str(r["age_days"]) + "d"
93
- if r["status"] == "stale":
94
- mark = " STALE"
95
- elif r["status"] == "current":
96
- mark = "current"
97
- else:
98
- mark = "? " + r["status"]
99
- print(f"{(r['identity'] or ''):<24} {(r['pinned_version'] or ''):<14} "
100
- f"{(r['checked_at'] or ''):<12} {age:>5} {r['window_days']:>4} {mark}")
131
+ print(f"Upstream currency advisory poll-freshness SLA report — as of {today.strftime('%Y-%m-%d')}")
132
+ print(f"{'identity':<26} {'pinned':<18} {'checked_at':<12} {'age':>5} {'sla':>4} status")
133
+ for cls, rows in grouped.items():
134
+ print(f"[{cls}] {len(rows)} pin(s)")
135
+ for r in rows:
136
+ age = "—" if r["age_days"] is None else str(r["age_days"]) + "d"
137
+ if r["status"] == "stale":
138
+ mark = "⚠ PAST SLA"
139
+ elif r["status"] == "current":
140
+ mark = "current"
141
+ else:
142
+ mark = "? " + r["status"]
143
+ print(f" {(r['identity'] or ''):<24} {(r['pinned_version'] or ''):<18} "
144
+ f"{(r['checked_at'] or ''):<12} {age:>5} {r['window_days']:>4} {mark}")
101
145
  print()
102
146
  if stale:
103
- print(f"{len(stale)} pin(s) past their staleness windowre-verify against upstream, "
104
- f"then bump pinned_version + checked_at in schemas/currency/pins.v1.json:")
147
+ print(f"{len(stale)} pin(s) past their poll-freshness SLAthe SLA gates nothing but human "
148
+ f"attention: re-verify against upstream, then bump pinned_version + checked_at in "
149
+ f"schemas/currency/pins.v1.json:")
105
150
  for r in stale:
106
- print(f" - {r['identity']}: last checked {r['checked_at']} "
107
- f"({r['age_days']}d ago > {r['window_days']}d)")
151
+ print(f" - {r['identity']} [{r['class']}]: last checked {r['checked_at']} "
152
+ f"({r['age_days']}d ago > {r['window_days']}d SLA)")
108
153
  else:
109
- print("All pins within their staleness window.")
154
+ print("All pins within their poll-freshness SLA.")
110
155
  if unknown:
111
156
  print(f"{len(unknown)} pin(s) have an unparseable checked_at — fix the date format (YYYY-MM-DD).")
112
157
 
@@ -0,0 +1,158 @@
1
+ #!/usr/bin/env bash
2
+ # dnssec-check.sh — verify a namespace is DNSSEC-signed before a production
3
+ # signed attestation is anchored against it.
4
+ #
5
+ # WHY THIS EXISTS (CISO binding, DR-010 Q5 / ISEDC v1 Q1 2026-05-10):
6
+ # Predicate URIs for the Evidence Bundle live ONLY at evals.intentsolutions.io.
7
+ # Pushing a signed in-toto Statement to a PUBLIC transparency log (Rekor)
8
+ # against an unsigned namespace is irreversible and lets an attacker who can
9
+ # spoof the zone mint look-alike attestations. DNSSEC must be verified on the
10
+ # namespace BEFORE the first production attestation. This script is that gate.
11
+ # It anchors NOTHING — it is a read-only verification that can only make
12
+ # signing MORE conservative (fail-closed).
13
+ #
14
+ # WHY IT QUERIES AN EXPLICIT RESOLVER (the bug this version fixes):
15
+ # Querying the LOCAL STUB RESOLVER (plain `dig`, no `@server`) FALSE-NEGATIVES
16
+ # on hosts whose stub resolver strips DNSSEC records or never sets the AD bit
17
+ # (systemd-resolved, most CI runners, dev boxes behind a caching forwarder).
18
+ # On such a host a correctly DNSSEC-signed zone looks unsigned. For a
19
+ # fail-closed gate that is the WRONG failure mode for usability: it blocks a
20
+ # legitimate production signing exactly as it blocks a genuinely-unsigned
21
+ # zone, i.e. the check loses all discriminating power. The fix is to query a
22
+ # TRUSTED VALIDATING resolver and require the resolver to assert validation
23
+ # (`delv` full-chain "fully validated", or `dig`'s AD bit + an RRSIG). The
24
+ # gate stays fail-closed: PASS only on positive confirmation from a trusted
25
+ # resolver; UNKNOWN / unreachable / no-tool => non-zero.
26
+ #
27
+ # Usage:
28
+ # bash scripts/dnssec-check.sh [DOMAIN]
29
+ # DNSSEC_CHECK_DOMAIN=evals.intentsolutions.io bash scripts/dnssec-check.sh
30
+ #
31
+ # Resolution order for the domain:
32
+ # 1. $1 (positional)
33
+ # 2. $DNSSEC_CHECK_DOMAIN
34
+ # 3. default: evals.intentsolutions.io
35
+ #
36
+ # Behavior:
37
+ # - Queries each resolver in $DNSSEC_CHECK_RESOLVERS (default 1.1.1.1 8.8.8.8),
38
+ # in order, and PASSES on the FIRST that confirms DNSSEC validation.
39
+ # - For each resolver: prefers `delv @<resolver>` (full DNSSEC chain validation
40
+ # against the IANA trust anchor; "fully validated" => PASS). Falls back to
41
+ # `dig @<resolver> +dnssec` and requires BOTH the AD (Authenticated Data)
42
+ # header flag AND the presence of an RRSIG record (a non-validating answer,
43
+ # i.e. RRSIG but no AD, does NOT pass — a malicious/forwarding resolver that
44
+ # returns records without validating the chain cannot trivially pass).
45
+ # - If NO resolver confirms validation (every resolver says unsigned, or is
46
+ # unreachable), exits 1 (fail-closed).
47
+ # - If NEITHER delv NOR dig is installed, emits a typed UNKNOWN/UNREACHABLE
48
+ # result and exits 2 (fail-closed for production).
49
+ #
50
+ # Exit codes:
51
+ # 0 — DNSSEC verified (a trusted resolver fully validated, or set AD + RRSIG)
52
+ # 1 — DNSSEC NOT verified (no trusted resolver confirmed; zone unsigned /
53
+ # validation failed / all resolvers unreachable)
54
+ # 2 — UNKNOWN/UNREACHABLE (no resolver tool installed at all)
55
+ #
56
+ # Override knobs:
57
+ # DNSSEC_CHECK_RESOLVERS — space-separated list of validating/public resolvers
58
+ # to query in order (default: "1.1.1.1 8.8.8.8").
59
+ # DNSSEC_CHECK_DELV_CMD — command used in place of `delv` (default: delv)
60
+ # DNSSEC_CHECK_DIG_CMD — command used in place of `dig` (default: dig)
61
+
62
+ set -euo pipefail
63
+
64
+ DOMAIN="${1:-${DNSSEC_CHECK_DOMAIN:-evals.intentsolutions.io}}"
65
+ DELV_CMD="${DNSSEC_CHECK_DELV_CMD:-delv}"
66
+ DIG_CMD="${DNSSEC_CHECK_DIG_CMD:-dig}"
67
+ # Trusted validating/public resolvers, queried in order. Cloudflare (1.1.1.1)
68
+ # and Google (8.8.8.8) both perform DNSSEC validation and set the AD bit.
69
+ RESOLVERS="${DNSSEC_CHECK_RESOLVERS:-1.1.1.1 8.8.8.8}"
70
+
71
+ log() { printf 'dnssec-check: %s\n' "$1" >&2; }
72
+
73
+ if [[ "$DOMAIN" == "-h" || "$DOMAIN" == "--help" ]]; then
74
+ sed -n '2,60p' "$0"
75
+ exit 0
76
+ fi
77
+
78
+ have() { command -v "$1" >/dev/null 2>&1; }
79
+
80
+ have_delv=0
81
+ have_dig=0
82
+ have "$DELV_CMD" && have_delv=1
83
+ have "$DIG_CMD" && have_dig=1
84
+
85
+ # --- No resolver tool at all -> typed UNKNOWN, fail-closed (exit 2) ---
86
+ if [[ "$have_delv" -eq 0 && "$have_dig" -eq 0 ]]; then
87
+ log "UNKNOWN/UNREACHABLE — neither '$DELV_CMD' nor '$DIG_CMD' is installed"
88
+ log " cannot verify DNSSEC for '$DOMAIN'; failing closed (production must not sign on UNKNOWN)"
89
+ log " remediation: install bind9-dnsutils (provides dig + delv) on the signing host"
90
+ exit 2
91
+ fi
92
+
93
+ # delv_validates RESOLVER -> 0 if delv reports the chain fully validated.
94
+ delv_validates() {
95
+ local resolver="$1" out
96
+ # delv prints "; fully validated" on each validated RRset when the chain of
97
+ # trust holds; "; unsigned answer" / "resolution failed" otherwise. delv
98
+ # validates LOCALLY against the IANA trust anchor regardless of which resolver
99
+ # serves the records, so a non-validating @resolver cannot fake a pass.
100
+ out="$("$DELV_CMD" "$DOMAIN" "@$resolver" 2>&1 || true)"
101
+ printf '%s\n' "$out" | grep -q "fully validated"
102
+ }
103
+
104
+ # dig_validates RESOLVER -> 0 if the resolver set the AD bit AND an RRSIG is
105
+ # present. BOTH are required: AD alone could be spoofed by a lying resolver
106
+ # without signatures, RRSIG alone proves the zone publishes signatures but not
107
+ # that the chain validated. Requiring AD means a non-validating resolver's
108
+ # answer (RRSIG copied through, AD never set) does NOT pass.
109
+ dig_validates() {
110
+ local resolver="$1" out ad_flag=0 rrsig=0
111
+ out="$("$DIG_CMD" "@$resolver" +dnssec +multiline "$DOMAIN" 2>&1 || true)"
112
+ if printf '%s\n' "$out" | grep -qE '^;; flags:[^;]*\bad\b'; then
113
+ ad_flag=1
114
+ fi
115
+ if printf '%s\n' "$out" | grep -qE '[[:space:]]RRSIG[[:space:]]'; then
116
+ rrsig=1
117
+ fi
118
+ [[ "$ad_flag" -eq 1 && "$rrsig" -eq 1 ]]
119
+ }
120
+
121
+ saw_unsigned=0 # at least one resolver answered, and said NOT validated
122
+
123
+ for resolver in $RESOLVERS; do
124
+ # --- Path 1: delv @resolver (authoritative DNSSEC chain validation) ---
125
+ if [[ "$have_delv" -eq 1 ]]; then
126
+ log "validating DNSSEC for '$DOMAIN' via $DELV_CMD @$resolver"
127
+ if delv_validates "$resolver"; then
128
+ log "VERIFIED — '$DOMAIN' is DNSSEC-signed (delv @$resolver: fully validated)"
129
+ exit 0
130
+ fi
131
+ saw_unsigned=1
132
+ log "delv @$resolver did not confirm validation; trying dig @$resolver"
133
+ fi
134
+
135
+ # --- Path 2: dig @resolver +dnssec (AD bit + RRSIG presence) ---
136
+ if [[ "$have_dig" -eq 1 ]]; then
137
+ log "checking DNSSEC for '$DOMAIN' via $DIG_CMD @$resolver +dnssec"
138
+ if dig_validates "$resolver"; then
139
+ log "VERIFIED — '$DOMAIN' is DNSSEC-signed (dig @$resolver: AD bit set + RRSIG present)"
140
+ exit 0
141
+ fi
142
+ saw_unsigned=1
143
+ log "dig @$resolver did not confirm validation (no AD+RRSIG) for '$DOMAIN'"
144
+ fi
145
+ done
146
+
147
+ # No resolver confirmed validation. NOTE: saw_unsigned is set after ANY check
148
+ # that did not confirm (unreachable included); it only picks the message (exit 1).
149
+ if [[ "$saw_unsigned" -eq 1 ]]; then
150
+ log "NOT VERIFIED — no trusted resolver confirmed DNSSEC for '$DOMAIN' (zone appears unsigned / chain not validated)"
151
+ log " resolvers tried: $RESOLVERS"
152
+ log " remediation: sign the zone (DNSSEC) at the registrar/DNS host, then re-run"
153
+ else
154
+ log "NOT VERIFIED — could not reach any resolver to validate DNSSEC for '$DOMAIN'"
155
+ log " resolvers tried: $RESOLVERS"
156
+ log " failing closed (production must not sign without positive confirmation)"
157
+ fi
158
+ exit 1
@@ -35,15 +35,28 @@
35
35
  # 1 — input JSON malformed or missing required fields
36
36
  # 2 — signing requested but cosign not available
37
37
  # 3 — Rekor push requested but failed
38
+ # 4 — production DNSSEC/CAA pre-flight FAILED (fail-closed; nothing was signed)
38
39
  #
39
- # CISO gate (per ISEDC v1 Q1, 2026-05-10): pushing to a public transparency log
40
- # (Rekor) against the predicate URI https://evals.intentsolutions.io/gate-result/v1
41
- # is BLOCKED until DNSSEC + CAA records are verified on the namespace. The script
42
- # does NOT enforce this — that is operator discipline. See bead `iel-4zr` in
43
- # intent-eval-platform/intent-eval-lab/.beads/.
40
+ # CISO gate (per DR-010 Q5 / ISEDC v1 Q1, 2026-05-10): pushing to a PUBLIC
41
+ # transparency log (Rekor) against the predicate URI
42
+ # https://evals.intentsolutions.io/gate-result/v1 is BLOCKED until DNSSEC + CAA
43
+ # records are verified on the namespace. This script ENFORCES that: when a
44
+ # production Rekor push is requested (--rekor-url / non-empty REKOR_URL), it runs
45
+ # scripts/dnssec-check.sh then scripts/caa-check.sh against the predicate
46
+ # namespace and REFUSES to sign (exit 4) if either fails. The gate is read-only —
47
+ # it anchors nothing and can only make signing MORE conservative.
48
+ #
49
+ # Opt-out (NON-PRODUCTION / staging ONLY): EVIDENCE_SKIP_DNS_PREFLIGHT=1 skips the
50
+ # pre-flight. It is honored ONLY when no production Rekor push is requested; a
51
+ # real Rekor push can NEVER be silently skipped.
44
52
 
45
53
  set -euo pipefail
46
54
 
55
+ # Bash version floor: these gates rely on bash 4+ features. Refuse early with a
56
+ # clear message on bash 3.x (e.g. macOS system bash) instead of failing later
57
+ # with a cryptic syntax error (jcgw).
58
+ [ "${BASH_VERSINFO:-0}" -ge 4 ] || { echo 'audit-harness requires bash >= 4' >&2; exit 3; }
59
+
47
60
  INPUT="-"
48
61
  OUTPUT=""
49
62
  SIGN=0
@@ -54,6 +67,9 @@ RUNNER_VERSION_OVERRIDE=""
54
67
  COMMIT_SHA_OVERRIDE=""
55
68
  PREDICATE_URI="https://evals.intentsolutions.io/gate-result/v1"
56
69
  STATEMENT_TYPE="https://in-toto.io/Statement/v1"
70
+ # The namespace whose DNSSEC + CAA posture gates production attestations. Derived
71
+ # from the predicate URI host; overridable for testing via EVIDENCE_PREDICATE_DOMAIN.
72
+ PREDICATE_DOMAIN="${EVIDENCE_PREDICATE_DOMAIN:-evals.intentsolutions.io}"
57
73
 
58
74
  while [[ $# -gt 0 ]]; do
59
75
  case "$1" in
@@ -175,16 +191,138 @@ if [[ -z "$STATEMENT" ]]; then
175
191
  exit 1
176
192
  fi
177
193
 
178
- # --- OTel event (best-effort no-op if collector absent) ---
179
- # Fire agent.rollout.gate.evaluated per intent-eval-lab/000-docs/001-DR-RFC-...md.
180
- # We emit a single OTLP-shaped JSON line to stderr when AUDIT_HARNESS_OTEL=1
181
- # OR an OTEL_EXPORTER_OTLP_ENDPOINT is set. Real exporter wiring is consumer-side;
182
- # we emit a structured signal that any collector can scrape via stderr capture.
194
+ # --- OTel events (best-effort no-op if collector absent) ---
195
+ # The gate-decision event fires per the NORMATIVE runtime event taxonomy
196
+ # intent-eval-lab/000-docs/067-AT-SPEC-runtime-event-taxonomy-2026-06-12.md § 2.2
197
+ # (GOVERNANCE events, `gate.*`):
198
+ #
199
+ # 1. agent.rollout.gate.evaluated — observability signal fired at the
200
+ # start/observation of a gate evaluation. NON-NORMATIVE: 067-AT-SPEC closes
201
+ # the `gate.*` category and does NOT define a gate-evaluated event, so this
202
+ # carries the legacy raw gate identity + result for collectors that already
203
+ # scrape it. It is NOT a 067-pinned name and a future taxonomy extension may
204
+ # retire or rename it; nothing should pin to it. The normative signal is (2).
205
+ # 2. gate.decision.emitted (iah-E07b) — fired at the END of the gate
206
+ # evaluation. This is the NORMATIVE name from 067-AT-SPEC § 2.2: "a
207
+ # RolloutGate decision row is emitted under gate-result/v1". Payload per
208
+ # § 2.2: gate.name (string), gate.decision (enum pass|fail|advisory|error),
209
+ # gate.policy_ref (string). This is the one a ship-gate dashboard alerts on.
210
+ #
211
+ # ATTRIBUTE-SPELLING AUTHORITY (do NOT redefine here): the canonical attribute
212
+ # names are pinned by the kernel at
213
+ # intent-eval-core/schemas/v1/otel-attributes.yaml — OTel-idiomatic dotted
214
+ # lowercase (e.g. gate.decision). We spell every attribute to match that file.
215
+ # 067-AT-SPEC § 2.2 is the EVENT-NAME authority for gate.decision.emitted and its
216
+ # payload schema; the gate.decision enum {pass, fail, advisory, error} is the
217
+ # closed gate-result/v1 verdict enum (Blueprint B § 7.4 / kernel gate-result
218
+ # schema) — NOT the RolloutGateDecision ship/no_ship vocabulary.
219
+ #
220
+ # We emit OTLP-shaped JSON lines to stderr when AUDIT_HARNESS_OTEL=1 OR an
221
+ # OTEL_EXPORTER_OTLP_ENDPOINT is set. Real exporter wiring is consumer-side; we
222
+ # emit a structured signal any collector can scrape via stderr capture. The path
223
+ # is fully best-effort: a collector being absent is the no-op default, and a
224
+ # python failure (||) degrades to an empty line that is simply not printed —
225
+ # the gate's own exit status is never affected by OTel emission (iah-E07c).
183
226
  if [[ "${AUDIT_HARNESS_OTEL:-0}" == "1" ]] || [[ -n "${OTEL_EXPORTER_OTLP_ENDPOINT:-}" ]]; then
184
- GATE_ID=$(echo "$GATE_JSON" | python3 -c "import json,sys; print(json.load(sys.stdin).get('gate_id',''))" 2>/dev/null || echo "")
185
- RESULT=$(echo "$GATE_JSON" | python3 -c "import json,sys; print(json.load(sys.stdin).get('result',''))" 2>/dev/null || echo "")
186
- printf '[OTEL] {"name":"agent.rollout.gate.evaluated","attributes":{"gate.id":"%s","gate.result":"%s","gate.runner":"%s","gate.commit_sha":"%s"},"timestamp":"%s"}\n' \
187
- "$GATE_ID" "$RESULT" "$RUNNER" "$COMMIT_SHA" "$TIMESTAMP" >&2
227
+ # Compose the JSON via python so every attribute value is JSON-escaped.
228
+ # printf-interpolating gate_id/result/runner into a JSON format string
229
+ # emitted structurally invalid JSON whenever a value carried a double quote
230
+ # (e.g. AUDIT_HARNESS_SIDE='ci"injection' flowing into gate_id).
231
+ OTEL_LINES=$(GATE_JSON="$GATE_JSON" RUNNER="$RUNNER" COMMIT_SHA="$COMMIT_SHA" TIMESTAMP="$TIMESTAMP" \
232
+ python3 - <<'PY' 2>/dev/null || echo ""
233
+ import json, os
234
+ try:
235
+ gate = json.loads(os.environ["GATE_JSON"])
236
+ except (json.JSONDecodeError, ValueError):
237
+ gate = {}
238
+
239
+ runner = os.environ["RUNNER"]
240
+ commit_sha = os.environ["COMMIT_SHA"]
241
+ timestamp = os.environ["TIMESTAMP"]
242
+ gate_id = str(gate.get("gate_id", ""))
243
+ # The canonical gate-result/v1 verdict field is gate_decision (lowercase enum,
244
+ # Blueprint B § 7.4); the legacy draft envelope used `result` (UPPERCASE). Read
245
+ # the canonical field first, fall back to the legacy field.
246
+ gate_decision_raw = str(gate.get("gate_decision", gate.get("result", "")))
247
+
248
+ # gate.name / gate.policy_ref per 067-AT-SPEC § 2.2 payload schema. The canonical
249
+ # envelope carries gate_name (kebab-case) + policy_ref; fall back to gate_id /
250
+ # policy_hash for legacy draft envelopes that predate Blueprint B § 7.4.
251
+ gate_name = str(gate.get("gate_name", gate_id))
252
+ policy_ref = str(gate.get("policy_ref", gate.get("policy_hash", "")))
253
+
254
+ # Map the inbound verdict to the closed gate.decision enum {pass, fail,
255
+ # advisory, error} (gate-result/v1 / kernel gate-result schema). This is the
256
+ # 067-AT-SPEC § 2.2 enum — NOT the RolloutGateDecision ship/no_ship vocabulary.
257
+ # Canonical lowercase values pass straight through; legacy UPPERCASE results map
258
+ # down; an unrecognized/missing verdict is `error` (the gate could not affirm a
259
+ # decision — an error condition, not a clean `fail`).
260
+ _DECISION_MAP = {
261
+ "pass": "pass",
262
+ "fail": "fail",
263
+ "advisory": "advisory",
264
+ "error": "error",
265
+ }
266
+ decision = _DECISION_MAP.get(gate_decision_raw.strip().lower(), "error")
267
+ # An advisory_severity hint on a non-fail/non-error row signals an advisory row
268
+ # even when the legacy `result` field only said PASS.
269
+ if decision in ("pass",) and gate.get("advisory_severity"):
270
+ decision = "advisory"
271
+
272
+ reasons = []
273
+ if decision == "pass":
274
+ reasons.append(f"gate '{gate_id}' decision: pass")
275
+ else:
276
+ reasons.append(
277
+ f"gate '{gate_id}' decision: {decision} "
278
+ f"(verdict={gate_decision_raw or 'NO_VERDICT'})"
279
+ )
280
+ fm = gate.get("failure_mode")
281
+ if fm:
282
+ reasons.append(f"failure_mode: {fm}")
283
+
284
+ # Event 1: agent.rollout.gate.evaluated (NON-NORMATIVE observability signal;
285
+ # unchanged shape — not a 067-AT-SPEC-pinned name, see header note).
286
+ evaluated = {
287
+ "name": "agent.rollout.gate.evaluated",
288
+ "attributes": {
289
+ "gate.id": gate_id,
290
+ "gate.result": gate_decision_raw,
291
+ "gate.runner": runner,
292
+ "gate.commit_sha": commit_sha,
293
+ },
294
+ "timestamp": timestamp,
295
+ }
296
+
297
+ # Event 2: gate.decision.emitted (iah-E07b) — NORMATIVE per 067-AT-SPEC § 2.2.
298
+ # Payload: gate.name (string) + gate.decision (enum pass|fail|advisory|error) +
299
+ # gate.policy_ref (string). The reasons / runner / commit_sha are additive
300
+ # diagnostic attributes carried for dashboards; they do not contradict the
301
+ # § 2.2 required payload.
302
+ decision_event = {
303
+ "name": "gate.decision.emitted",
304
+ "attributes": {
305
+ "gate.name": gate_name,
306
+ "gate.decision": decision,
307
+ "gate.policy_ref": policy_ref,
308
+ "gate.id": gate_id,
309
+ "gate.reasons": reasons,
310
+ "gate.runner": runner,
311
+ "gate.commit_sha": commit_sha,
312
+ },
313
+ "timestamp": timestamp,
314
+ }
315
+
316
+ for ev in (evaluated, decision_event):
317
+ print(json.dumps(ev, separators=(",", ":")))
318
+ PY
319
+ )
320
+ # Print each emitted OTLP line with the [OTEL] marker the collector scrapes.
321
+ if [[ -n "$OTEL_LINES" ]]; then
322
+ while IFS= read -r _otel_line; do
323
+ [[ -n "$_otel_line" ]] && printf '[OTEL] %s\n' "$_otel_line" >&2
324
+ done <<< "$OTEL_LINES"
325
+ fi
188
326
  fi
189
327
 
190
328
  # --- Sign + emit ---
@@ -212,6 +350,40 @@ if ! command -v cosign >/dev/null 2>&1; then
212
350
  exit 2
213
351
  fi
214
352
 
353
+ # --- Production DNSSEC + CAA pre-flight gate (CISO binding DR-010 Q5) ----------
354
+ # A "production" signing event is one that pushes a signed Statement to a PUBLIC
355
+ # transparency log (Rekor) — i.e. REKOR_URL is non-empty. Before that irreversible
356
+ # anchor, the predicate namespace MUST be DNSSEC-signed AND CAA-pinned. We run the
357
+ # two read-only checks; if EITHER fails we REFUSE to sign and exit 4.
358
+ #
359
+ # The opt-out EVIDENCE_SKIP_DNS_PREFLIGHT=1 is honored ONLY for non-production
360
+ # (no Rekor push). A real Rekor push can never be silently skipped.
361
+ if [[ -n "$REKOR_URL" ]]; then
362
+ PREFLIGHT_DIR="$(cd "$(dirname "$0")" && pwd)"
363
+ if [[ "${EVIDENCE_SKIP_DNS_PREFLIGHT:-0}" == "1" ]]; then
364
+ echo "emit-evidence: IGNORING EVIDENCE_SKIP_DNS_PREFLIGHT=1 — a Rekor push (REKOR_URL=$REKOR_URL) is a production attestation and CANNOT skip the DNSSEC/CAA pre-flight." >&2
365
+ fi
366
+ echo "emit-evidence: production Rekor push requested — running DNSSEC + CAA pre-flight on '$PREDICATE_DOMAIN'" >&2
367
+
368
+ if ! bash "$PREFLIGHT_DIR/dnssec-check.sh" "$PREDICATE_DOMAIN" >&2; then
369
+ echo "emit-evidence: REFUSING TO SIGN — DNSSEC pre-flight FAILED for '$PREDICATE_DOMAIN'." >&2
370
+ echo "emit-evidence: remediation: pin DNSSEC + CAA on $PREDICATE_DOMAIN before any production attestation." >&2
371
+ echo "emit-evidence: see intent-eval-platform/intent-eval-lab/000-docs (DR-010 Q5 CISO binding) + the iah-E06 runbook." >&2
372
+ exit 4
373
+ fi
374
+ if ! bash "$PREFLIGHT_DIR/caa-check.sh" "$PREDICATE_DOMAIN" >&2; then
375
+ echo "emit-evidence: REFUSING TO SIGN — CAA pre-flight FAILED for '$PREDICATE_DOMAIN'." >&2
376
+ echo "emit-evidence: remediation: pin DNSSEC + CAA on $PREDICATE_DOMAIN before any production attestation." >&2
377
+ echo "emit-evidence: set EXPECTED_CAA_ISSUER to the published CA, then publish a CAA record pinning it." >&2
378
+ exit 4
379
+ fi
380
+ echo "emit-evidence: DNSSEC + CAA pre-flight PASSED for '$PREDICATE_DOMAIN' — proceeding to sign." >&2
381
+ elif [[ "${EVIDENCE_SKIP_DNS_PREFLIGHT:-0}" == "1" ]]; then
382
+ # Non-production sign (no Rekor push) with the explicit opt-out set: keep
383
+ # existing staging flows green without running the network-bound checks.
384
+ echo "emit-evidence: non-production sign (no Rekor push); DNSSEC/CAA pre-flight skipped per EVIDENCE_SKIP_DNS_PREFLIGHT=1." >&2
385
+ fi
386
+
215
387
  # Stage the Statement to a temp file for cosign to consume
216
388
  TMP=$(mktemp -d)
217
389
  trap 'rm -rf "$TMP"' EXIT
@@ -28,6 +28,23 @@
28
28
 
29
29
  set -euo pipefail
30
30
 
31
+ # Bash version floor: these gates rely on bash 4+ features. Refuse early with a
32
+ # clear message on bash 3.x (e.g. macOS system bash) instead of failing later
33
+ # with a cryptic syntax error (jcgw).
34
+ [ "${BASH_VERSINFO:-0}" -ge 4 ] || { echo 'audit-harness requires bash >= 4' >&2; exit 3; }
35
+
36
+ # Cross-platform SHA-256: `sha256sum` ships with GNU coreutils (Linux);
37
+ # macOS only has `shasum -a 256`. Both produce identical `<hash> <file>`
38
+ # output, so downstream awk parsing is unchanged. Mirrors harness-hash.sh.
39
+ if command -v sha256sum >/dev/null 2>&1; then
40
+ SHA256_CMD=(sha256sum)
41
+ elif command -v shasum >/dev/null 2>&1; then
42
+ SHA256_CMD=(shasum -a 256)
43
+ else
44
+ echo "escape-scan: neither sha256sum nor shasum found in PATH" >&2
45
+ exit 2
46
+ fi
47
+
31
48
  DIFF_SRC=""
32
49
  VERIFY_HASH=1
33
50
  JSON_OUT=0
@@ -51,7 +68,15 @@ if [[ "$#" -eq 0 ]]; then
51
68
  fi
52
69
 
53
70
  case "$1" in
54
- -) DIFF_SRC="/dev/stdin" ;;
71
+ -)
72
+ # Buffer stdin into a temp file so the diff can be read multiple times.
73
+ # /dev/stdin is drained by the first grep, which would leave later reads
74
+ # (notably the input_hash sha256) seeing an empty stream — emitting the
75
+ # SHA-256 of "" instead of the real diff hash.
76
+ DIFF_SRC=$(mktemp)
77
+ trap 'rm -f "$DIFF_SRC"' EXIT
78
+ cat > "$DIFF_SRC"
79
+ ;;
55
80
  --staged)
56
81
  DIFF_SRC=$(mktemp)
57
82
  trap 'rm -f "$DIFF_SRC"' EXIT
@@ -198,10 +223,10 @@ if [[ "$JSON_OUT" -eq 1 ]]; then
198
223
  elif [[ "$FLAG" -gt 0 ]]; then
199
224
  result="ADVISORY"
200
225
  fi
201
- input_hash=$(sha256sum "$DIFF_SRC" | awk '{print "sha256:"$1}')
226
+ input_hash=$("${SHA256_CMD[@]}" "$DIFF_SRC" | awk '{print "sha256:"$1}')
202
227
  policy_hash="sha256:0000000000000000000000000000000000000000000000000000000000000000"
203
228
  if [[ -f "$TESTING_MD" ]]; then
204
- policy_hash=$(sha256sum "$TESTING_MD" | awk '{print "sha256:"$1}')
229
+ policy_hash=$("${SHA256_CMD[@]}" "$TESTING_MD" | awk '{print "sha256:"$1}')
205
230
  fi
206
231
  printf '{"gate_id":"audit-harness:%s:escape-scan","result":"%s","input_hash":"%s","policy_hash":"%s","metadata":{"refuse":%d,"challenge":%d,"flag":%d,"coverage_line_floor":%d,"coverage_branch_floor":%d,"mutation_floor":%d}' \
207
232
  "${AUDIT_HARNESS_SIDE:-ci}" "$result" "$input_hash" "$policy_hash" "$REFUSE" "$CHALLENGE" "$FLAG" \
@@ -13,6 +13,11 @@
13
13
 
14
14
  set -euo pipefail
15
15
 
16
+ # Bash version floor: these gates rely on bash 4+ features. Refuse early with a
17
+ # clear message on bash 3.x (e.g. macOS system bash) instead of failing later
18
+ # with a cryptic syntax error (jcgw).
19
+ [ "${BASH_VERSINFO:-0}" -ge 4 ] || { echo 'audit-harness requires bash >= 4' >&2; exit 3; }
20
+
16
21
  PATH_ARG="features/"
17
22
  STRICT=0
18
23
  JSON_OUT=0
@@ -23,6 +23,11 @@
23
23
 
24
24
  set -euo pipefail
25
25
 
26
+ # Bash version floor: these gates rely on bash 4+ features. Refuse early with a
27
+ # clear message on bash 3.x (e.g. macOS system bash) instead of failing later
28
+ # with a cryptic syntax error (jcgw).
29
+ [ "${BASH_VERSINFO:-0}" -ge 4 ] || { echo 'audit-harness requires bash >= 4' >&2; exit 3; }
30
+
26
31
  # Cross-platform SHA-256: `sha256sum` ships with GNU coreutils (Linux);
27
32
  # macOS only has `shasum -a 256`. Both produce identical `<hash> <file>`
28
33
  # output, so downstream awk parsing is unchanged.