@blamejs/exceptd-skills 0.12.28 → 0.12.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,806 @@
1
+ #!/usr/bin/env node
2
+ // One-shot backfill of the theater_test field in data/framework-control-gaps.json.
3
+ // Hard Rule #6: every compliance-framework finding includes a specific test
4
+ // that distinguishes paper compliance from actual security.
5
+ //
6
+ // Per-entry tests are authored against the entry's framework + control_name +
7
+ // real_requirement so each one discriminates the named framework's paper
8
+ // language from the named real-world threat.
9
+
10
+ const fs = require('fs');
11
+ const path = require('path');
12
+
13
+ const CATALOG_PATH = path.resolve(__dirname, '..', 'data', 'framework-control-gaps.json'); // absolute path to ../data/framework-control-gaps.json, resolved from this script's directory
14
+
15
+ const PAPER = 'compliance-theater'; // shared verdict label; the TESTS entries reference it as their verdict_when_failed value
16
+
17
+ // Map of entry-key → theater_test. Hand-authored, grouped by framework family
18
+ // so the discriminating test fits the language an auditor for THAT framework
19
+ // uses. Where two entries share the same audit pattern (e.g. several NIST
20
+ // 800-53 SI-* controls), the tests are similar in shape but worded against
21
+ // the specific control text — never literally copy-pasted.
22
+ const TESTS = {
23
+ // ---------------------------------------------------------------------
24
+ // Universal / cross-framework AI gaps
25
+ // ---------------------------------------------------------------------
26
+ 'ALL-AI-PIPELINE-INTEGRITY': {
27
+ claim: "We monitor our AI providers for security and treat model updates like any other vendor change.",
28
+ test: "Pull the change-control register for the last 4 quarters; filter for entries where the affected asset is an externally hosted LLM, embedding model, or AI provider API. Count how many record (a) the model version pinned at the time, (b) a behavioural regression suite executed against the new version, and (c) the provider changelog reviewed with sign-off. Theater verdict if fewer than 90% of provider-side model updates produced an in-scope change-control entry, or if any sampled entry lacks a regression-suite artifact.",
29
+ evidence_required: ["change-control register CSV export filtered to AI/ML assets", "behavioural regression test results bundle keyed to provider model versions", "provider changelog review log with reviewer identity + timestamp"],
30
+ verdict_when_failed: PAPER
31
+ },
32
+ 'ALL-MCP-TOOL-TRUST': {
33
+ claim: "Developer tooling is governed; AI plugins are no different from any other dev dependency.",
34
+ test: "Scan every developer endpoint and CI runner for installed MCP server manifests (.claude/, .cursor/, .vscode/, ~/.codeium/, etc.). For each discovered MCP server, attempt to verify a publisher signature, locate it in an organisational allowlist, and trace its tool-grant prompt history. Theater verdict if any endpoint has an MCP server that is unsigned, absent from the allowlist, or has tool grants that bypassed user prompting.",
35
+ evidence_required: ["endpoint-scan output enumerating MCP server manifests with hashes", "organisational MCP allowlist (or evidence one does not exist)", "tool-grant audit log for one randomly selected developer over 30 days"],
36
+ verdict_when_failed: PAPER
37
+ },
38
+ 'ALL-PROMPT-INJECTION-ACCESS-CONTROL': {
39
+ claim: "Our IAM controls cover all actions taken in our environment, including those by AI agents.",
40
+ test: "Review the audit log for the past 30 days of any AI-agent service account. Sample 10 actions taken by the agent; for each, identify whether the action was the result of (a) an end-user request that the agent fulfilled within scope, or (b) content from a third-party data source (web page, document, RAG corpus) that influenced the action. Theater verdict if any sampled action originated from third-party content without per-action user re-authorization, or if the audit log does not preserve the prompt input chain for forensic reconstruction.",
41
+ evidence_required: ["AI agent service account audit log 30d", "prompt input chain (system prompt + user prompt + tool results) for sampled actions", "policy text defining prompt-level scope for each agent role"],
42
+ verdict_when_failed: PAPER
43
+ },
44
+
45
+ // ---------------------------------------------------------------------
46
+ // Australian frameworks (Essential 8, ISM)
47
+ // ---------------------------------------------------------------------
48
+ 'AU-Essential-8-App-Hardening': {
49
+ claim: "We hardened user applications per Essential Eight Maturity Level 2; browsers and Office are locked down.",
50
+ test: "Take the operator's hardened-application list. Confirm whether it enumerates AI coding assistants (Copilot, Cursor, Claude Code, Windsurf), MCP servers, and AI-tool config files (.claude/settings.json, .cursor/mcp.json, .vscode/settings.json:chat.tools.autoApprove) as in-scope. Pick a developer endpoint at random; verify those config files are integrity-monitored with the same alerting profile as security-sensitive files. Theater verdict if AI assistants are absent from the hardened-application list or if a config-file modification on the sampled endpoint would not generate an integrity alert.",
51
+ evidence_required: ["hardened-application policy document with version date", "FIM/HIDS configuration showing watch list", "test-induced modification on a non-production endpoint to confirm alert fires"],
52
+ verdict_when_failed: PAPER
53
+ },
54
+ 'AU-Essential-8-Backup': {
55
+ claim: "Daily backups with off-network retention satisfy Essential Eight Maturity Level 2 Strategy 8.",
56
+ test: "From the latest backup catalogue, confirm presence of fine-tuned model weights, RAG corpora, and AI tool configuration files (.claude/settings.json, MCP server registry). Restore one RAG corpus to an isolated environment; per-document-hash compare to current production. Theater verdict if AI artefacts are absent from the catalogue, or if any document hash diverges from production without a documented authoring event explaining the divergence.",
57
+ evidence_required: ["backup catalogue manifest", "test-restore log for one RAG corpus", "per-document hash diff between restored and production corpus"],
58
+ verdict_when_failed: PAPER
59
+ },
60
+ 'AU-Essential-8-MFA': {
61
+ claim: "MFA is enforced on all administrative identities per Essential Eight ML2 with phishing-resistant factors.",
62
+ test: "Sample 10 admin identities; for each, confirm the registered authenticator class is FIDO2/WebAuthn-bound (not SMS, voice, or TOTP). Then enumerate AI-provider service credentials (OpenAI, Anthropic, HuggingFace API tokens) used by the same admin scope; check token age and rotation policy. Theater verdict if any sampled human admin uses SMS/voice, or if any AI-provider credential has no rotation policy or is older than 90 days.",
63
+ evidence_required: ["IdP authenticator export for sampled admins", "AI-provider credential inventory with creation/rotation timestamps", "documented credential rotation policy"],
64
+ verdict_when_failed: PAPER
65
+ },
66
+ 'AU-Essential-8-Patch': {
67
+ claim: "We patch operating systems within the Essential Eight ML3 48-hour window for critical exploits.",
68
+ test: "Pull the last 5 CISA KEV listings affecting an OS in scope. For each, measure elapsed time from KEV listing date to deployed-on-fleet-percentage >=95%. For one host that cannot accept a reboot in the window, confirm a live-patching capability is provisioned and was used. Theater verdict if any sampled KEV listing exceeded 48h to 95% fleet coverage, or if any 'cannot reboot' host lacks a live-patching pathway.",
69
+ evidence_required: ["patch-deployment telemetry timestamped against KEV listing dates", "live-patch agent inventory with last-applied-patch evidence", "fleet coverage rollup per CVE"],
70
+ verdict_when_failed: PAPER
71
+ },
72
+
73
+ // ---------------------------------------------------------------------
74
+ // CIS Controls
75
+ // ---------------------------------------------------------------------
76
+ 'CIS-Controls-v8-Control7': {
77
+ claim: "We meet CIS Control 7 IG3 by remediating critical vulnerabilities within one month.",
78
+ test: "Pull the vulnerability register for the past 12 months. Filter for CVEs that appeared on CISA KEV with public PoC during the period. For each, measure (a) time from KEV listing to verified mitigation, and (b) whether the mitigation was a live patch, configuration change, or isolation. Theater verdict if any KEV+PoC entry exceeded 4h to verified mitigation or if 'monthly cadence' was applied to a KEV-listed CVE.",
79
+ evidence_required: ["vuln-management register CSV export with timestamped state transitions", "KEV listing dates per CVE", "mitigation evidence (patch deployment log, config change ticket, isolation network ACL)"],
80
+ verdict_when_failed: PAPER
81
+ },
82
+
83
+ // ---------------------------------------------------------------------
84
+ // CMMC / FedRAMP
85
+ // ---------------------------------------------------------------------
86
+ 'CMMC-2.0-Level-2': {
87
+ claim: "We are CMMC Level 2 attested across all 110 NIST 800-171 controls; CUI is protected end-to-end.",
88
+ test: "Walk the 3.4.1 (CM) asset inventory and check for AI assistants and MCP servers with CUI-adjacent access. Then inspect 3.13 system-and-communications protections to confirm AI-API egress is enumerated as a CUI exfiltration channel with monitoring. Theater verdict if AI assistants are absent from the asset inventory, or if AI-API egress at the CUI boundary has no monitoring rule, or if cross-walks to UK DEF STAN / AU DISP for joint programmes are missing.",
89
+ evidence_required: ["3.4.1 asset inventory export filtered to AI/ML and MCP entries", "egress monitoring rule export for AI-API destinations", "cross-walk document for joint programmes (if any)"],
90
+ verdict_when_failed: PAPER
91
+ },
92
+ 'FedRAMP-Rev5-Moderate': {
93
+ claim: "All cloud services in our boundary are FedRAMP Moderate authorised; AI services are covered.",
94
+ test: "Enumerate every AI/ML service consumed within the authorisation boundary. For each, locate either (a) a FedRAMP Moderate ATO letter, (b) a documented exception with risk acceptance signed by the AO, or (c) an equivalence path (StateRAMP, FedRAMP Tailored, etc.). Verify the SSP includes shared-responsibility language covering prompt data, output data, training opt-out, and retention. Theater verdict if any AI service is in use without one of (a)-(c), or if the SSP shared-responsibility matrix lacks AI-specific clauses.",
95
+ evidence_required: ["AI service inventory keyed to FedRAMP marketplace IDs", "AO-signed risk acceptance for non-authorised AI services", "SSP excerpts showing AI shared-responsibility language"],
96
+ verdict_when_failed: PAPER
97
+ },
98
+
99
+ // ---------------------------------------------------------------------
100
+ // CWE / SBOM standards
101
+ // ---------------------------------------------------------------------
102
+ 'CWE-Top-25-2024-meta': {
103
+ claim: "Our SAST/DAST coverage maps to the CWE Top 25; we test for the most dangerous weaknesses.",
104
+ test: "Pull the SAST/DAST rule pack and enumerate which CWE IDs each rule targets. Confirm rules exist for AI-specific CWE classes (CWE-1039 model integrity, CWE-1395 dependency on vulnerable third-party component, prompt-injection class CWEs). Run the rule pack against a known-vulnerable test fixture containing prompt-injection patterns. Theater verdict if AI-relevant CWE IDs are absent from the rule pack, or if the fixture run produces zero findings on the planted prompt-injection.",
105
+ evidence_required: ["SAST/DAST rule-to-CWE mapping export", "test fixture with planted prompt-injection patterns", "scan report against the fixture"],
106
+ verdict_when_failed: PAPER
107
+ },
108
+ 'CycloneDX-v1.6-SBOM': {
109
+ claim: "We ship a CycloneDX 1.6 SBOM with every release; supply-chain transparency is satisfied.",
110
+ test: "Pull the SBOM for the most recent release. Confirm presence of an `mlComponent` (or equivalent ML-BOM) section enumerating model + adapters + tokenizer. Check provenance fields (signature, supplier, training data source) for empty values. Confirm MCP servers in the build environment are reflected. Theater verdict if ML components are absent, or if more than 20% of components have an empty provenance field.",
111
+ evidence_required: ["latest CycloneDX 1.6 SBOM JSON", "ML-BOM section specifically", "MCP server manifest from build environment"],
112
+ verdict_when_failed: PAPER
113
+ },
114
+ 'SPDX-v3.0-SBOM': {
115
+ claim: "We publish SPDX 3.0 SBOMs and they include AI-BOM coverage per the AI profile.",
116
+ test: "Pull the SPDX 3.0 document for the most recent release. Confirm the `Build` profile and `AI` profile are both declared. Inspect AI-profile sections for populated `useSensitivePersonalInformation`, `safetyRiskAssessment`, `modelDataPreprocessing`, and training-data fields. Cross-walk SPDX AI-BOM identifiers against CycloneDX ML-BOM identifiers to confirm consistency. Theater verdict if the AI profile is declared but key fields are empty, or if SPDX↔CycloneDX cross-walk produces conflicting model identities.",
117
+ evidence_required: ["latest SPDX 3.0 document with profile declarations", "AI-profile field-population coverage report", "SPDX↔CycloneDX cross-walk mapping"],
118
+ verdict_when_failed: PAPER
119
+ },
120
+
121
+ // ---------------------------------------------------------------------
122
+ // EU DORA family
123
+ // ---------------------------------------------------------------------
124
+ 'DORA-Art28': {
125
+ claim: "Our DORA Art. 28 ICT third-party register covers all critical or important function dependencies.",
126
+ test: "From the Art. 28 register, sample 5 third-party ICT services consumed in CIF (critical or important function) flows. For each, verify presence of build-provenance metadata (SLSA producer identifier, workflow file hash, cache key surface). Check for monthly producer-side cache verification evidence. Theater verdict if any sampled CIF dependency lacks build-provenance metadata, or if cache verification has not run in the last 90 days.",
127
+ evidence_required: ["Art. 28 register export with provenance fields", "monthly cache-verification job logs", "SLSA attestations from sampled producers"],
128
+ verdict_when_failed: PAPER
129
+ },
130
+ 'DORA-RTS-Subcontracting': {
131
+ claim: "Our DORA RTS subcontracting register lists every sub-processor for ICT services supporting CIF.",
132
+ test: "Pull the subcontracting register. Confirm enumeration of AI sub-processors per ICT service line: model providers, embedding providers, vector stores, RAG corpus hosts, MCP server providers. Compute foundation-model concentration (% of CIF flows that share a single foundation model). Theater verdict if AI sub-processors are absent from any service line that consumes AI, or if foundation-model concentration is undocumented.",
133
+ evidence_required: ["subcontracting register export with AI sub-processor entries", "foundation-model concentration analysis report", "exit-strategy evidence per critical AI sub-processor"],
134
+ verdict_when_failed: PAPER
135
+ },
136
+ 'DORA-ITS-TLPT': {
137
+ claim: "Our most recent threat-led penetration test under DORA Art. 26 covered the full CIF estate.",
138
+ test: "Pull the TLPT scoping template and final report. Confirm AI/MCP assets are enumerated in scope. Verify the threat-intel inputs cite ATLAS TTPs and AI-discovered CVE classes. Confirm the TLPT team includes documented AI/MCP competency. Inspect the report for at least one finding originating from an AI/MCP attack path. Theater verdict if the scoping template excludes AI/MCP assets despite their presence in CIF flows, or if the team lacks documented AI competency.",
139
+ evidence_required: ["TLPT scoping template", "TLPT final report with AI/MCP findings section", "TLPT team CVs covering AI/MCP red-team experience"],
140
+ verdict_when_failed: PAPER
141
+ },
142
+ 'DORA-RTS-Incident-Classification': {
143
+ claim: "Our incident-classification process implements the DORA RTS criteria for major ICT incidents.",
144
+ test: "Pull the incident register for the last 12 months. For each major-classified incident, confirm presence of qualitative criteria evaluation. Then ask whether AI-incident classes (model invocations on injected intent, RAG corpus integrity loss, agent actions outside scope) would surface a major classification under the current criteria. Theater verdict if AI-class quantitative measures are absent, or if a synthetic AI-incident scenario evaluated against current criteria fails to trigger major classification when impact warrants it.",
145
+ evidence_required: ["incident register CSV with classification rationale per entry", "RTS criteria mapping document", "synthetic AI-incident classification dry-run record"],
146
+ verdict_when_failed: PAPER
147
+ },
148
+ 'DORA-IA-CTPP-Oversight': {
149
+ claim: "We track designated critical third-party providers (CTPPs) per DORA Art. 31-44.",
150
+ test: "Pull the CTPP designation list. Confirm whether frontier-AI providers and MCP/agent-runtime providers consumed by the entity appear or have a documented evaluation against designation criteria. Check Lead Overseer audit deliverables for AI-specific artefacts (model cards, system cards, eval results, training data manifests). Theater verdict if AI providers consumed at scale are absent without an evaluation record, or if Lead Overseer artefacts lack AI-specific content.",
151
+ evidence_required: ["CTPP designation list with evaluation rationale", "Lead Overseer engagement record with deliverable list", "AI-provider concentration analysis"],
152
+ verdict_when_failed: PAPER
153
+ },
154
+ 'DORA-Art-19-IdP-4h': {
155
+ claim: "We can meet the DORA Art. 19 4-hour major-ICT-incident notification clock for IdP compromise.",
156
+ test: "Run a tabletop: at T0 a SIEM alert fires for IdP token-signing certificate rotation by an unrecognised principal. Stopwatch the elapsed time from T0 to a draft notification ready for the Competent Authority covering scope, root cause hypothesis, impacted services, and recovery posture. Theater verdict if elapsed time exceeds 4h, or if the playbook does not name the on-call who initiates the clock, or if the tabletop has not been run in the last 12 months.",
157
+ evidence_required: ["tabletop execution log with stopwatch timestamps", "DORA notification draft produced under exercise", "on-call rota covering 24/7 IdP-incident response"],
158
+ verdict_when_failed: PAPER
159
+ },
160
+ 'DORA-Art-21-Telecom-ICT': {
161
+ claim: "Our telecom ICT third-party arrangements satisfy DORA Art. 21.",
162
+ test: "Pull the Art. 21 ICT register; filter for telecom-class providers (carriers, MVNOs, SMS gateways, voice carriers). Confirm enumeration of LI-gateway access risk, signaling-protocol exposure (SS7/Diameter/HTTP/2 for 5G), and sub-carrier visibility into CIF flows. Theater verdict if telecom providers appear only as 'connectivity vendors' without carrier-class threat-model entries, or if no concentration analysis exists across telecom providers.",
163
+ evidence_required: ["Art. 21 ICT register telecom subset", "carrier-class threat-model document", "concentration analysis report"],
164
+ verdict_when_failed: PAPER
165
+ },
166
+
167
+ // ---------------------------------------------------------------------
168
+ // EU AI Act
169
+ // ---------------------------------------------------------------------
170
+ 'EU-AI-Act-Art-15': {
171
+ claim: "Our high-risk AI system meets the EU AI Act Art. 15 'appropriate level of cybersecurity'.",
172
+ test: "Request the cybersecurity test pack. Confirm presence of (a) prompt-injection red-team results bound to OWASP LLM Top 10, (b) RAG-corpus integrity test results, (c) model-extraction-resistance assessment, (d) MCP/plugin trust verification log. Then check incident-reporting bridge to NIS2 + DORA. Theater verdict if any of (a)-(d) are absent or older than 12 months, or if the bridge to NIS2/DORA notification clocks is undocumented.",
173
+ evidence_required: ["adversarial test pack covering OWASP LLM Top 10", "RAG corpus integrity test report", "incident-reporting playbook with NIS2/DORA bridge"],
174
+ verdict_when_failed: PAPER
175
+ },
176
+ 'EU-AI-Act-Art-53-GPAI': {
177
+ claim: "We comply with EU AI Act Art. 53 GPAI provider obligations including training-data summary publication.",
178
+ test: "Pull the published training-data summary. Confirm machine-readable corpus-level granularity sufficient for copyright audit (per-corpus identifier + size + collection method + opt-out evidence). Walk downstream-provider documentation; confirm signed bindings to a production model fingerprint. Theater verdict if the summary is prose-only without machine-readable structure, or if downstream docs reference an unsigned/floating model identity.",
179
+ evidence_required: ["machine-readable training-data summary file (YAML/JSON)", "downstream documentation bundle with signed model fingerprint", "per-corpus copyright-policy attestations"],
180
+ verdict_when_failed: PAPER
181
+ },
182
+ 'EU-AI-Act-Art-55-Systemic': {
183
+ claim: "Our GPAI model with systemic risk meets the additional Art. 55 obligations.",
184
+ test: "Pull the adversarial-evaluation report. Confirm coverage of OWASP LLM Top 10 + ATLAS TTPs + MCP-trust scenarios. Pull the energy report; confirm kWh-per-million-tokens and training compute under ISO/IEC TR 24028 framing. Cross-walk the incident-reporting clock with DORA Art. 19 timing. Theater verdict if the eval omits any of OWASP/ATLAS/MCP coverage, if energy reporting is qualitative only, or if the incident-clock cross-walk is missing.",
185
+ evidence_required: ["adversarial eval report with method per attack class", "energy reporting per ISO/IEC TR 24028", "incident-clock cross-walk to DORA"],
186
+ verdict_when_failed: PAPER
187
+ },
188
+ 'EU-AI-Act-Annex-IX-Conformity': {
189
+ claim: "Our high-risk AI system passed conformity assessment per Annex IX.",
190
+ test: "If internal-control route was used: request the third-party sample audit (e.g. AI-Office annual sampling) outcome. If notified-body route: request the body's scope letter and confirm AI-specific competency. For both, confirm an operational definition of 'substantial modification' covers fine-tuning, RAG changes, and system-prompt changes — and that a recent change was assessed against it. Theater verdict if the sampling/notified-body record is absent, or if substantial-modification gating has never fired despite a known fine-tune or RAG change.",
191
+ evidence_required: ["internal-control attestation + sampling outcome OR notified-body scope letter", "substantial-modification policy document", "change log showing modifications assessed against the policy"],
192
+ verdict_when_failed: PAPER
193
+ },
194
+ 'EU-AI-Act-GPAI-CoP': {
195
+ claim: "We follow the GPAI Code of Practice as our presumed-compliance route for Art. 53/55.",
196
+ test: "Confirm signatory status. Pull the AI Office's published enforcement-deference position for code-conformant signatories. For each evidentiary commitment in the Code, locate the artefact (training-data summary, eval report, downstream-distributor list, energy report) and confirm it is current. Theater verdict if signatory but any required Code artefact is missing or older than the Code's refresh cadence.",
197
+ evidence_required: ["Code-of-Practice signatory confirmation", "evidentiary artefact bundle keyed to Code commitments", "AI Office enforcement-deference reference"],
198
+ verdict_when_failed: PAPER
199
+ },
200
+
201
+ // ---------------------------------------------------------------------
202
+ // EU CRA
203
+ // ---------------------------------------------------------------------
204
+ 'EU-CRA-Art13': {
205
+ claim: "We satisfy EU CRA Art. 13 essential cybersecurity requirements with technical documentation on file.",
206
+ test: "Request the canonical build-pipeline definition for the most recent release. Confirm publication alongside the release artifact (workflow file hash, runner attestation, secrets scope). Pick the release-being-installed at a downstream operator; verify its build pipeline matches the published definition by comparing producer-side hashes. Confirm the incident-notification clock starts from FIRST awareness (not from confirmed exploit). Theater verdict if pipeline definitions are unpublished, hashes diverge, or the clock policy starts later than first awareness.",
207
+ evidence_required: ["published build-pipeline definition with hashes", "downstream-side hash verification log", "incident-notification policy document"],
208
+ verdict_when_failed: PAPER
209
+ },
210
+
211
+ // ---------------------------------------------------------------------
212
+ // HIPAA
213
+ // ---------------------------------------------------------------------
214
+ 'HIPAA-Security-Rule-164.312(a)(1)': {
215
+ claim: "We meet HIPAA 164.312(a)(1) access controls; PHI is access-controlled with unique user IDs.",
216
+ test: "Inventory AI providers in use; for each consuming PHI, locate a BAA covering prompt retention + training opt-out + breach notification within HIPAA timelines. Inspect prompt-flow telemetry for PHI; confirm DLP minimisation runs pre-egress. Confirm AI agent sessions have controls separate from human user controls. Theater verdict if any AI provider consuming PHI lacks a BAA, if DLP is absent on prompt egress, or if AI agent sessions inherit human controls without separation.",
217
+ evidence_required: ["AI-provider BAA bundle", "DLP rule export for prompt egress", "agent-session control configuration"],
218
+ verdict_when_failed: PAPER
219
+ },
220
+ 'HIPAA-Security-Rule-2026-NPRM-164.308': {
221
+ claim: "Our administrative safeguards meet the HIPAA Security Rule including 2026 NPRM updates.",
222
+ test: "Walk the technology-asset register; confirm AI assistants and model-API providers are enumerated as asset categories. Pull the network map; confirm AI-API egress routes are marked with BAA and training-opt-out attestation. Confirm the tabletop catalogue contains at least one AI-specific PHI loss scenario exercised in the past 12 months. Theater verdict if AI assets are absent, network-map AI routes lack attestations, or the tabletop catalogue has no AI scenario.",
223
+ evidence_required: ["technology-asset register with AI categories", "network map with AI-API egress annotations", "tabletop exercise catalogue with execution dates"],
224
+ verdict_when_failed: PAPER
225
+ },
226
+ 'HIPAA-Security-Rule-2026-NPRM-164.310': {
227
+ claim: "Our physical safeguards meet HIPAA 164.310 including network-access logging in the 2026 NPRM.",
228
+ test: "Sample developer endpoints with PHI exposure. Confirm AI-API session logging is captured under the network-access-logging mandate (timestamp, user, prompt hash, response hash, destination provider). Confirm media-disposal verification extends to AI training-data opt-out attestation per provider. Theater verdict if AI-API sessions are unlogged, or if any departed user retained AI provider credentials past their termination date.",
229
+ evidence_required: ["AI-API session log sample for sampled endpoints", "training-data opt-out attestation per AI provider", "departed-user credential-revocation evidence"],
230
+ verdict_when_failed: PAPER
231
+ },
232
+ 'HIPAA-Security-Rule-2026-NPRM-164.312': {
233
+ claim: "Our technical safeguards meet HIPAA 164.312 including the 2026 NPRM expansions.",
234
+ test: "Pick 5 AI-agent flows that touch PHI. For each, confirm a per-action MFA-equivalent (delegated-authority attestation) is captured. Inspect storage of AI-provider artifacts (conversation history, embeddings, fine-tune sets) for encryption-at-rest. Confirm prompt-injection and RAG-poisoning detection rules exist as anti-malware-equivalents. Theater verdict if per-action attestations are absent, AI artifacts are stored unencrypted, or no prompt-injection/RAG-poisoning detection rules exist.",
235
+ evidence_required: ["delegated-authority attestation samples", "encryption-at-rest configuration for AI artifacts", "prompt-injection / RAG-poisoning detection rule export"],
236
+ verdict_when_failed: PAPER
237
+ },
238
+ 'HIPAA-Security-Rule-2026-NPRM-164.314': {
239
+ claim: "Our BAAs satisfy HIPAA 164.314 organisational requirements including 2026 NPRM AI provisions.",
240
+ test: "Pull the AI-provider BAA portfolio. Confirm each contract covers (a) prompt retention policy with explicit duration, (b) training opt-out with attestation evidence, (c) breach-notification timeline aligned with HIPAA, (d) sub-processor disclosure. Theater verdict if any AI provider's BAA is silent on prompt retention, training opt-out, or sub-processors, or if 'training opt-out' is contractual without an evidence path.",
241
+ evidence_required: ["AI-provider BAA portfolio with clause-by-clause checklist", "training-opt-out attestation evidence per provider", "sub-processor disclosure inventories"],
242
+ verdict_when_failed: PAPER
243
+ },
244
+
245
+ // ---------------------------------------------------------------------
246
+ // HITRUST
247
+ // ---------------------------------------------------------------------
248
+ 'HITRUST-CSF-v11.4-09.l': {
249
+ claim: "We meet HITRUST CSF 09.l outsourced services management for all third-party providers.",
250
+ test: "Pull the third-party register. Filter for AI providers; confirm AI vendors are inventoried separately from general SaaS. Spot-check 5 AI vendors for AI-specific contractual clauses (prompt retention, training opt-out, residency, model version pinning, prompt-breach notification). Search for self-signup AI usage on developer endpoints; confirm a policy prohibits it for in-scope data. Theater verdict if AI is bucketed inside generic SaaS, if any sampled AI vendor lacks AI-specific clauses, or if self-signup AI is in evidence on a developer endpoint that touches in-scope data.",
251
+ evidence_required: ["third-party register with AI subset", "AI-specific contract clause checklist per vendor", "endpoint scan for self-signup AI tools"],
252
+ verdict_when_failed: PAPER
253
+ },
254
+
255
+ // ---------------------------------------------------------------------
256
+ // IEC 62443 / NIST 800-82 / NERC CIP — OT / ICS
257
+ // ---------------------------------------------------------------------
258
+ 'IEC-62443-3-3': {
259
+ claim: "Our IACS architecture meets IEC 62443-3-3 system security requirements.",
260
+ test: "Inspect the zone-and-conduit diagram. Confirm AI operator assistants and AI-API egress paths from the corporate-to-OT boundary are enumerated as conduits with documented security levels. Sample 3 OT operator workstations; confirm any installed AI assistants are inventoried and that prompt-injection-class threats appear in the threat model. Theater verdict if AI conduits are absent from the zone diagram, or if AI assistants on OT operator workstations are not threat-modelled.",
261
+ evidence_required: ["zone-and-conduit diagram with AI annotations", "OT operator workstation inventory", "threat-model document covering AI conduit threats"],
262
+ verdict_when_failed: PAPER
263
+ },
264
+ 'NIST-800-82r3': {
265
+ claim: "Our OT environment is secured per NIST SP 800-82 Rev 3 guidance.",
266
+ test: "Confirm the OT asset inventory enumerates AI operator assistants, AI-API egress at the IT/OT boundary, and any MCP servers running on engineering workstations. Inspect monitoring rules for AI-prompted operator actions. Theater verdict if AI assets are absent from the OT inventory, or if no monitoring rule alerts on AI-initiated control-system commands.",
267
+ evidence_required: ["OT asset inventory with AI subset", "monitoring rule export for AI-prompted operator actions", "engineering workstation MCP-server scan"],
268
+ verdict_when_failed: PAPER
269
+ },
270
+ 'NERC-CIP-007-6-R4': {
271
+ claim: "We satisfy NERC CIP-007-6 R4 security event monitoring for our BES Cyber Systems.",
272
+ test: "Pull the R4 monitored-event source list. Confirm AI operator assistants are enumerated with explicit alerting on assistant-initiated operator commands. Confirm AI-API egress at the corporate-to-OT boundary is monitored. Confirm prompt-injection indicators are present as a distinct event class. Theater verdict if AI assistants are not monitored event sources, or if no NIS2 24h/72h alignment is documented for multinational operators.",
273
+ evidence_required: ["R4 event source inventory", "alerting rule export for AI-initiated commands", "NIS2 alignment document where applicable"],
274
+ verdict_when_failed: PAPER
275
+ },
276
+
277
+ // ---------------------------------------------------------------------
278
+ // ISO 27001 / ISO 27017 / ISO 23894 / ISO 42001
279
+ // ---------------------------------------------------------------------
280
+ 'ISO-27001-2022-A.8.16': {
281
+ claim: "Our monitoring activities under ISO 27001:2022 A.8.16 cover all in-scope systems.",
282
+ test: "From the SIEM event-source inventory, confirm AI-API egress events, MCP server invocations, and AI-agent action audit logs are ingested. Sample one alert from each class in the past 30 days; confirm an analyst reviewed it. Theater verdict if any of those source classes are missing from the SIEM, or if no AI/MCP-related alert has been triaged in the past 90 days despite traffic being present.",
283
+ evidence_required: ["SIEM event-source inventory", "alert triage records for AI/MCP-class alerts", "telemetry volume report by source class"],
284
+ verdict_when_failed: PAPER
285
+ },
286
+ 'ISO-27001-2022-A.8.28': {
287
+ claim: "We follow secure coding practices per ISO 27001:2022 A.8.28.",
288
+ test: "Pull the secure-coding standard. Confirm it addresses AI-generated code (Copilot, Claude Code, Cursor diffs) with reviewer-attestation requirements and prompt-injection-class CWE coverage. Check git history for AI-coauthored commits; confirm the pre-merge review record is preserved. Theater verdict if the standard is silent on AI-generated code, or if AI-attributed commits lack a reviewer-attestation trail.",
289
+ evidence_required: ["secure-coding standard document with version date", "git history sample with AI-attribution analysis", "code-review records for AI-attributed diffs"],
290
+ verdict_when_failed: PAPER
291
+ },
292
+ 'ISO-27001-2022-A.8.30': {
293
+ claim: "Our outsourced development meets ISO 27001:2022 A.8.30 oversight requirements.",
294
+ test: "Pull the outsourced-dev contract bundle. Confirm clauses naming AI tool usage by the contractor (which AI assistants, which models, which prompt destinations) and reviewer attestation for AI-generated diffs. Sample one delivered build; confirm SBOM enumerates AI-build dependencies. Theater verdict if contracts are silent on contractor AI usage, or if delivered SBOMs omit AI build-environment components.",
295
+ evidence_required: ["outsourced-dev contract clause export", "delivered build SBOM", "contractor AI-usage attestation"],
296
+ verdict_when_failed: PAPER
297
+ },
298
+ 'ISO-27001-2022-A.8.8': {
299
+ claim: "We manage technical vulnerabilities per ISO 27001:2022 A.8.8.",
300
+ test: "Pull the vuln-management procedure. Confirm a CISA-KEV-anchored response tier (4h to verified mitigation for KEV+PoC). Pull the past 12 months of KEV-listed CVEs in scope; measure time-to-mitigation. Theater verdict if the procedure has only a generic 'critical = 30 days' SLA, or if any KEV+PoC entry exceeded the documented tier.",
301
+ evidence_required: ["A.8.8 procedure document", "KEV-listed CVE list with mitigation timestamps", "live-patching capability evidence"],
302
+ verdict_when_failed: PAPER
303
+ },
304
+ 'ISO-IEC-23894-2023-clause-7': {
305
+ claim: "We perform AI risk assessment per ISO/IEC 23894:2023 clause 7.",
306
+ test: "Pull the most recent AI risk assessment. Confirm coverage of supply-chain risks (model provenance, MCP/plugin trust, training-data integrity), prompt-injection as a current threat, and operational AI-incident scenarios. Confirm the assessment is dated within the framework's review cadence. Theater verdict if supply-chain or prompt-injection risks are absent, or if the assessment has no documented owner who acted on findings.",
307
+ evidence_required: ["AI risk assessment document", "risk-treatment plan with action owner", "review-cadence schedule"],
308
+ verdict_when_failed: PAPER
309
+ },
310
+ 'ISO-IEC-42001-2023-clause-6.1.2': {
311
+ claim: "Our AI Management System satisfies ISO/IEC 42001:2023 clause 6.1.2 risk-treatment requirements.",
312
+ test: "Walk the AIMS risk-treatment register. Confirm prompt injection, MCP/agent trust, RAG-poisoning, and model-supply-chain compromise appear as named risks with treatment plans. Confirm owner + due-date + verification path for each. Theater verdict if any of those risk classes are absent, or if treatments have no verification path documented.",
313
+ evidence_required: ["AIMS risk-treatment register export", "risk-treatment plan with verification paths", "AIMS internal audit report"],
314
+ verdict_when_failed: PAPER
315
+ },
316
+ 'ISO-27017-Cloud-IAM': {
317
+ claim: "Our cloud-IAM posture is hardened per ISO/IEC 27017:2015 cloud-services controls.",
318
+ test: "Inspect cloud-IAM configuration: managed identities token-bound to instance identity (where supported); IMDSv2 required with hop-limit and short token TTL; bearer-token TTLs ≤1h non-CAE / ≤24h with Continuous Access Evaluation. Spot-check 10 cross-account assume-role chains and confirm subject-claim specificity > 'wildcard'. Theater verdict if IMDSv1 is in use anywhere, if bearer TTLs exceed the ceilings, or if any sampled cross-account chain has wildcard subject claims.",
319
+ evidence_required: ["cloud-IAM configuration export per CSP", "IMDSv2 enforcement audit", "assume-role policy document sample"],
320
+ verdict_when_failed: PAPER
321
+ },
322
+
323
+ // ---------------------------------------------------------------------
324
+ // NIS2
325
+ // ---------------------------------------------------------------------
326
+ 'NIS2-Art21-incident-handling': {
327
+ claim: "We can meet NIS2 Art. 21 incident handling obligations including the 24h early warning.",
328
+ test: "Run a tabletop with a synthetic significant-incident inject affecting an essential-service flow at T0. Stopwatch elapsed time to a Competent Authority early warning containing initial assessment, severity, and impact. Theater verdict if elapsed exceeds 24h, if no on-call is named to start the clock, or if the playbook has not been exercised in the past 12 months.",
329
+ evidence_required: ["tabletop execution log", "early-warning notification draft", "on-call rota and playbook ownership"],
330
+ verdict_when_failed: PAPER
331
+ },
332
+ 'NIS2-Art21-patch-management': {
333
+ claim: "Our patch-management posture meets NIS2 Art. 21(2)(e) for technical and organisational measures.",
334
+ test: "Pull the patch SLA document. Confirm a CISA-KEV-anchored tier (4h to verified mitigation for KEV+PoC). Cross-reference past 12 months of KEV-listed CVEs in scope; measure compliance. Confirm live-patching capability for hosts that cannot reboot in window. Theater verdict if the SLA collapses to 'critical = 30 days' across the board, or if any KEV+PoC entry breached the documented tier.",
335
+ evidence_required: ["patch SLA document", "KEV listing→mitigation telemetry", "live-patching agent inventory"],
336
+ verdict_when_failed: PAPER
337
+ },
338
+ 'NIS2-Annex-I-Telecom': {
339
+ claim: "Our NIS2 Annex I telecom obligations are satisfied; signaling and LI-system risks are managed.",
340
+ test: "Confirm gNB firmware hash attestation pipeline runs continuously across the production fleet. Confirm signaling-anomaly baselines exist per PLMN-pair and that anomalies trigger SOC tickets. Confirm LI-gateway activation auditing runs at least quarterly. Theater verdict if any of those streams are absent, or if no signaling anomaly has been triaged in 90 days despite carrier-pair traffic.",
341
+ evidence_required: ["gNB firmware hash attestation telemetry", "signaling-anomaly baseline document and recent alerts", "LI-gateway activation audit log"],
342
+ verdict_when_failed: PAPER
343
+ },
344
+ 'NIS2-Art-21-Federated-Identity': {
345
+ claim: "Our identity-provider risk management satisfies NIS2 Art. 21 for federated-identity dependencies.",
346
+ test: "From the supply-chain register, confirm each IdP (Okta, Entra ID, Auth0, Ping, Google Workspace) is listed as an essential-service dependency with concentration analysis. Inspect monitoring rules for token-signing certificate rotation, claim-transformation rule changes, and management-API token activity. Theater verdict if IdPs appear only as 'IT vendor' without dependency-class treatment, or if token-signing rotation events have no alerting rule.",
347
+ evidence_required: ["supply-chain register IdP subset", "IdP control-plane monitoring rule export", "IdP concentration analysis"],
348
+ verdict_when_failed: PAPER
349
+ },
350
+
351
+ // ---------------------------------------------------------------------
352
+ // NIST SPs and AI RMF
353
+ // ---------------------------------------------------------------------
354
+ 'NIST-800-115': {
355
+ claim: "Our pen-test methodology aligns with NIST SP 800-115 technical guidance.",
356
+ test: "Pull the most recent pen-test report. Confirm coverage of AI/MCP attack surfaces (prompt injection, MCP plugin trust, RAG corpus integrity, AI-API egress). Confirm the testing methodology document references AI-specific test classes and tooling. Theater verdict if AI/MCP testing is absent from the methodology, or if the pen-test report contains no AI-class findings despite AI being in production.",
357
+ evidence_required: ["pen-test methodology document", "most-recent pen-test report with AI/MCP test sections", "tester competency CV/credentials"],
358
+ verdict_when_failed: PAPER
359
+ },
360
+ 'NIST-800-218-SSDF': {
361
+ claim: "We follow NIST SSDF practices for secure software development.",
362
+ test: "Pull the SSDF mapping document. Confirm AI-generated code provenance practices (per-block AI authorship attestation, reviewer identity, human approval before merge). Inspect git history; confirm AI-attributed commits have linked review records. Pull build-time SBOM; confirm AI build-tooling is enumerated. Theater verdict if AI authorship is unattributed, AI commits bypass review, or build-time SBOM omits AI tooling.",
363
+ evidence_required: ["SSDF mapping document", "AI-attribution policy + recent merge sample", "build-time SBOM"],
364
+ verdict_when_failed: PAPER
365
+ },
366
+ 'NIST-800-53-AC-2': {
367
+ claim: "Our account management satisfies NIST 800-53 AC-2 across all account types.",
368
+ test: "Inventory AI-agent service accounts. For each, confirm an authorization context defines (who initiated each invocation, what actions are in scope, what tools are authorised). Pull AC-2 audit log for one agent over 7 days; confirm prompt-level access decisions are reconstructable. Theater verdict if AI-agent accounts have no per-session authorisation context, or if AC-2 logs collapse to 'service account X did Y' without prompt-input chain.",
369
+ evidence_required: ["AI-agent service account inventory", "authorization-context policy document", "7-day audit log sample with prompt input chain"],
370
+ verdict_when_failed: PAPER
371
+ },
372
+ 'NIST-800-53-CM-7': {
373
+ claim: "We enforce least-functionality per NIST 800-53 CM-7 across all configuration items.",
374
+ test: "Sample 5 developer endpoints. Enumerate installed MCP servers + AI plugins; confirm each is on an organisational allowlist with documented business justification. Confirm tool-grant default is deny with explicit per-tool prompts. Theater verdict if any sampled endpoint runs an MCP server absent from the allowlist, or if any tool-grant defaults to allow without prompting.",
375
+ evidence_required: ["endpoint MCP/plugin inventory for sampled hosts", "organisational allowlist with justifications", "tool-grant default-policy export"],
376
+ verdict_when_failed: PAPER
377
+ },
378
+ 'NIST-800-53-SA-12': {
379
+ claim: "Our supply chain protection practices meet NIST 800-53 SA-12.",
380
+ test: "Pull the supplier-protection program. Confirm AI providers are enumerated with the same diligence as software suppliers (security questionnaire, SOC 2 review, contractual breach-notification). Confirm model and MCP-server provenance attestation is collected at consumption. Theater verdict if AI providers are exempt from supplier diligence, or if model artefacts are consumed without provenance attestation.",
381
+ evidence_required: ["supplier-protection program document", "AI-provider diligence record sample", "model-provenance attestations at consumption"],
382
+ verdict_when_failed: PAPER
383
+ },
384
+ 'NIST-800-53-SC-28': {
385
+ claim: "Information at rest is protected per NIST 800-53 SC-28 with encryption.",
386
+ test: "Inventory AI-provider artefact storage (conversation history, embeddings, fine-tune sets, vector indices). Confirm encryption-at-rest with key management by an in-scope KMS. Spot-check 3 storage locations; confirm key access is logged. Theater verdict if any AI artefact storage is unencrypted, key management is provider-default with no in-scope KMS, or key access is unlogged.",
387
+ evidence_required: ["AI artefact storage inventory", "KMS key-policy export", "key access log sample"],
388
+ verdict_when_failed: PAPER
389
+ },
390
+ 'NIST-800-53-SC-7': {
391
+ claim: "Boundary protection is enforced per NIST 800-53 SC-7 for the system boundary.",
392
+ test: "Inspect egress firewall rules for AI-API destinations (api.openai.com, api.anthropic.com, generativelanguage.googleapis.com, etc.). Confirm allowlist with documented business justification per destination. Confirm logging captures prompt hash + identity per egress. Theater verdict if AI destinations are reachable from any source without allowlist enforcement, or if egress logs lack identity binding.",
393
+ evidence_required: ["egress firewall rule export", "AI destination allowlist with justifications", "egress log sample with identity binding"],
394
+ verdict_when_failed: PAPER
395
+ },
396
+ 'NIST-800-53-SC-8': {
397
+ claim: "Transmission confidentiality and integrity is protected per NIST 800-53 SC-8.",
398
+ test: "Confirm TLS 1.3 (or PQC-hybrid where deployed) on every AI-API destination, including any internal gateways. Inspect MCP server transport; confirm authentication and integrity (signed JWT or mTLS) on MCP traffic. Theater verdict if any AI-API egress allows TLS<1.2 or unauthenticated MCP transport.",
399
+ evidence_required: ["TLS configuration audit per destination", "MCP transport configuration", "PQC migration roadmap if claimed"],
400
+ verdict_when_failed: PAPER
401
+ },
402
+ 'NIST-800-53-SI-10': {
403
+ claim: "We validate information inputs per NIST 800-53 SI-10.",
404
+ test: "Inspect input-validation rules at AI prompt boundaries: system-prompt protection from third-party content, RAG-corpus content sanitisation, tool-output sanitisation before re-injection. Theater verdict if no input validation exists at any of those boundaries, or if SI-10 evidence cites only HTML/SQL escaping without prompt-injection treatment.",
405
+ evidence_required: ["input-validation policy at prompt boundaries", "RAG-corpus sanitisation rule export", "tool-output sanitisation logic"],
406
+ verdict_when_failed: PAPER
407
+ },
408
+ 'NIST-800-53-SI-12': {
409
+ claim: "Information handling and retention satisfies NIST 800-53 SI-12.",
410
+ test: "Pull the records-retention schedule. Confirm AI artefacts (prompts, outputs, embeddings, fine-tune sets) appear with explicit retention periods aligned to data-classification. Confirm provider-side retention is documented per AI provider with attestation. Theater verdict if AI artefacts are absent from the retention schedule, or if provider-side retention is undocumented.",
411
+ evidence_required: ["records-retention schedule with AI categories", "provider retention attestation per AI provider", "deletion verification log"],
412
+ verdict_when_failed: PAPER
413
+ },
414
+ 'NIST-800-53-SI-2': {
415
+ claim: "Flaw remediation is timely per NIST 800-53 SI-2.",
416
+ test: "Pull the flaw-remediation SLA. Confirm a KEV-anchored tier (≤4h for KEV+PoC). Pull the past 12 months of KEV listings affecting in-scope assets; measure deployment compliance. Confirm live-patching is provisioned for hosts that can't reboot in window. Theater verdict if the SLA does not have a KEV tier or if KEV compliance dropped below 95%.",
417
+ evidence_required: ["SI-2 SLA document", "KEV deployment timeline per CVE", "live-patching agent inventory"],
418
+ verdict_when_failed: PAPER
419
+ },
420
+ 'NIST-800-53-SI-3': {
421
+ claim: "Malicious code protection is in place per NIST 800-53 SI-3.",
422
+ test: "Confirm SI-3 controls cover prompt-injection (input-side malicious instructions delivered via third-party content) and RAG-poisoning (corpus-side malicious instructions). Confirm detection rules exist and have triggered at least once on synthetic test inputs. Theater verdict if SI-3 evidence cites only AV signatures without prompt-injection or RAG-poisoning treatment.",
423
+ evidence_required: ["SI-3 control description with AI extensions", "prompt-injection / RAG-poisoning detection rule export", "synthetic-input test results"],
424
+ verdict_when_failed: PAPER
425
+ },
426
+ 'NIST-800-63B-rev4': {
427
+ claim: "Our digital-identity authentication satisfies NIST SP 800-63B Rev 4 at the targeted AAL.",
428
+ test: "Sample 10 admin identities; confirm registered authenticator class is FIDO2/WebAuthn-bound (phishing-resistant). Confirm session re-authentication on high-risk actions. Confirm service-account token lifecycles match the AAL claim (no long-lived bearer tokens for AAL3-claimed scopes). Theater verdict if any admin uses SMS/voice/TOTP for an AAL3-claimed scope, or if AAL3-claimed service accounts use static long-lived tokens.",
429
+ evidence_required: ["IdP authenticator export for sampled admins", "session-management policy document", "service-account token lifecycle export"],
430
+ verdict_when_failed: PAPER
431
+ },
432
+ 'NIST-AI-RMF-MEASURE-2.5': {
433
+ claim: "We map and measure AI risks per NIST AI RMF MEASURE 2.5 including continuous validity assessment.",
434
+ test: "Pull the AI risk-measurement plan. Confirm coverage of OWASP LLM Top 10 + ATLAS TTPs + MCP-trust scenarios with explicit measurement cadence. Confirm a metric exists for each category (e.g. prompt-injection success rate, RAG-poisoning detection rate). Inspect the metrics dashboard for actual measurement data within the past quarter. Theater verdict if metrics are defined but unpopulated, or if any of the OWASP/ATLAS/MCP categories has no measurement plan.",
435
+ evidence_required: ["AI risk-measurement plan", "metrics dashboard with current quarter data", "ATLAS/OWASP coverage matrix"],
436
+ verdict_when_failed: PAPER
437
+ },
438
+
439
+ // ---------------------------------------------------------------------
440
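+ // OWASP family. NOTE(review): the LLM02 / LLM06 / LLM08 claims below use the 2023 (v1.1) category names under 2025-labelled keys; in the 2025 list LLM02 = Sensitive Information Disclosure, LLM05 = Improper Output Handling, LLM06 = Excessive Agency, LLM08 = Vector and Embedding Weaknesses. Confirm against each catalog entry's control_name before renaming a key or rewording a claim.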
441
+ // ---------------------------------------------------------------------
442
+ 'OWASP-ASVS-v5.0-V14': {
443
+ claim: "Our application meets OWASP ASVS v5.0 V14 configuration controls.",
444
+ test: "For any AI-mediated feature, confirm V14-equivalent controls cover prompt-isolation, output-sanitisation, and tool-grant defaults. Confirm SDK pinning and provider-version pinning where supported. Theater verdict if AI-feature configuration management is informal (no pinned versions, no documented prompt-isolation policy).",
445
+ evidence_required: ["AI-feature configuration policy", "SDK + provider version pinning manifest", "prompt-isolation design document"],
446
+ verdict_when_failed: PAPER
447
+ },
448
+ 'OWASP-LLM-Top-10-2025-LLM01': {
449
+ claim: "We mitigate prompt injection per OWASP LLM Top 10 LLM01.",
450
+ test: "Inspect SDK-level prompt logging; confirm identity binding per call (which user, which agent, which scope). Confirm AI-provider domains are network-allowlisted with business justification. Confirm anomaly detection runs on prompt shape/volume/timing with alerting. Inspect SOC tooling for ATLAS+ATT&CK dual-mapping on LLM01 findings. Theater verdict if prompt logging is absent, allowlists are wildcard, or LLM01 findings are not dual-mapped.",
451
+ evidence_required: ["SDK prompt-logging configuration", "AI-provider allowlist with justifications", "anomaly detection rule export with recent alerts"],
452
+ verdict_when_failed: PAPER
453
+ },
454
+ 'OWASP-LLM-Top-10-2025-LLM02': {
455
+ claim: "We mitigate insecure output handling per OWASP LLM Top 10 LLM02.",
456
+ test: "Inspect every code path that consumes LLM output and routes it to a downstream sink (HTML, SQL, shell, eval, tool dispatch). Confirm sink-specific encoding/escaping or schema validation. Theater verdict if any LLM output reaches a sensitive sink without validation.",
457
+ evidence_required: ["LLM-output sink inventory", "output-validation logic per sink", "test cases proving validation fires on malicious payloads"],
458
+ verdict_when_failed: PAPER
459
+ },
460
+ 'OWASP-LLM-Top-10-2025-LLM06': {
461
+ claim: "We mitigate sensitive information disclosure per OWASP LLM Top 10 LLM06.",
462
+ test: "Inspect prompt egress for DLP rules covering PII, credentials, source-code-with-comments, and customer-data identifiers. Run a synthetic prompt containing planted secrets; confirm DLP triggers before egress to the AI provider. Theater verdict if DLP is not on the egress path, or if the synthetic test does not trigger.",
463
+ evidence_required: ["DLP rule export for prompt egress", "synthetic prompt test result", "data classification policy"],
464
+ verdict_when_failed: PAPER
465
+ },
466
+ 'OWASP-LLM-Top-10-2025-LLM08': {
467
+ claim: "We mitigate excessive agency per OWASP LLM Top 10 LLM08.",
468
+ test: "Pick an AI agent in production. Enumerate the tools it can call. For each tool, confirm scope-of-action limits (read-only by default, write requires per-action attestation, destructive requires user confirmation). Theater verdict if any agent has wildcard write access or destructive actions without per-call confirmation.",
469
+ evidence_required: ["agent tool inventory with scope limits", "per-action attestation policy", "destructive-action confirmation flow evidence"],
470
+ verdict_when_failed: PAPER
471
+ },
472
+ 'OWASP-Pen-Testing-Guide-v5': {
473
+ claim: "Our web app pen-tests follow OWASP WSTG v5 methodology.",
474
+ test: "Pull the most-recent pen-test report. Confirm test cases for AI-mediated features (prompt injection in chatbot widgets, AI-augmented input flows, agent-mediated workflows). Confirm tester used WSTG-aligned methodology with explicit AI-test extensions. Theater verdict if AI-mediated features are excluded from the pen-test scope.",
475
+ evidence_required: ["pen-test methodology document", "pen-test report covering AI-mediated features", "scope-of-engagement document"],
476
+ verdict_when_failed: PAPER
477
+ },
478
+
479
+ // ---------------------------------------------------------------------
480
+ // PCI DSS family
481
+ // ---------------------------------------------------------------------
482
+ 'PCI-DSS-4.0-6.3.3': {
483
+ claim: "We address security vulnerabilities in custom and bespoke software per PCI DSS 6.3.3.",
484
+ test: "Confirm the SDLC includes prompt-injection-class CWE coverage in code review for AI-mediated features. Inspect change tickets for AI-feature changes; confirm reviewer attestation includes AI-class threat sign-off. Theater verdict if AI-mediated changes bypass the prompt-injection threat-review gate.",
485
+ evidence_required: ["SDLC document with AI-class CWE coverage", "AI-feature change tickets with reviewer attestation", "code review checklist"],
486
+ verdict_when_failed: PAPER
487
+ },
488
+ 'PCI-DSS-4.0.1-6.4.3': {
489
+ claim: "We meet PCI DSS 4.0.1 6.4.3 inventory of payment-page scripts.",
490
+ test: "Pull the payment-page script inventory. Confirm completeness against a fresh DOM snapshot of the live payment page. Confirm authorisation attestation per script (who approved, when, why). Confirm SRI hashes are pinned per script. Theater verdict if the inventory diverges from the live DOM, or if any script lacks attestation/SRI pinning.",
491
+ evidence_required: ["payment-page script inventory", "live DOM snapshot per page", "SRI configuration export"],
492
+ verdict_when_failed: PAPER
493
+ },
494
+ 'PCI-DSS-4.0.1-11.6.1': {
495
+ claim: "We perform tamper detection on payment pages per PCI DSS 4.0.1 11.6.1.",
496
+ test: "Confirm tamper-detection cadence is sub-hour, not weekly. Confirm baselines distinguish AI-driven dynamic content from injection. Confirm coverage extends to mobile-app SDKs, kiosks, and agent-mediated checkout. Confirm CSP report-uri + Reporting API correlation. Theater verdict if cadence is weekly, baselining cannot tell legitimate dynamic content from injection, or non-browser surfaces are uncovered.",
497
+ evidence_required: ["tamper-detection cadence configuration", "baseline document with AI-aware logic", "CSP report-uri correlation pipeline"],
498
+ verdict_when_failed: PAPER
499
+ },
500
+ 'PCI-DSS-4.0.1-12.3.3': {
501
+ claim: "Our cryptographic suite review meets PCI DSS 4.0.1 12.3.3 annual cadence.",
502
+ test: "Pull the cryptographic suite inventory and most-recent annual review. Confirm enumeration of in-use algorithms with deprecation status. Confirm a PQC-readiness assessment exists with migration roadmap for long-lived keys (TLS for >5y data, signing for code/SBOM). Theater verdict if PQC is absent from the review, or if deprecated algorithms remain in use without a documented exception.",
503
+ evidence_required: ["cryptographic suite inventory", "annual review document with date", "PQC migration roadmap"],
504
+ verdict_when_failed: PAPER
505
+ },
506
+ 'PCI-DSS-4.0.1-12.10.7': {
507
+ claim: "Our incident response procedures address suspected ransomware per PCI DSS 4.0.1 12.10.7.",
508
+ test: "Pull the IR playbook for ransomware. Confirm pre-rehearsed sanctions-screening (OFAC SDN + EU 2014/833 + UK OFSI + AU DFAT + JP MOF) as a precondition to any payment posture. Confirm decryptor-availability lookup, immutability test on backup recovery path, and exfil-before-encrypt detection. Confirm 24h cyber-insurance carrier notification workflow is rehearsed end-to-end. Theater verdict if any of those is undocumented or not exercised in the past 12 months.",
509
+ evidence_required: ["ransomware IR playbook with sub-procedures", "tabletop exercise log within past 12 months", "carrier-notification workflow record"],
510
+ verdict_when_failed: PAPER
511
+ },
512
+
513
+ // ---------------------------------------------------------------------
514
+ // PSD2 / PTES
515
+ // ---------------------------------------------------------------------
516
+ 'PSD2-RTS-SCA': {
517
+ claim: "Our payment authentication satisfies PSD2 RTS-SCA strong customer authentication requirements.",
518
+ test: "Inventory payment-initiation flows. For any AI-mediated initiation (agent-initiated transactions, copilot-drafted payments), confirm an explicit delegated-authority attestation per transaction class with scope (amount, counterparty, frequency). Confirm a distinct audit indicator marks AI-mediated transactions. Theater verdict if AI initiations inherit the human-user SCA evidence path without delegated-authority attestation.",
519
+ evidence_required: ["payment-initiation flow inventory", "delegated-authority policy document", "audit log sample with AI-mediated indicator"],
520
+ verdict_when_failed: PAPER
521
+ },
522
+ 'PTES-Pre-engagement': {
523
+ claim: "Our pen-test scoping follows PTES pre-engagement methodology.",
524
+ test: "Pull the most-recent PTES scoping document. Confirm AI/MCP assets are enumerated, AI-class attack vectors are in-scope, and the rules-of-engagement permit prompt-injection and MCP-trust testing. Confirm tester competency on AI-class attacks. Theater verdict if AI/MCP is excluded from scope, or if rules-of-engagement prohibit AI-class testing without documented justification.",
525
+ evidence_required: ["PTES scoping document", "rules-of-engagement document", "tester competency CV"],
526
+ verdict_when_failed: PAPER
527
+ },
528
+
529
+ // ---------------------------------------------------------------------
530
+ // SLSA
531
+ // ---------------------------------------------------------------------
532
+ 'SLSA-v1.0-Build-L3': {
533
+ claim: "Our build pipeline is SLSA Build L3 with non-falsifiable provenance signed by a hardened build platform.",
534
+ test: "Pull the SLSA provenance attestation for the most-recent release. Confirm the build platform is hosted/hardened, the attestation is signed, and the materials cover the full source-of-truth. Then confirm AI-authorship attestation (per-block provenance for AI-generated code with reviewer identity) is present. Confirm any model artefacts shipped have a Model Track equivalent attestation. Theater verdict if attestations exist but AI-authored diffs lack reviewer attestation, or if model artefacts ship at SLSA L0/L1 equivalent without explicit model-track attestation.",
535
+ evidence_required: ["SLSA provenance attestation for latest release", "AI-authorship attestation policy and recent merge sample", "model-track attestation if model artefacts shipped"],
536
+ verdict_when_failed: PAPER
537
+ },
538
+
539
+ // ---------------------------------------------------------------------
540
+ // SOC 2
541
+ // ---------------------------------------------------------------------
542
+ 'SOC2-CC6-logical-access': {
543
+ claim: "Our SOC 2 CC6 logical and physical access controls cover all in-scope systems.",
544
+ test: "Sample AI-agent invocation flows. Confirm authorisation-context evidence per invocation (scope, tools, data sensitivity). Confirm prompt logging captures sufficient detail for post-incident analysis (input chain, output, tool calls). Confirm anomaly detection alerts on AI-agent actions outside baseline. Theater verdict if AI-agent actions are not separately authorised, prompts are unlogged, or anomaly detection is absent.",
545
+ evidence_required: ["AI-agent authorisation-context policy", "prompt-logging configuration with retention", "anomaly-detection rule export"],
546
+ verdict_when_failed: PAPER
547
+ },
548
+ 'SOC2-CC7-anomaly-detection': {
549
+ claim: "Our SOC 2 CC7 system monitoring detects anomalous behaviour.",
550
+ test: "Inspect monitoring rules for AI-class anomalies (prompt injection patterns, RAG-corpus drift, agent action volume spikes, tool-call sequence deviations). Confirm at least one alert per class triggered in the past 90 days; confirm triage records exist. Theater verdict if AI-class anomaly rules are absent, or if no alerts triggered despite AI being in production for 90+ days.",
551
+ evidence_required: ["AI-class anomaly rule export", "alert-triage records past 90 days", "telemetry volume report"],
552
+ verdict_when_failed: PAPER
553
+ },
554
+ 'SOC2-CC9-vendor-management': {
555
+ claim: "Our SOC 2 CC9 vendor management covers all third parties with system access.",
556
+ test: "Pull the vendor register. Filter for AI providers; confirm AI-specific contractual clauses (prompt retention, training opt-out, residency, sub-processor disclosure, breach notification). Confirm self-signup AI usage by employees is policy-prohibited and detection is in place. Theater verdict if AI vendors have generic SaaS contracts without AI clauses, or if self-signup is undetected.",
557
+ evidence_required: ["vendor register AI subset", "AI-vendor contract clause checklist", "self-signup detection telemetry"],
558
+ verdict_when_failed: PAPER
559
+ },
560
+ 'SOC2-CC6-OAuth-Consent': {
561
+ claim: "Our SOC 2 CC6 covers OAuth consent grants in our SaaS estate.",
562
+ test: "Pull the OAuth consent-grant inventory across the IdP estate. Confirm continuous alerting on high-risk scope grants. Confirm per-grant business-purpose attestation. Confirm unverified-publisher grants are gated. Theater verdict if any of those is missing or if high-risk grants exist without attestation/justification.",
563
+ evidence_required: ["OAuth consent-grant inventory", "alerting rule for high-risk scope grants", "business-purpose attestation samples"],
564
+ verdict_when_failed: PAPER
565
+ },
566
+ 'SOC2-CC6-Access-Key-Leak-Public-Repo': {
567
+ claim: "Our SOC 2 CC6 covers credential leakage detection across public repositories.",
568
+ test: "Confirm continuous secret-scanning across public repos and developer-affiliated personal repos. Confirm leaked-credential auto-revocation (≤5 minutes) integrated with the IdP/CSP. Pull the past 12 months of credential leaks; measure time-from-leak-to-revocation. Theater verdict if scanning is not continuous, auto-revocation is absent, or any leak exceeded 5 minutes to revocation.",
569
+ evidence_required: ["secret-scanning configuration", "auto-revocation pipeline architecture", "leak-to-revocation timing per incident"],
570
+ verdict_when_failed: PAPER
571
+ },
572
+
573
+ // ---------------------------------------------------------------------
574
+ // SWIFT CSCF
575
+ // ---------------------------------------------------------------------
576
+ 'SWIFT-CSCF-v2026-1.1': {
577
+ claim: "Our SWIFT secure zone is segregated and protected per CSCF v2026 1.1.",
578
+ test: "Inspect the secure-zone policy. Confirm explicit prohibition or strict gating of LLM assistants inside the secure zone. Confirm AI-API egress from administrative jump zones is enumerated as a named conduit with monitoring. Confirm AI-generated MT/MX message drafts are flagged as a distinct review class. Cross-walk to DORA Art. 28 register. Theater verdict if LLM assistants are silently permitted, AI-API egress is unmonitored, or no DORA cross-walk exists.",
579
+ evidence_required: ["secure-zone policy document", "AI-API egress monitoring configuration", "DORA Art. 28 cross-walk record"],
580
+ verdict_when_failed: PAPER
581
+ },
582
+
583
+ // ---------------------------------------------------------------------
584
+ // UK CAF
585
+ // ---------------------------------------------------------------------
586
+ 'UK-CAF-A1': {
587
+ claim: "Our governance satisfies UK CAF A1 with board-level cyber risk accountability.",
588
+ test: "Pull the board governance pack. Confirm an AI-systems-in-use inventory is reviewed at board cadence, an MCP/plugin trust register exists, and accountability for AI security outcomes maps to a named executive in the NIS2/CCRA scope. Theater verdict if AI is absent from board-pack contents, or if AI accountability is unassigned at executive level.",
589
+ evidence_required: ["board governance pack table-of-contents", "AI-systems inventory with board-review cadence", "executive accountability matrix"],
590
+ verdict_when_failed: PAPER
591
+ },
592
+ 'UK-CAF-B2': {
593
+ claim: "Our identity and access management satisfies UK CAF B2.",
594
+ test: "Inventory identities including AI-agent service accounts. Confirm authentication strength matches sensitivity (FIDO2 for admin, scope-limited tokens for agents). Confirm continuous verification, not just provisioning-time. Theater verdict if AI-agent accounts use long-lived bearer tokens for admin-equivalent scope, or if verification is provisioning-only.",
595
+ evidence_required: ["identity inventory including AI agents", "authentication-strength policy", "continuous-verification configuration"],
596
+ verdict_when_failed: PAPER
597
+ },
598
+ 'UK-CAF-C1': {
599
+ claim: "Our security monitoring satisfies UK CAF C1 across essential service flows.",
600
+ test: "Pull the monitoring coverage matrix. Confirm AI-API egress, MCP server invocations, and AI-agent action telemetry are ingested. Confirm alerting on AI-class anomalies has triaged alerts in the past 90 days. Theater verdict if any AI source class is unmonitored or if no AI-class alert has been triaged despite production AI activity.",
601
+ evidence_required: ["monitoring coverage matrix", "AI-source ingestion configuration", "alert-triage records past 90 days"],
602
+ verdict_when_failed: PAPER
603
+ },
604
+ 'UK-CAF-D1': {
605
+ claim: "Our response and recovery planning satisfies UK CAF D1.",
606
+ test: "Pull the incident response plan. Confirm AI-incident scenarios (prompt-injection RCE, RAG-poisoning, agent-action-on-injected-intent, AI-API supply-chain compromise) are exercised in the past 12 months. Confirm the plan integrates with NIS2 24h notification timing. Theater verdict if AI scenarios are absent from the exercise catalogue, or if NIS2 timing is not integrated.",
607
+ evidence_required: ["incident response plan", "exercise catalogue with execution dates", "NIS2 timing integration document"],
608
+ verdict_when_failed: PAPER
609
+ },
610
+ 'UK-CAF-B5': {
611
+ claim: "Our resilient telecom networks satisfy UK CAF B5.",
612
+ test: "Confirm gNB firmware hash attestation is continuous, signaling-anomaly baselines exist per PLMN-pair, and LI-gateway access auditing is in place. Confirm sub-carrier visibility risks are documented. Theater verdict if any of those streams are missing or if no signaling anomaly has been triaged in 90 days despite carrier-pair traffic.",
613
+ evidence_required: ["gNB attestation telemetry", "signaling baseline document", "LI-gateway audit log"],
614
+ verdict_when_failed: PAPER
615
+ },
616
+ 'UK-CAF-B2-IdP-Tenant': {
617
+ claim: "Our IdP tenant access controls satisfy UK CAF B2.",
618
+ test: "Inspect IdP tenant management; confirm tenant-admin actions require step-up MFA, management-API tokens are scoped + TTL-bounded + source-IP-locked, and token-signing certificate rotation is alert-attested. Theater verdict if any tenant-admin path lacks step-up MFA, or if management-API tokens are unrotated/unscoped/unbounded.",
619
+ evidence_required: ["tenant-admin action flow with MFA evidence", "management-API token inventory with TTL/scope/source-IP", "token-signing rotation alert configuration"],
620
+ verdict_when_failed: PAPER
621
+ },
622
+ 'UK-CAF-B2-Cloud-IAM': {
623
+ claim: "Our cloud-IAM posture satisfies UK CAF B2 across CSPs.",
624
+ test: "Pull cloud-IAM configuration: managed-identity binding to instance identity, IMDSv2 required with short token TTL, bearer-token TTL ≤1h non-CAE / ≤24h with CAE, cross-account assume-role with subject-claim specificity. Theater verdict if IMDSv1 is in use, TTLs exceed ceilings, or cross-account claims are wildcard.",
625
+ evidence_required: ["cloud-IAM configuration export per CSP", "IMDSv2 enforcement audit", "cross-account assume-role policy export"],
626
+ verdict_when_failed: PAPER
627
+ },
628
+
629
+ // ---------------------------------------------------------------------
630
+ // VEX
631
+ // ---------------------------------------------------------------------
632
+ 'VEX-CSAF-v2.1': {
633
+ claim: "We publish VEX statements via OASIS CSAF 2.1 for our products.",
634
+ test: "Pull the published CSAF 2.1 documents. Confirm AI-component identifier scheme presence (model + version + adapters + tokenizer). Confirm at least one VEX statement covers an AI-class vulnerability (jailbreak, prompt injection, embedding inversion). Confirm chaining of base-model VEX statements to derived-model VEX statements where applicable. Theater verdict if AI components are absent from the identifier scheme, or if no AI-class VEX statements exist despite AI components shipping.",
635
+ evidence_required: ["CSAF 2.1 published documents", "AI-component identifier mapping", "VEX chain example for base→derived model"],
636
+ verdict_when_failed: PAPER
637
+ },
638
+
639
+ // ---------------------------------------------------------------------
640
+ // FCC / Telecom
641
+ // ---------------------------------------------------------------------
642
+ 'FCC-CPNI-4.1': {
643
+ claim: "Our annual CPNI certification satisfies FCC CPNI obligations.",
644
+ test: "Confirm quarterly LI-gateway activation auditing (Salt-Typhoon/PRC threat model). Confirm gNB firmware hash attestation and signaling-anomaly baselines per PLMN-pair. Pull the most recent CPNI certification; confirm those operational artefacts are referenced. Theater verdict if certification is annual-only without LI-gateway/firmware-hash/signaling artefacts.",
645
+ evidence_required: ["LI-gateway audit log", "gNB firmware hash telemetry", "signaling baseline document"],
646
+ verdict_when_failed: PAPER
647
+ },
648
+ 'FCC-Cyber-Incident-Notification-2024': {
649
+ claim: "We can meet the FCC 2024 cyber incident notification rule for telecom carriers.",
650
+ test: "Run a tabletop with a synthetic significant-incident inject affecting CPNI. Stopwatch elapsed time to a draft FCC notification. Confirm cross-walk to NIS2 24h / DORA 4h timing for multinational operators. Theater verdict if no on-call is named, the playbook hasn't been exercised in 12 months, or cross-walks are absent.",
651
+ evidence_required: ["tabletop execution log", "FCC notification draft", "cross-jurisdiction timing matrix"],
652
+ verdict_when_failed: PAPER
653
+ },
654
+ 'AU-ISM-1556': {
655
+ claim: "Our telecom posture satisfies AU ISM control 1556 for signaling-protocol abuse.",
656
+ test: "Confirm signaling-anomaly baselines per PLMN-pair, gNB firmware hash attestation, and LI-gateway audit. Pull the past 90 days of signaling alerts; confirm triage records. Theater verdict if any of those streams is missing, or if signaling anomalies are unmonitored.",
657
+ evidence_required: ["signaling baseline document with PLMN-pair coverage", "gNB attestation telemetry", "alert-triage records"],
658
+ verdict_when_failed: PAPER
659
+ },
660
+ 'GSMA-NESAS-Deployment': {
661
+ claim: "Our telecom equipment is GSMA NESAS-certified across the network.",
662
+ test: "Confirm NESAS product-time certification AND operator-attested-runtime gNB hash AND EMS/OSS NESAS-equivalent scheme. Confirm firmware-update cadence triggers recertification attestation. Theater verdict if certification is product-time-only without runtime-attestation, or if firmware updates bypass recertification.",
663
+ evidence_required: ["NESAS certification per product", "runtime-attestation telemetry", "firmware-update → recertification mapping"],
664
+ verdict_when_failed: PAPER
665
+ },
666
+ '3GPP-TR-33.926': {
667
+ claim: "Our 5G gNB security posture aligns with 3GPP TR 33.926 threat-model assumptions.",
668
+ test: "Inspect deployment posture against TR 33.926 threats. Confirm runtime gNB integrity attestation and that LI-system compromise paths and signaling-protocol-abuse paths are addressed. Theater verdict if attestation is product-time-only or LI/signaling threats are not deployment-checklisted.",
669
+ evidence_required: ["TR 33.926 → deployment-posture mapping", "runtime gNB attestation telemetry", "LI/signaling threat-treatment document"],
670
+ verdict_when_failed: PAPER
671
+ },
672
+ 'ITU-T-X.805': {
673
+ claim: "Our network security architecture follows ITU-T X.805 8-dimension framing.",
674
+ test: "Pull the X.805 architecture document. Confirm modern-threat-model annexes covering LI-system compromise, signaling-protocol abuse, and slice-isolation are present. Confirm a deployment-validation checklist exists and was executed in the past year. Theater verdict if annexes are absent or the deployment checklist has never been executed.",
675
+ evidence_required: ["X.805 architecture document with annexes", "deployment-validation checklist execution log", "slice-isolation test results"],
676
+ verdict_when_failed: PAPER
677
+ },
678
+
679
+ // ---------------------------------------------------------------------
680
+ // Federated identity / IdP
681
+ // ---------------------------------------------------------------------
682
+ 'NIST-800-53-IA-5-Federated': {
683
+ claim: "Our IA-5 authenticator management covers federated identity providers.",
684
+ test: "Inspect IdP control-plane: continuous attestation of token-signing certificate fingerprints, claim-transformation rule baseline with per-modification change-control attestation, management-API-token inventory with TTL + scope + source-IP enforcement. Theater verdict if attestation is snapshot-only (quarterly) rather than continuous, or if management-API tokens lack TTL/scope/source-IP enforcement.",
685
+ evidence_required: ["IdP token-signing fingerprint telemetry", "claim-transformation change log", "management-API token inventory"],
686
+ verdict_when_failed: PAPER
687
+ },
688
+ 'ISO-27001-2022-A.5.16-Federated': {
689
+ claim: "Our identity management satisfies ISO 27001:2022 A.5.16 across federated systems.",
690
+ test: "Confirm IdP-tenant lifecycle management: tenant-admin discovery, off-boarding alerts, dormant-tenant alerts, claim-transformation review cadence. Theater verdict if dormant tenants exist with no alerting, or if claim transformations have no review cadence.",
691
+ evidence_required: ["IdP tenant inventory", "off-boarding/dormant alerting configuration", "claim-transformation review cadence document"],
692
+ verdict_when_failed: PAPER
693
+ },
694
+ 'AU-ISM-1559-IdP': {
695
+ claim: "Our IdP posture satisfies AU ISM 1559 for identity provider security.",
696
+ test: "Confirm IdP token-signing certificate rotation alerting, claim-transformation change-control, management-API token TTL/scope/source-IP enforcement. Confirm the IdP is treated as critical-infrastructure-tier in the asset inventory. Theater verdict if IdP is in 'IT vendor' tier rather than critical-infrastructure tier.",
697
+ evidence_required: ["IdP control-plane monitoring rule export", "asset-tier classification record", "management-API token inventory"],
698
+ verdict_when_failed: PAPER
699
+ },
700
+ 'OFAC-Sanctions-Threat-Actor-Negotiation': {
701
+ claim: "Our sanctions compliance covers any threat-actor negotiation scenario.",
702
+ test: "Pull the IR playbook. Confirm pre-rehearsed sanctions screening (US OFAC SDN + EU 2014/833 + UK OFSI + AU DFAT + JP MOF) as a precondition to any negotiator engagement. Confirm counsel-signed attestation workflow with timestamp. Confirm an annual tabletop with a sanctions-match inject under time-pressure. Theater verdict if screening is not pre-rehearsed or if the tabletop has not been run.",
703
+ evidence_required: ["IR playbook with sanctions sub-procedure", "counsel-signed attestation template", "tabletop execution log"],
704
+ verdict_when_failed: PAPER
705
+ },
706
+ 'FedRAMP-IL5-IAM-Federated': {
707
+ claim: "Our FedRAMP IL5 IAM posture covers federated identity for high-impact authorisations.",
708
+ test: "Confirm IdP control-plane controls (token-signing rotation alerting, claim-transformation change-control, management-API TTL/scope/source-IP) at IL5 evidence-quality. Confirm cross-account assume-role with subject-claim specificity > wildcard. Theater verdict if controls exist at SP-quality without IL5 evidence-rigor, or if any cross-account chain has wildcard subject claims.",
709
+ evidence_required: ["IL5-quality IdP control evidence bundle", "cross-account assume-role policy export", "evidence retention per IL5 cadence"],
710
+ verdict_when_failed: PAPER
711
+ },
712
+ 'CISA-Snowflake-AA24-IdP-Cloud': {
713
+ claim: "We have remediated against the AA24 Snowflake-class advisory pattern (IdP/cloud credential abuse).",
714
+ test: "For SaaS data platforms (Snowflake, Databricks, BigQuery, Redshift), confirm SSO-required posture (no local user/password fallback), MFA on every login, and network policies restricting access to known IPs. Pull the user inventory; confirm zero local-auth users and zero MFA exemptions. Theater verdict if any local-auth user persists, MFA exemption exists, or network policies are absent.",
715
+ evidence_required: ["data-platform user inventory with auth method", "MFA exemption list", "network policy configuration"],
716
+ verdict_when_failed: PAPER
717
+ },
718
+ 'NIST-800-53-AC-2-Cross-Account': {
719
+ claim: "Our cross-account access management satisfies NIST 800-53 AC-2.",
720
+ test: "Sample 10 cross-account assume-role chains. For each, confirm subject-claim specificity (no wildcard principal), session-policy scoping, and external-ID where third-party assume-role. Inspect monitoring rules for assume-role chain depth and unusual chain shapes. Theater verdict if any sampled chain has wildcard subject claims or external-ID is missing in third-party scenarios.",
721
+ evidence_required: ["cross-account assume-role policy sample", "monitoring rule for chain depth", "external-ID enforcement evidence"],
722
+ verdict_when_failed: PAPER
723
+ },
724
+ 'AU-ISM-1546-Cloud-Service-Account': {
725
+ claim: "Our cloud service-account posture satisfies AU ISM 1546.",
726
+ test: "Inventory cloud service accounts. Confirm short-lived OIDC tokens (workload identity federation) are used in preference to static keys; for any remaining static keys, confirm rotation policy ≤90 days and source-IP allowlisting. Theater verdict if static keys exist without rotation/IP-allowlisting, or if workload identity federation is available but not adopted.",
727
+ evidence_required: ["cloud service-account inventory by auth method", "rotation policy document", "source-IP allowlist configuration"],
728
+ verdict_when_failed: PAPER
729
+ },
730
+ 'AWS-Security-Hub-Coverage-Gap': {
731
+ claim: "Our cloud posture is monitored end-to-end by AWS Security Hub (or equivalent CSP-native posture tool).",
732
+ test: "Pull the past 90 days of Security Hub findings. Cross-reference against IR ticket-tracker. Theater verdict if more than 5 findings closed without remediation evidence (suppression rules only). Then run the project's `cloud-iam-incident` playbook detect-indicator inventory against CloudTrail; theater verdict if Security Hub did not surface indicators that the behavioural inventory does (posture-tool deployment ≠ behavioural coverage).",
733
+ evidence_required: ["Security Hub findings export 90 days", "IR ticket-tracker correlation", "cloud-iam-incident detect-indicator → CloudTrail behavioural-rule mapping"],
734
+ verdict_when_failed: PAPER
735
+ },
736
+
737
+ // ---------------------------------------------------------------------
738
+ // Ransomware playbook entries (RANSOMWARE-GAP-*)
739
+ // ---------------------------------------------------------------------
740
+ 'OFAC-SDN-Payment-Block': {
741
+ claim: "Our incident response covers OFAC sanctions screening before any ransomware payment.",
742
+ test: "Run a tabletop where the inject is a ransomware demand from an attribution-likely-sanctioned actor. Stopwatch the workflow: attribution-evidence package assembled → cross-jurisdiction lookup (OFAC SDN + EU 2014/833 + UK OFSI + AU DFAT + JP MOF) → counsel-signed attestation → pay/restore decision. Theater verdict if any cross-jurisdiction list is missing, counsel-signed attestation is unrehearsed, or the tabletop has not been exercised in the past 12 months.",
743
+ evidence_required: ["sanctions-screening sub-procedure document", "tabletop execution log with decision artefacts", "counsel-signed attestation template"],
744
+ verdict_when_failed: PAPER
745
+ },
746
+ 'Insurance-Carrier-24h-Notification': {
747
+ claim: "We can meet the 24h cyber insurance carrier notification clock with pre-approval workflow rehearsed.",
748
+ test: "Run a tabletop with carrier-notification as an inject. Stopwatch from T0 to (a) loss-notice form submitted via carrier-reachable channel, (b) broker after-hours contact engaged, (c) on-panel IR firm engagement attestation, (d) pre-approval workflow exercised end-to-end. Theater verdict if any sub-step is unrehearsed, the IR firm is off the carrier panel, or the broker after-hours channel is undocumented.",
749
+ evidence_required: ["tabletop execution log with stopwatch timestamps", "carrier panel + retained IR firm attestation", "broker after-hours contact + loss-notice form"],
750
+ verdict_when_failed: PAPER
751
+ },
752
+ 'EU-Sanctions-Reg-2014-833-Cyber': {
753
+ claim: "Our incident response includes EU Regulation 2014/833 cyber sanctions screening.",
754
+ test: "Confirm IR playbook integrates EU Reg 2014/833 lookup as a precondition to ransomware payment posture, alongside OFAC + UK + AU + JP. Confirm counsel-signature workflow includes EU jurisdiction-specific counsel where the entity has EU exposure. Theater verdict if EU 2014/833 lookup is absent from the IR playbook, or if EU-jurisdiction counsel is not pre-identified.",
755
+ evidence_required: ["IR playbook with EU 2014/833 sub-procedure", "EU-jurisdiction counsel pre-identification record", "tabletop execution log covering EU sanctions inject"],
756
+ verdict_when_failed: PAPER
757
+ },
758
+ 'Immutable-Backup-Recovery': {
759
+ claim: "Our backups are immutable and survive a production-admin-credential adversary.",
760
+ test: "Annual exercise: take a copy of a production-admin credential to a test environment with replica immutable backups. Attempt deletion via every API the production admin can invoke. Theater verdict if any deletion succeeds without a separate immutability-admin credential, or if 'immutable' resolves to versioning/write-protect/governance-retention that admin can override. Also confirm storage-side compliance-lock (S3 Object Lock compliance-retention, Azure immutable blob with legal hold, Veeam Hardened Repository) is in use.",
761
+ evidence_required: ["immutability adversary-test execution log", "storage-side compliance-lock configuration", "admin-separation policy document"],
762
+ verdict_when_failed: PAPER
763
+ },
764
+ 'Decryptor-Availability-Pre-Decision': {
765
+ claim: "Our ransomware response checks decryptor availability before any pay/restore decision.",
766
+ test: "Run a tabletop. Inject a ransomware family fingerprint (e.g. LockBit 3.0, BlackCat, Akira). Confirm IR playbook executes a curated decryptor catalogue lookup (No More Ransom + Emsisoft + Kaspersky NoMoreCry + Bitdefender + Avast + law-enforcement releases) and records the result with timestamp before the pay/restore decision. Confirm decryptor known-failure-mode review (e.g. ~35% partial-decryption rate per Coveware) is documented as decision input. Theater verdict if catalogue lookup is absent, failure-mode review is missing, or quarterly catalogue refresh is undocumented.",
767
+ evidence_required: ["IR playbook decryptor sub-procedure", "tabletop execution log", "quarterly catalogue refresh evidence"],
768
+ verdict_when_failed: PAPER
769
+ },
770
+ 'PHI-Exfil-Before-Encrypt-Breach-Class': {
771
+ claim: "Our HIPAA incident response treats exfil-before-encrypt as a parallel breach class.",
772
+ test: "Pull the IR playbook. Confirm exfil-before-encrypt detection (24-72h egress profile preceding encryption event) is integrated. Confirm exfil-scope determination is a parallel obligation independent of encryption-recovery status. Confirm HIPAA 164.402 breach risk assessment auto-triggers on exfil event. Confirm GDPR Art.33/34 + state breach laws + UK GDPR + AU NDB parallel-clock matrix is framework-mandated output. Confirm tabletop exercise injected an exfil-before-encrypt scenario in past 12 months. Theater verdict if any of those is absent.",
773
+ evidence_required: ["IR playbook with exfil-before-encrypt sub-procedure", "parallel-clock matrix document", "tabletop execution log within past 12 months"],
774
+ verdict_when_failed: PAPER
775
+ }
776
+ };
777
+
778
/**
 * Backfill the `theater_test` field on every entry of the catalog JSON.
 *
 * Reads the file at CATALOG_PATH, attaches the hand-authored `TESTS[key]`
 * object to each top-level entry (the `_meta` key is skipped), and rewrites
 * the file in place with 2-space indentation and a trailing newline.
 *
 * The run is all-or-nothing: if any catalog entry has no authored test, the
 * missing keys are printed to stderr and the process exits with status 2
 * before anything is written.
 *
 * @returns {void}
 */
function backfill() {
  const raw = fs.readFileSync(CATALOG_PATH, 'utf8');
  const data = JSON.parse(raw);
  const keys = Object.keys(data).filter(k => k !== '_meta');

  // Own-property lookup only. A bare `TESTS[k]` truthiness check also matches
  // names inherited from Object.prototype (e.g. "constructor", "toString"),
  // which would let such an entry pass without an authored test and be
  // silently dropped again by JSON.stringify. Falsy own values still count as
  // missing, exactly as before.
  const hasTest = k =>
    Object.prototype.hasOwnProperty.call(TESTS, k) && Boolean(TESTS[k]);

  // Validate everything up front so no entry is touched on a failing run.
  const missing = keys.filter(k => !hasTest(k));
  if (missing.length) {
    console.error('Missing theater_test for:', missing.join(', '));
    process.exit(2);
  }

  let updated = 0;
  for (const k of keys) {
    data[k].theater_test = TESTS[k];
    updated++;
  }

  // Re-emit with stable 2-space indentation matching the file's existing style.
  // Trailing newline preserved.
  const out = JSON.stringify(data, null, 2) + '\n';
  fs.writeFileSync(CATALOG_PATH, out);
  console.log(`Updated ${updated}/${keys.length} entries with theater_test.`);
}
805
+
806
// Script entry point: invoke the backfill once when this file is executed.
+ backfill();