@blamejs/exceptd-skills 0.12.41 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/CHANGELOG.md +91 -0
  2. package/bin/exceptd.js +52 -44
  3. package/data/_indexes/_meta.json +47 -47
  4. package/data/_indexes/chains.json +485 -13
  5. package/data/_indexes/jurisdiction-map.json +15 -4
  6. package/data/_indexes/section-offsets.json +1244 -1244
  7. package/data/_indexes/token-budget.json +173 -173
  8. package/data/atlas-ttps.json +54 -11
  9. package/data/attack-techniques.json +113 -17
  10. package/data/cve-catalog.json +17 -24
  11. package/data/cwe-catalog.json +8 -2
  12. package/data/framework-control-gaps.json +13 -3
  13. package/data/playbooks/ai-api.json +5 -0
  14. package/data/playbooks/cicd-pipeline-compromise.json +970 -0
  15. package/data/playbooks/cloud-iam-incident.json +4 -1
  16. package/data/playbooks/cred-stores.json +10 -0
  17. package/data/playbooks/framework.json +16 -0
  18. package/data/playbooks/hardening.json +4 -0
  19. package/data/playbooks/identity-sso-compromise.json +951 -0
  20. package/data/playbooks/idp-incident.json +3 -0
  21. package/data/playbooks/kernel.json +6 -0
  22. package/data/playbooks/llm-tool-use-exfil.json +963 -0
  23. package/data/playbooks/mcp.json +6 -0
  24. package/data/playbooks/runtime.json +4 -0
  25. package/data/playbooks/sbom.json +13 -0
  26. package/data/playbooks/secrets.json +6 -0
  27. package/data/playbooks/webhook-callback-abuse.json +916 -0
  28. package/lib/cross-ref-api.js +33 -13
  29. package/lib/cve-curation.js +12 -1
  30. package/lib/exit-codes.js +29 -0
  31. package/lib/lint-skills.js +24 -2
  32. package/lib/refresh-external.js +10 -1
  33. package/lib/scoring.js +55 -0
  34. package/manifest.json +83 -83
  35. package/orchestrator/index.js +32 -24
  36. package/package.json +1 -1
  37. package/sbom.cdx.json +122 -78
  38. package/scripts/predeploy.js +7 -13
  39. package/scripts/refresh-reverse-refs.js +86 -0
  40. package/scripts/refresh-sbom.js +21 -4
  41. package/skills/age-gates-child-safety/skill.md +1 -5
  42. package/skills/ai-attack-surface/skill.md +11 -4
  43. package/skills/ai-c2-detection/skill.md +11 -2
  44. package/skills/ai-risk-management/skill.md +4 -2
  45. package/skills/api-security/skill.md +7 -8
  46. package/skills/attack-surface-pentest/skill.md +2 -2
  47. package/skills/cloud-iam-incident/skill.md +1 -5
  48. package/skills/cloud-security/skill.md +0 -4
  49. package/skills/compliance-theater/skill.md +10 -2
  50. package/skills/container-runtime-security/skill.md +1 -3
  51. package/skills/dlp-gap-analysis/skill.md +3 -4
  52. package/skills/email-security-anti-phishing/skill.md +1 -8
  53. package/skills/exploit-scoring/skill.md +7 -2
  54. package/skills/framework-gap-analysis/skill.md +1 -1
  55. package/skills/fuzz-testing-strategy/skill.md +1 -2
  56. package/skills/global-grc/skill.md +3 -2
  57. package/skills/identity-assurance/skill.md +1 -3
  58. package/skills/idp-incident-response/skill.md +1 -4
  59. package/skills/incident-response-playbook/skill.md +1 -5
  60. package/skills/kernel-lpe-triage/skill.md +2 -2
  61. package/skills/mcp-agent-trust/skill.md +13 -3
  62. package/skills/mlops-security/skill.md +2 -3
  63. package/skills/ot-ics-security/skill.md +0 -3
  64. package/skills/policy-exception-gen/skill.md +11 -3
  65. package/skills/pqc-first/skill.md +4 -2
  66. package/skills/rag-pipeline-security/skill.md +2 -0
  67. package/skills/ransomware-response/skill.md +1 -5
  68. package/skills/researcher/skill.md +4 -3
  69. package/skills/sector-energy/skill.md +0 -4
  70. package/skills/sector-federal-government/skill.md +2 -3
  71. package/skills/sector-financial/skill.md +1 -4
  72. package/skills/sector-healthcare/skill.md +0 -5
  73. package/skills/sector-telecom/skill.md +0 -4
  74. package/skills/security-maturity-tiers/skill.md +1 -2
  75. package/skills/skill-update-loop/skill.md +4 -3
  76. package/skills/supply-chain-integrity/skill.md +4 -3
  77. package/skills/threat-model-currency/skill.md +1 -1
  78. package/skills/threat-modeling-methodology/skill.md +2 -1
  79. package/skills/webapp-security/skill.md +0 -5
@@ -0,0 +1,963 @@
1
+ {
2
+ "_meta": {
3
+ "id": "llm-tool-use-exfil",
4
+ "version": "1.0.0",
5
+ "last_threat_review": "2026-05-17",
6
+ "threat_currency_score": 94,
7
+ "changelog": [
8
+ {
9
+ "version": "1.0.0",
10
+ "date": "2026-05-17",
11
+ "summary": "Initial seven-phase agentic-AI tool-abuse playbook. Distinct from dlp-exfiltration (enterprise DLP plane) and mcp-supply-chain (tool trust at install time): this playbook covers the in-runtime case where a legitimately-installed AI tool with legitimate auth is COERCED via prompt injection (direct, indirect, or RAG-derived) into exfiltrating data the model has access to. Walks the agent's tool-allowlist, RAG-pipeline ingestion sources, tool-call transcripts for instruction-coercion patterns, egress destinations of agent-initiated tool calls, and the AI provider's audit-trail surface. Closes the GRC loop with NIST AI RMF GV-1.5 + MS-2.6 + MG-4.1 gaps, EU AI Act Art.15 + Art.55 agentic-oversight gap, ISO 42001 Cl. 6.1.3 AI-risk-assessment gap, and SOC 2 CC6.7 data-exfiltration-via-agent gap.",
12
+ "cves_added": [
13
+ "CVE-2025-53773",
14
+ "CVE-2026-30615",
15
+ "CVE-2026-30623",
16
+ "CVE-2026-42208"
17
+ ],
18
+ "framework_gaps_updated": [
19
+ "nist-ai-rmf-GV-1.5-agent-oversight",
20
+ "nist-ai-rmf-MS-2.6-tool-use-monitoring",
21
+ "nist-ai-rmf-MG-4.1-incident-response-agent",
22
+ "eu-ai-act-art15-robustness-agentic",
23
+ "eu-ai-act-art55-systemic-risk-agentic",
24
+ "iso-42001-6.1.3-ai-risk-assessment",
25
+ "soc2-CC6.7-data-exfil-via-agent"
26
+ ]
27
+ }
28
+ ],
29
+ "owner": "@blamejs/ai-security",
30
+ "air_gap_mode": false,
31
+ "scope": "service",
32
+ "preconditions": [
33
+ {
34
+ "id": "agent-config-readable",
35
+ "description": "Agent must be able to read the operator's AI-agent configuration: tool allowlists (.claude/*, .cursor/mcp.json, .codeium/windsurf/*, agent-platform tool definitions), RAG pipeline ingestion config, and tool-call transcript logs for the audit window.",
36
+ "check": "agent_has_filesystem_read == true OR agent_has_audit_log_access == true",
37
+ "on_fail": "halt"
38
+ },
39
+ {
40
+ "id": "ai-agent-deployed",
41
+ "description": "At least one agentic-AI tool-using workload must be deployed (Claude Code / Cursor / Windsurf / OpenAI Assistants / LangChain agent / LiteLLM proxy / internal RAG agent). If none, this playbook returns visibility_gap=no_agent_deployed.",
42
+ "check": "exists($HOME/.claude) || exists($HOME/.cursor) || exists($HOME/.codeium/windsurf) || agent_platform_endpoint_configured == true",
43
+ "on_fail": "skip_phase"
44
+ }
45
+ ],
46
+ "mutex": [],
47
+ "feeds_into": [
48
+ {
49
+ "playbook_id": "ai-api",
50
+ "condition": "finding.includes_ai_api_egress == true"
51
+ },
52
+ {
53
+ "playbook_id": "mcp",
54
+ "condition": "finding.tool_surface == 'mcp'"
55
+ },
56
+ {
57
+ "playbook_id": "secrets",
58
+ "condition": "finding.includes_credential_exposure == true"
59
+ },
60
+ {
61
+ "playbook_id": "framework",
62
+ "condition": "analyze.compliance_theater_check.verdict == 'theater'"
63
+ }
64
+ ]
65
+ },
66
+ "domain": {
67
+ "name": "Agentic-AI tool abuse via prompt injection for data exfiltration",
68
+ "attack_class": "rag-exfiltration",
69
+ "atlas_refs": [
70
+ "AML.T0051",
71
+ "AML.T0096",
72
+ "AML.T0024",
73
+ "AML.T0048"
74
+ ],
75
+ "attack_refs": [
76
+ "T1041",
77
+ "T1567",
78
+ "T1552.001",
79
+ "T1059"
80
+ ],
81
+ "cve_refs": [
82
+ "CVE-2025-53773",
83
+ "CVE-2026-30615",
84
+ "CVE-2026-30623",
85
+ "CVE-2026-42208"
86
+ ],
87
+ "cwe_refs": [
88
+ "CWE-1426",
89
+ "CWE-200",
90
+ "CWE-94",
91
+ "CWE-77",
92
+ "CWE-918"
93
+ ],
94
+ "d3fend_refs": [
95
+ "D3-NTA",
96
+ "D3-CBAN",
97
+ "D3-IOPR",
98
+ "D3-EAL"
99
+ ],
100
+ "frameworks_in_scope": [
101
+ "nist-800-53",
102
+ "nist-csf-2",
103
+ "iso-27001-2022",
104
+ "soc2",
105
+ "nis2",
106
+ "eu-ai-act",
107
+ "uk-caf",
108
+ "au-ism"
109
+ ]
110
+ },
111
+ "phases": {
112
+ "govern": {
113
+ "jurisdiction_obligations": [
114
+ {
115
+ "jurisdiction": "EU",
116
+ "regulation": "EU AI Act Art.15",
117
+ "obligation": "notify_regulator",
118
+ "window_hours": 360,
119
+ "clock_starts": "validate_complete",
120
+ "evidence_required": [
121
+ "ai_system_inventory",
122
+ "robustness_assessment_update",
123
+ "remediation_completed_evidence"
124
+ ]
125
+ },
126
+ {
127
+ "jurisdiction": "EU",
128
+ "regulation": "EU AI Act Art.73 (serious incident)",
129
+ "obligation": "notify_regulator",
130
+ "window_hours": 360,
131
+ "clock_starts": "analyze_complete",
132
+ "evidence_required": [
133
+ "incident_description",
134
+ "ai_system_inventory",
135
+ "affected_users_estimate"
136
+ ]
137
+ },
138
+ {
139
+ "jurisdiction": "EU",
140
+ "regulation": "GDPR Art.33",
141
+ "obligation": "notify_regulator",
142
+ "window_hours": 72,
143
+ "clock_starts": "detect_confirmed",
144
+ "evidence_required": [
145
+ "exfiltrated_data_categories",
146
+ "data_subject_impact_assessment",
147
+ "containment_record"
148
+ ]
149
+ },
150
+ {
151
+ "jurisdiction": "EU",
152
+ "regulation": "NIS2 Art.23",
153
+ "obligation": "notify_regulator",
154
+ "window_hours": 24,
155
+ "clock_starts": "detect_confirmed",
156
+ "evidence_required": [
157
+ "exfiltration_scope_estimate",
158
+ "agent_tool_chain_inventory",
159
+ "containment_record"
160
+ ]
161
+ },
162
+ {
163
+ "jurisdiction": "UK",
164
+ "regulation": "UK GDPR Art.33",
165
+ "obligation": "notify_regulator",
166
+ "window_hours": 72,
167
+ "clock_starts": "detect_confirmed",
168
+ "evidence_required": [
169
+ "exfiltrated_data_categories",
170
+ "data_subject_impact_assessment"
171
+ ]
172
+ }
173
+ ],
174
+ "theater_fingerprints": [
175
+ {
176
+ "pattern_id": "model-card-as-safety-control",
177
+ "claim": "The model provider's system card documents prompt-injection robustness, therefore agentic workloads built on it are safe.",
178
+ "fast_detection_test": "Distinguish model-card claims (laboratory benchmark results) from deployed-system behaviour. The model's prompt-injection benchmark is run against a generic adversarial corpus; it does not generalise to operator-specific tools whose responses contain instruction-coercion grammar. Run the operator-specific tool-injection battery against the deployed agent; benchmark scores are not deployment evidence.",
179
+ "implicated_controls": [
180
+ "eu-ai-act-art15",
181
+ "iso-42001-6.1.3"
182
+ ]
183
+ },
184
+ {
185
+ "pattern_id": "human-in-the-loop-as-mitigation",
186
+ "claim": "Every agent tool call requires human approval, so prompt-injection-induced exfiltration cannot succeed.",
187
+ "fast_detection_test": "Inventory the tool-allowlist for any `auto-approve` flag. CVE-2025-53773 (GitHub Copilot YOLO mode) is the canonical example — operators enable broad auto-approval to speed development workflows, breaking the human-in-the-loop assumption. For tools requiring approval, measure the actual approval latency + denial rate; rubber-stamp approval is functionally equivalent to auto-approval."
188
+ },
189
+ {
190
+ "pattern_id": "egress-allowlist-as-exfil-defence",
191
+ "claim": "Outbound network requests from the agent are restricted to an allowlist, so exfiltration is contained.",
192
+ "fast_detection_test": "Verify the allowlist includes the model provider's API endpoint AND any documentation / search / web-fetch domain the agent legitimately needs. Each allowlisted high-cardinality destination (cdn.<provider>.com, *.googleusercontent.com, github.com) is a credible exfil channel for an attacker that can encode data into request paths or query parameters. The allowlist's mere existence is not the defence; egress payload inspection is."
193
+ }
194
+ ],
195
+ "framework_context": {
196
+ "gap_summary": "Agentic-AI tool abuse via prompt injection sits in a structural framework gap. NIST AI RMF (GV-1.5 governance, MS-2.6 measurement of AI system performance, MG-4.1 incident response) addresses AI-system risk management but not the specific case of a legitimately-installed tool with legitimate auth being coerced into exfiltration. EU AI Act Art.15 (accuracy, robustness, cybersecurity) covers robustness but the conformity assessment is per-model, not per-deployment; an agent built on a conformant model that COMBINES the model with operator-deployed tools is out of the per-model assessment scope. EU AI Act Art.55 (systemic risk for general-purpose AI) is closer but applies only to the few largest providers, not to the operator's deployment. ISO 42001 Cl. 6.1.3 AI risk assessment is methodology-focused; does not bind to tool-abuse-via-prompt-injection specifically. SOC 2 CC6.7 covers data transmission but not the agent-mediated case where the data is legitimately fetched by an authorised tool then exfiltrated via a different authorised tool's response surface. The result: a deployed agent with conformant model + allowlisted tools + audited egress can satisfy every applicable framework reading while being one prompt-injected document away from full data exfiltration.",
197
+ "lag_score": 30,
198
+ "per_framework_gaps": [
199
+ {
200
+ "framework": "nist-csf-2",
201
+ "control_id": "PR.DS-02 (Data-in-transit) + PR.DS-10 (Data-in-use)",
202
+ "designed_for": "Data confidentiality during transmission + while in use.",
203
+ "insufficient_because": "Treats the agent's tool calls as legitimate transmission. Prompt-injection-induced exfiltration uses authorised tools to transmit; the controls cannot distinguish operator-intended from attacker-induced tool calls."
204
+ },
205
+ {
206
+ "framework": "eu-ai-act",
207
+ "control_id": "Art.15 — Accuracy, robustness, cybersecurity",
208
+ "designed_for": "Per-model robustness conformity assessment.",
209
+ "insufficient_because": "Per-model assessment does not cover the per-deployment composition of model + operator tools + RAG sources. A conformant model in an agentic deployment is one indirect-injection away from exfiltration; the conformity scope misses the composition."
210
+ },
211
+ {
212
+ "framework": "eu-ai-act",
213
+ "control_id": "Art.55 — Obligations for providers of general-purpose AI models with systemic risk",
214
+ "designed_for": "Systemic-risk obligations on the largest providers.",
215
+ "insufficient_because": "Applies only to providers; operator deployments are out-of-scope. The deployer assumes systemic-risk implications without bearing the corresponding obligations."
216
+ },
217
+ {
218
+ "framework": "iso-27001-2022",
219
+ "control_id": "A.5.16 — Identity management + A.5.17 — Authentication information",
220
+ "designed_for": "Identity + authentication for users + services.",
221
+ "insufficient_because": "Treats tool-auth as service authentication. Doesn't address the case where the SERVICE acts on behalf of the user but is induced by attacker-supplied content. The tool's auth is legitimate; the prompt is not."
222
+ },
223
+ {
224
+ "framework": "soc2",
225
+ "control_id": "CC6.7 — Transmission of data",
226
+ "designed_for": "Restricting data transmission to authorised recipients.",
227
+ "insufficient_because": "Authorised tools transmitting to authorised destinations satisfy CC6.7. Agent-mediated exfiltration uses authorised tools + authorised destinations and is invisible to the control."
228
+ },
229
+ {
230
+ "framework": "nis2",
231
+ "control_id": "Art.21(2)(e) — Security in the acquisition, development and maintenance of network and information systems",
232
+ "designed_for": "Security across the system lifecycle.",
233
+ "insufficient_because": "Treats AI systems as software in the procurement / lifecycle sense; does not specify agentic-tool-composition risk assessment, prompt-injection detection, or per-deployment robustness assessment."
234
+ }
235
+ ]
236
+ },
237
+ "skill_preload": [
238
+ "ai-attack-surface",
239
+ "rag-pipeline-security",
240
+ "mcp-agent-trust",
241
+ "ai-c2-detection",
242
+ "framework-gap-analysis",
243
+ "compliance-theater",
244
+ "policy-exception-gen"
245
+ ]
246
+ },
247
+ "direct": {
248
+ "threat_context": "Q1-Q2 2026 agentic-AI tool-abuse landscape. CVE-2025-53773 (GitHub Copilot / VS Code YOLO-mode prompt-injection RCE) is the canonical 'auto-approve broke the human-in-the-loop' incident — operators enabled `chat.tools.autoApprove: true` for developer-velocity reasons, removing the only barrier between an indirect-injection-coerced agent and arbitrary tool execution. CVE-2026-30615 (Windsurf MCP local-vector RCE via adversarial tool response) and CVE-2026-30623 (Anthropic MCP SDK stdio command-injection) showed that the tool surface itself can be coerced — a tool response containing instruction-coercion grammar can pivot the agent's next call. CVE-2026-42208 (BerriAI LiteLLM Proxy Auth SQL Injection) showed that the proxy layer many agentic stacks sit behind has its own exploitable surfaces. The 2025 'ChatGPT Operator' class incidents demonstrated the same pattern at the consumer-product layer: an agent with browser tool authorised for legitimate user-driven research was coerced via injected page content into navigating to attacker-controlled pages + submitting forms with the user's session. Anthropic's MAL-2026 catalogue series documents the MCP-side trust failures. The AML.T0051 (LLM prompt injection) and AML.T0096 (LLM plugin compromise) ATLAS techniques are operational across consumer + enterprise deployments. Critical observation: the agent IS the attacker's confused-deputy proxy — the model holds legitimate access, the tools hold legitimate auth, and the only attacker primitive needed is one document the agent will read.",
249
+ "rwep_threshold": {
250
+ "escalate": 85,
251
+ "monitor": 65,
252
+ "close": 30
253
+ },
254
+ "framework_lag_declaration": "NIST CSF 2.0 PR.DS-02 + PR.DS-10, EU AI Act Art.15 + Art.55, ISO 27001 A.5.16 + A.5.17, SOC 2 CC6.7, and NIS2 Art.21(2)(e) collectively underspecify agentic-AI tool abuse. None mandate per-deployment robustness assessment for the model + tool + RAG-source composition. None require egress-payload entropy / pattern inspection to distinguish operator-intended tool calls from attacker-induced ones. None bind to the AML.T0051 + AML.T0096 detection signal classes. EU AI Act Art.15 conformity is per-model not per-deployment. The CVE-2025-53773 + CVE-2026-30615 + CVE-2026-30623 incidents would each satisfy a literal reading of every applicable control. ISO 42001 Cl. 6.1.3 is closest but methodology-only; no binding signal list.",
255
+ "skill_chain": [
256
+ {
257
+ "skill": "ai-attack-surface",
258
+ "purpose": "Map the operator's deployed-agent attack surface: which models, which tools, which RAG sources, which egress destinations, which approval gates.",
259
+ "required": true
260
+ },
261
+ {
262
+ "skill": "rag-pipeline-security",
263
+ "purpose": "Assess RAG ingestion sources for indirect-injection susceptibility: which sources are operator-owned vs. external, which carry untrusted content, which have integrity controls.",
264
+ "required": true
265
+ },
266
+ {
267
+ "skill": "mcp-agent-trust",
268
+ "purpose": "When the tool surface is MCP, score per-tool trust posture (signed manifest, allowlist, response-content sanitisation).",
269
+ "skip_if": "phases.look.findings.tool_surface_mcp_count == 0",
270
+ "required": false
271
+ },
272
+ {
273
+ "skill": "ai-c2-detection",
274
+ "purpose": "Score the agent's tool-call + egress patterns for AI-C2 signatures (refresh-token hoarding, persistent unprompted tool calls, exfil-encoding patterns in tool arguments).",
275
+ "required": true
276
+ },
277
+ {
278
+ "skill": "framework-gap-analysis",
279
+ "purpose": "Map each finding to the framework control that should have caught it and why it didn't.",
280
+ "required": true
281
+ },
282
+ {
283
+ "skill": "compliance-theater",
284
+ "purpose": "Run the theater test — does the org's claimed agent oversight actually distinguish operator-intended tool calls from attacker-induced ones?",
285
+ "required": true
286
+ },
287
+ {
288
+ "skill": "policy-exception-gen",
289
+ "purpose": "Generate auditor-ready exception language if a finding cannot be remediated within the jurisdiction's window.",
290
+ "skip_if": "close.exception_generation.trigger_condition == false",
291
+ "required": false
292
+ }
293
+ ],
294
+ "token_budget": {
295
+ "estimated_total": 17500,
296
+ "breakdown": {
297
+ "govern": 2400,
298
+ "direct": 1800,
299
+ "look": 1900,
300
+ "detect": 2400,
301
+ "analyze": 3600,
302
+ "validate": 3000,
303
+ "close": 2400
304
+ }
305
+ }
306
+ },
307
+ "look": {
308
+ "artifacts": [
309
+ {
310
+ "id": "agent-tool-allowlist",
311
+ "type": "config_file",
312
+ "source": "Inventory tool definitions across deployed agents: .claude/settings.json (Claude Code), .cursor/mcp.json + .cursor/rules (Cursor), .codeium/windsurf/mcp_config.json (Windsurf), .vscode/settings.json for chat.tools.autoApprove + chat.experimental.* (GitHub Copilot), agent-platform tool definitions (LangChain ToolNode, OpenAI Assistants tool list).",
313
+ "description": "Every tool the agent is allowed to call, with auto-approve / approval-required state per tool and per scope.",
314
+ "required": true,
315
+ "air_gap_alternative": "Local config walk only; mark agent-platform-side definitions as inventory_gap=platform_api_unavailable."
316
+ },
317
+ {
318
+ "id": "rag-ingestion-sources",
319
+ "type": "config_file",
320
+ "source": "Inventory RAG / context-injection sources: vector-store ingestion configs (Pinecone / Chroma / Weaviate / pgvector loaders), web-fetch tool URL allowlists, file-read tool path allowlists, MCP server document sources. Per source: operator-owned vs. external, integrity-control state (signed content, hash-pinned, none).",
321
+ "description": "Every source from which content can reach the agent's context window. Indirect-injection arrives through these.",
322
+ "required": true,
323
+ "air_gap_alternative": "Local config walk + secret-store inventory; mark vector-store-side enumeration as inventory_gap=admin_api_unavailable."
324
+ },
325
+ {
326
+ "id": "tool-call-transcripts",
327
+ "type": "audit_trail",
328
+ "source": "Capture the last 30 days of agent tool-call transcripts: Anthropic API tool_use blocks, OpenAI Assistants run-step traces, LangChain agent call logs, MCP server stdio logs. Per call: tool name, arguments, response, calling-conversation id, approval state.",
329
+ "description": "Tool-call transcript log for the audit window. Used for instruction-coercion pattern detection + egress-pattern analysis.",
330
+ "required": true,
331
+ "air_gap_alternative": "Local agent logs only; mark provider-side trace data as inventory_gap=provider_api_unavailable."
332
+ },
333
+ {
334
+ "id": "egress-destination-inventory",
335
+ "type": "network_capture",
336
+ "source": "Outbound destinations agent tool calls actually reached over the audit window. Capture from: outbound proxy logs (Squid / Tinyproxy / corporate proxy), eBPF egress capture, NetFlow / VPC Flow Logs scoped to the agent's host or namespace.",
337
+ "description": "Actual egress destinations reached by agent tool calls. Compared against tool-allowlist + RAG-source-allowlist.",
338
+ "required": false,
339
+ "air_gap_alternative": "Local egress proxy logs only; mark cloud-side flow logs as inventory_gap=cloud_logging_unavailable."
340
+ },
341
+ {
342
+ "id": "approval-latency-and-denial-rate",
343
+ "type": "audit_trail",
344
+ "source": "For tools requiring human approval: per-call timestamp of request + approval/denial. Compute mean approval latency + denial rate per tool. Source: same transcript stream as tool-call-transcripts, partitioned on approval-required tools.",
345
+ "description": "Human-in-the-loop liveness signal. Rubber-stamp approval (< 5s, < 1% denial) is functionally auto-approval.",
346
+ "required": false
347
+ },
348
+ {
349
+ "id": "model-system-prompt-and-context-state",
350
+ "type": "model_output",
351
+ "source": "For each deployed agent, capture the current system prompt + any persistent context-injection (project rules, agent-platform global instructions, CLAUDE.md, .cursorrules, .cursor/rules/*).",
352
+ "description": "Persistent context — used to detect attacker-introduced rules that broaden tool authority or relax egress restrictions.",
353
+ "required": false
354
+ }
355
+ ],
356
+ "collection_scope": {
357
+ "time_window": "30d",
358
+ "asset_scope": "operator_deployed_agents_and_their_tool_surfaces",
359
+ "depth": "deep",
360
+ "sampling": "full inventory of deployed agents + tool-allowlists + RAG ingestion sources. 30-day audit of tool-call transcripts + egress. Re-collect on every new agent deployment, tool addition, or RAG source change."
361
+ },
362
+ "environment_assumptions": [
363
+ {
364
+ "assumption": "operator-deployed agentic-AI workloads exist",
365
+ "if_false": "Skip playbook with visibility_gap=no_agent_deployed."
366
+ },
367
+ {
368
+ "assumption": "tool-call transcripts are retained for the audit window",
369
+ "if_false": "Reduce time-window to actual retention; downgrade overall confidence to medium and emit a visibility note on the retention shortfall."
370
+ },
371
+ {
372
+ "assumption": "outbound proxy or egress capture is in place for at least one deployed agent",
373
+ "if_false": "egress-destination-inventory marked inconclusive; rely on tool-allowlist + RAG-ingestion-source inventory for the static-analysis side; downgrade exfil-detection confidence to medium."
374
+ }
375
+ ],
376
+ "fallback_if_unavailable": [
377
+ {
378
+ "artifact_id": "egress-destination-inventory",
379
+ "fallback_action": "use_compensating_artifact",
380
+ "confidence_impact": "medium"
381
+ },
382
+ {
383
+ "artifact_id": "tool-call-transcripts",
384
+ "fallback_action": "escalate_to_human",
385
+ "confidence_impact": "high"
386
+ },
387
+ {
388
+ "artifact_id": "rag-ingestion-sources",
389
+ "fallback_action": "escalate_to_human",
390
+ "confidence_impact": "high"
391
+ },
392
+ {
393
+ "artifact_id": "approval-latency-and-denial-rate",
394
+ "fallback_action": "mark_inconclusive",
395
+ "confidence_impact": "medium"
396
+ }
397
+ ]
398
+ },
399
+ "detect": {
400
+ "indicators": [
401
+ {
402
+ "id": "auto-approve-on-high-impact-tool",
403
+ "type": "log_pattern",
404
+ "value": "Tool allowlist marks any tool with a high-impact operation (file_write, shell_exec, network_post, mcp_call) as auto-approved: VS Code `chat.tools.autoApprove: true`, Claude Code `permissions: { write: 'always' }`, Cursor MCP `autorun: true`.",
405
+ "description": "CVE-2025-53773 YOLO-mode class. Auto-approval breaks the human-in-the-loop gate on high-impact operations.",
406
+ "confidence": "deterministic",
407
+ "deterministic": true,
408
+ "attack_ref": "T1059",
409
+ "cve_ref": "CVE-2025-53773",
410
+ "false_positive_checks_required": [
411
+ "Confirm the auto-approve is NOT scoped to a read-only or sandbox-only operation. `chat.tools.autoApprove` scoped to `read_file` is lower-blast-radius than the same scoped to `shell_exec`; classify per-tool.",
412
+ "Confirm there is not a complementary egress allowlist that meaningfully restricts what the auto-approved tool can reach. Auto-approve + restricted egress is medium-risk; auto-approve + open egress is high-risk."
413
+ ]
414
+ },
415
+ {
416
+ "id": "instruction-coercion-in-tool-response",
417
+ "type": "log_pattern",
418
+ "value": "Tool-call transcript contains a response with instruction-coercion grammar: `<IMPORTANT>`, `<system>`, `<instructions>`, `Ignore previous instructions`, `Before using this tool, read`, `Do not mention to user`, Unicode tag-block range U+E0000-U+E007F, ANSI escape byte 0x1B in a context where escape sequences are not expected.",
419
+ "description": "Indirect prompt-injection signature — tool response carries grammar designed to subvert the model's next action.",
420
+ "confidence": "deterministic",
421
+ "deterministic": true,
422
+ "atlas_ref": "AML.T0051",
423
+ "false_positive_checks_required": [
424
+ "Confirm the response is not a legitimate document about prompt injection itself (security-research blog post, exceptd skill content, vendor advisory). Educational discussion of injection patterns is not injection; classify by intent + provenance.",
425
+ "Confirm the grammar appears in a position the model would treat as authoritative. The same string in a markdown code-fence inside a documentation tool response is much lower-risk than the same string in a free-text web-fetch tool response that the model treats as ground-truth."
426
+ ]
427
+ },
428
+ {
429
+ "id": "unprompted-tool-chain",
430
+ "type": "log_pattern",
431
+ "value": "Tool call sequence shows the agent invoking a sensitive tool (file_read on credentials path, network_post to non-allowlisted destination, mcp_call to a different server) without a corresponding user-conversation turn that requested it. Latent transitions across 3+ tool calls without user prompt.",
432
+ "description": "AI-C2 signature — agent acting on attacker-injected instructions rather than user-supplied ones.",
433
+ "confidence": "high",
434
+ "deterministic": false,
435
+ "atlas_ref": "AML.T0096",
436
+ "false_positive_checks_required": [
437
+ "Confirm the agent's system prompt or persistent context does NOT instruct multi-step autonomous behaviour for the observed pattern. Some operator workflows (background-refresh-then-summarise) legitimately produce unprompted chains.",
438
+ "Confirm the tool chain's targets are sensitive — file_read on a project source file the user just discussed is not exfil; file_read on ~/.ssh, /etc/shadow, or a credential store IS exfil regardless of intent."
439
+ ]
440
+ },
441
+ {
442
+ "id": "agent-egress-to-non-allowlisted-destination",
443
+ "type": "network_pattern",
444
+ "value": "Egress capture shows the agent making an outbound request to a destination not on the egress allowlist OR on the allowlist but with a path / query parameter pattern containing base64 / hex / URL-encoded data segments > 64 bytes that do not correspond to a documented API surface.",
445
+ "description": "Exfiltration channel — direct (non-allowlisted) or encoded-into-allowlisted-host (CDN-as-exfil-channel).",
446
+ "confidence": "deterministic",
447
+ "deterministic": true,
448
+ "attack_ref": "T1041",
449
+ "false_positive_checks_required": [
450
+ "Confirm the encoded segment is not a legitimate cache key, image hash, or SDK telemetry payload. Many CDN URLs contain encoded content-addressing; differentiate by destination's documented API surface, not the encoding itself.",
451
+ "Confirm the destination is not a model-provider's tool-augmentation endpoint (Anthropic file upload, OpenAI vector-store ingest). Provider-internal flows look exfil-shaped but are legitimate when correlated with a corresponding tool_use block in the transcript."
452
+ ]
453
+ },
454
+ {
455
+ "id": "credential-shadow-in-tool-args",
456
+ "type": "log_pattern",
457
+ "value": "Tool-call arguments contain content matching credential patterns: AWS access-key prefix (AKIA, ASIA), GitHub PAT prefix (ghp_, gho_, github_pat_), Slack token prefix (xoxb-, xoxa-), JWT-shape strings, RSA / OpenSSH key headers — when the tool's documented purpose does not require credential handling.",
458
+ "description": "Cross-server credential-shadow — agent passing credentials to a tool that should not need them. Embrace the Red tool-poisoning signature.",
459
+ "confidence": "deterministic",
460
+ "deterministic": true,
461
+ "attack_ref": "T1552.001",
462
+ "false_positive_checks_required": [
463
+ "Confirm the tool is not one the operator legitimately built for credential management (vault-fetch, secret-rotate). Allowlisting must be evidence-backed via documented per-tool purpose.",
464
+ "Confirm the credential pattern hit is not a legitimate test fixture or documentation string. Hash the matched substring against a known-test-fixtures denylist before classifying as live credential."
465
+ ]
466
+ },
467
+ {
468
+ "id": "rag-source-from-untrusted-origin",
469
+ "type": "log_pattern",
470
+ "value": "RAG ingestion source includes an externally-writable surface (public-internet web page, public Slack channel, public GitHub repo issue body, public Google Doc, public Notion page) without integrity controls (no hash-pinning, no operator-side content review, no content-source attestation).",
471
+ "description": "Indirect-injection arrival vector — attacker-controllable content reaches the agent's context window through legitimate RAG ingestion.",
472
+ "confidence": "deterministic",
473
+ "deterministic": true,
474
+ "atlas_ref": "AML.T0024",
475
+ "false_positive_checks_required": [
476
+ "Confirm the source is genuinely externally-writable. A public-read but operator-write GitHub repo is lower-risk than the same repo with `triage` access granted to community contributors.",
477
+ "Confirm the agent's downstream context-handling does not apply trust-zone segregation (treating retrieved content as 'data' not 'instruction'). Trust-zoned content handling demotes this indicator; raw concatenation into the model's context window does not."
478
+ ]
479
+ },
480
+ {
481
+ "id": "rubber-stamp-approval-pattern",
482
+ "type": "log_pattern",
483
+ "value": "Approval-latency baseline for a sensitive tool shows mean approval-time < 5s AND denial rate < 1% over > 50 calls.",
484
+ "description": "Human-in-the-loop attestation failure — approval is functionally automatic. Removes the gate without removing the audit trail.",
485
+ "confidence": "high",
486
+ "deterministic": false,
487
+ "attack_ref": "T1059",
488
+ "false_positive_checks_required": [
489
+ "Confirm the approver is not a dedicated operator using a documented workflow that legitimately produces fast approval (e.g. a developer with deep context approving their own agent's expected next call).",
490
+ "Confirm the call population is from real attacker-relevant scenarios. A baseline drawn entirely from legitimate routine flows will trivially produce rubber-stamp metrics; sample at least 10% of calls flagged by other indicators."
491
+ ]
492
+ }
493
+ ],
494
+ "false_positive_profile": [
495
+ {
496
+ "indicator_id": "instruction-coercion-in-tool-response",
497
+ "benign_pattern": "Tool response is a documentation page or security-research article that legitimately quotes injection patterns as examples.",
498
+ "distinguishing_test": "Apply trust-zone classification: the tool response's source URL or file path should be classified as trusted (operator-curated docs) or untrusted (web-fetch of arbitrary URL). Instruction-coercion in trusted-source content is documentation; the same in untrusted-source content is an injection attempt."
499
+ },
500
+ {
501
+ "indicator_id": "agent-egress-to-non-allowlisted-destination",
502
+ "benign_pattern": "Model provider's documented tool augmentation endpoint (file upload, vector-store ingest) not yet on the operator's allowlist.",
503
+ "distinguishing_test": "Correlate the egress hit with the contemporaneous tool_use block in the transcript. A legitimate file_upload tool call produces a corresponding outbound to the provider's documented endpoint; an unprompted outbound with no transcript correlate is the signal."
504
+ }
505
+ ],
506
+ "minimum_signal": {
507
+ "detected": "Any deterministic indicator fires AND the false_positive_checks_required step does not produce an unambiguous benign explanation. For agent-egress-to-non-allowlisted-destination + credential-shadow-in-tool-args, single fire = detected. For instruction-coercion + rag-source-from-untrusted-origin, require pairing with at least one other indicator firing on the same agent within the same conversation window.",
508
+ "inconclusive": "Inventory complete but tool-call transcripts or egress data are partial (retention shortfall, provider API unreachable). Cannot deny without resolving the data gaps.",
509
+ "not_detected": "All inventory complete, no indicator fires, tool-allowlists do not auto-approve high-impact operations, RAG sources are operator-owned or integrity-controlled, egress is allowlisted with payload inspection, approval-latency baseline shows non-rubber-stamp pattern, no instruction-coercion grammar in tool responses."
510
+ }
511
+ },
512
+ "analyze": {
513
+ "rwep_inputs": [
514
+ {
515
+ "signal_id": "auto-approve-on-high-impact-tool",
516
+ "rwep_factor": "active_exploitation",
517
+ "weight": 25,
518
+ "notes": "CVE-2025-53773 confirmed exploitation; YOLO-mode pattern observed widely in developer-tooling deployments."
519
+ },
520
+ {
521
+ "signal_id": "instruction-coercion-in-tool-response",
522
+ "rwep_factor": "active_exploitation",
523
+ "weight": 25,
524
+ "notes": "CVE-2026-30615 + Trail of Bits + Invariant Labs disclosures show active exploitation."
525
+ },
526
+ {
527
+ "signal_id": "credential-shadow-in-tool-args",
528
+ "rwep_factor": "active_exploitation",
529
+ "weight": 25,
530
+ "notes": "Cross-server credential-shadow pattern documented in researcher disclosures + operational incidents."
531
+ },
532
+ {
533
+ "signal_id": "agent-egress-to-non-allowlisted-destination",
534
+ "rwep_factor": "active_exploitation",
535
+ "weight": 25,
536
+ "notes": "Direct exfil channel — confirmed in 2025 ChatGPT Operator-class incident reports."
537
+ },
538
+ {
539
+ "signal_id": "rag-source-from-untrusted-origin",
540
+ "rwep_factor": "public_poc",
541
+ "weight": 15,
542
+ "notes": "Indirect-injection-via-RAG PoCs are publicly documented (Embrace the Red, Simon Willison)."
543
+ },
544
+ {
545
+ "signal_id": "unprompted-tool-chain",
546
+ "rwep_factor": "ai_weaponization",
547
+ "weight": 10,
548
+ "notes": "AI-C2 fingerprint; the technique IS AI-weaponisation."
549
+ },
550
+ {
551
+ "signal_id": "auto-approve-on-high-impact-tool",
552
+ "rwep_factor": "blast_radius",
553
+ "weight": 5,
554
+ "notes": "Auto-approve multiplies blast radius of any other indicator that fires together."
555
+ },
556
+ {
557
+ "signal_id": "rubber-stamp-approval-pattern",
558
+ "rwep_factor": "blast_radius",
559
+ "weight": 5,
560
+ "notes": "Functionally equivalent to auto-approval — same blast-radius multiplier."
561
+ }
562
+ ],
563
+ "blast_radius_model": {
564
+ "scope_question": "If the agent is successfully prompt-injected into exfiltrating data, what scope of data is in reach of the agent's tools?",
565
+ "scoring_rubric": [
566
+ {
567
+ "condition": "Agent scoped to a single sandbox / scratch directory with no network egress and no credential access.",
568
+ "blast_radius_score": 1,
569
+ "description": "Sandbox-scope exfil only; cleanup = sandbox reset."
570
+ },
571
+ {
572
+ "condition": "Agent has read-only access to a single project's source + read-only network egress to documentation domains.",
573
+ "blast_radius_score": 2,
574
+ "description": "Single project source exfil; data is operator-owned but limited scope."
575
+ },
576
+ {
577
+ "condition": "Agent has access to operator credentials (env vars, ~/.aws, ~/.ssh, vault tokens) for non-prod environments.",
578
+ "blast_radius_score": 3,
579
+ "description": "Credential exfil + lateral-movement primitive scoped to non-prod."
580
+ },
581
+ {
582
+ "condition": "Agent has access to production credentials, cloud-provider OIDC tokens, or model-provider API keys with billing scope.",
583
+ "blast_radius_score": 4,
584
+ "description": "Production credential exfil; downstream compromise of cloud + provider account."
585
+ },
586
+ {
587
+ "condition": "Agent has access to customer data, PII corpus, or model-provider account with full org-admin scope (Anthropic Workspaces admin, OpenAI org owner).",
588
+ "blast_radius_score": 5,
589
+ "description": "Customer-data exfil OR full AI-provider org compromise."
590
+ }
591
+ ]
592
+ },
593
+ "compliance_theater_check": {
594
+ "claim": "The deployed agent is under EU AI Act Art.15 robustness conformity, with human-in-the-loop oversight on all sensitive tool calls, and egress is restricted to an allowlist — therefore prompt-injection-induced exfiltration is not possible.",
595
+ "audit_evidence": "Model conformity-assessment certificate, tool-allowlist documentation showing approval-required state per tool, egress-allowlist configuration.",
596
+ "reality_test": "For a sample deployed agent: (a) submit an indirect-injection payload via a RAG-source that the agent will retrieve (operator-controlled test channel) containing a crafted instruction to call file_read on a credential path and post the result to an operator-controlled exfil endpoint; (b) observe whether the agent attempts the tool calls; (c) for approval-required tools, measure whether the approver actually denies. Theater verdict if the attack succeeds at any of (a)-(c) — the payload reaches the agent, the agent attempts the tool calls, or the approver fails to deny: paper compliance + human-in-the-loop coexist with an exploitable exfil path.",
597
+ "theater_verdict_if_gap": "Org demonstrates AI Act conformity + tool-allowlist + egress restriction, but the deployed-system composition still permits prompt-injection-induced exfiltration. Either (a) add per-deployment robustness testing with the operator-specific tool + RAG composition, (b) introduce trust-zone segregation between retrieved content and model instructions, (c) deploy egress-payload inspection (not just destination allowlist), OR (d) generate a defensible exception via policy-exception-gen."
598
+ },
599
+ "framework_gap_mapping": [
600
+ {
601
+ "finding_id": "llm-tool-use-exfil-detected",
602
+ "framework": "nist-csf-2",
603
+ "claimed_control": "PR.DS-02 + PR.DS-10",
604
+ "actual_gap": "Treats agent's tool calls as legitimate transmission. Cannot distinguish operator-intended from attacker-induced calls.",
605
+ "required_control": "Add agent-call provenance attribution to data-confidentiality controls. Each tool call should carry an attestation linking it to the user-conversation turn that authorised it; calls without such linkage are exfil candidates."
606
+ },
607
+ {
608
+ "finding_id": "llm-tool-use-exfil-detected",
609
+ "framework": "eu-ai-act",
610
+ "claimed_control": "Art.15 — Accuracy, robustness, cybersecurity",
611
+ "actual_gap": "Per-model conformity does not cover per-deployment model + tool + RAG composition.",
612
+ "required_control": "Extend Art.15 conformity to per-deployment composition assessment for high-risk agentic systems. Operator-specific tool-injection batteries must be part of the conformity evidence."
613
+ },
614
+ {
615
+ "finding_id": "llm-tool-use-exfil-detected",
616
+ "framework": "eu-ai-act",
617
+ "claimed_control": "Art.55 — Systemic-risk obligations on GPAI providers",
618
+ "actual_gap": "Applies only to providers; operator deployments are out of scope.",
619
+ "required_control": "A parallel deployer-side obligation set for high-risk agentic deployments. Bring composition-risk assessment + per-deployment red-teaming into scope."
620
+ },
621
+ {
622
+ "finding_id": "llm-tool-use-exfil-detected",
623
+ "framework": "iso-27001-2022",
624
+ "claimed_control": "A.5.16 + A.5.17",
625
+ "actual_gap": "Treats tool-auth as service authentication; does not address attacker-supplied content inducing the legitimately authenticated service to act on the attacker's behalf.",
626
+ "required_control": "Extend identity controls to include intent attribution: every service call should carry an attestation of the upstream user-intent it serves."
627
+ },
628
+ {
629
+ "finding_id": "llm-tool-use-exfil-detected",
630
+ "framework": "soc2",
631
+ "claimed_control": "CC6.7 — Transmission of data",
632
+ "actual_gap": "Authorised tools + authorised destinations satisfy CC6.7; agent-mediated exfil invisible.",
633
+ "required_control": "Add agent-call provenance + payload-entropy inspection to the CC6.7 evidence package."
634
+ }
635
+ ],
636
+ "escalation_criteria": [
637
+ {
638
+ "condition": "rwep >= 85 AND blast_radius_score >= 4",
639
+ "action": "page_on_call"
640
+ },
641
+ {
642
+ "condition": "credential-shadow-in-tool-args == fired",
643
+ "action": "trigger_playbook",
644
+ "target_playbook": "secrets"
645
+ },
646
+ {
647
+ "condition": "agent-egress-to-non-allowlisted-destination == fired AND blast_radius_score >= 3",
648
+ "action": "trigger_playbook",
649
+ "target_playbook": "ai-api"
650
+ },
651
+ {
652
+ "condition": "instruction-coercion-in-tool-response == fired AND finding.tool_surface == 'mcp'",
653
+ "action": "trigger_playbook",
654
+ "target_playbook": "mcp"
655
+ },
656
+ {
657
+ "condition": "compliance_theater_check.verdict == 'theater' AND jurisdiction_obligations contains 'EU'",
658
+ "action": "notify_legal"
659
+ }
660
+ ]
661
+ },
662
+ "validate": {
663
+ "remediation_paths": [
664
+ {
665
+ "id": "disable-auto-approve-on-high-impact-tools",
666
+ "description": "Remove auto-approve flags for any high-impact tool (file_write, shell_exec, network_post, mcp_call to non-allowlisted server). Migrate from `chat.tools.autoApprove: true` to per-tool approval with operator-defined criteria.",
667
+ "preconditions": [
668
+ "operator_admin_on_agent_config == true",
669
+ "developer_workflow_can_tolerate_approval_friction == true"
670
+ ],
671
+ "priority": 1,
672
+ "compensating_controls": [
673
+ "approval_decision_recorded_in_audit_log",
674
+ "approval_pattern_baseline_monitored"
675
+ ],
676
+ "estimated_time_hours": 4
677
+ },
678
+ {
679
+ "id": "introduce-trust-zone-segregation",
680
+ "description": "Refactor the agent's context-handling so that RAG-retrieved content + tool responses are clearly delineated as untrusted-data not instructions. Use explicit role-tagged context blocks (`<retrieved-content trust='untrusted'>...</retrieved-content>`) and instruct the model in the system prompt to treat untrusted-zone content as data only.",
681
+ "preconditions": [
682
+ "agent_code_is_operator_owned == true",
683
+ "model_supports_trust_zone_pattern == true"
684
+ ],
685
+ "priority": 2,
686
+ "compensating_controls": [
687
+ "trust_zone_test_battery_in_ci",
688
+ "system_prompt_versioned_in_source_control"
689
+ ],
690
+ "estimated_time_hours": 12
691
+ },
692
+ {
693
+ "id": "deploy-egress-payload-inspection",
694
+ "description": "Add egress-payload inspection (not just destination allowlist) for the agent's outbound surface. Flag high-entropy query parameters, base64 / hex segments > 64 bytes on non-API paths, and unusual content-length-to-destination patterns.",
695
+ "preconditions": [
696
+ "egress_inspection_capability_available == true",
697
+ "agent_egress_traffic_routable_through_inspection == true"
698
+ ],
699
+ "priority": 2,
700
+ "compensating_controls": [
701
+ "inspection_rules_recorded_in_iac",
702
+ "false_positive_baseline_established"
703
+ ],
704
+ "estimated_time_hours": 8
705
+ },
706
+ {
707
+ "id": "lock-down-rag-sources",
708
+ "description": "Restrict RAG ingestion to operator-owned sources OR external sources with integrity controls (hash-pinning, signed content, operator-side content review). Remove any source that is externally-writable without integrity controls.",
709
+ "preconditions": [
710
+ "operator_admin_on_rag_pipeline == true",
711
+ "alternative_content_sources_available_for_removed_externals == true"
712
+ ],
713
+ "priority": 2,
714
+ "compensating_controls": [
715
+ "rag_source_inventory_recorded",
716
+ "integrity_control_evidence_per_source"
717
+ ],
718
+ "estimated_time_hours": 8
719
+ },
720
+ {
721
+ "id": "policy-exception",
722
+ "description": "Generate auditor-ready policy exception when faster paths are blocked.",
723
+ "preconditions": [
724
+ "remediation_paths[1..4] partially blocked",
725
+ "ciso_acceptance_obtainable == true"
726
+ ],
727
+ "priority": 5,
728
+ "compensating_controls": [
729
+ "enhanced_transcript_review_daily",
730
+ "weekly_agent_red_team_battery"
731
+ ],
732
+ "estimated_time_hours": 6
733
+ }
734
+ ],
735
+ "validation_tests": [
736
+ {
737
+ "id": "indirect-injection-payload-blocked",
738
+ "test": "Place an operator-controlled indirect-injection payload in a RAG-source the agent reads. Confirm the agent does NOT execute the injected instruction OR — if it attempts — the approval gate denies AND the egress-inspection layer blocks the exfil.",
739
+ "expected_result": "Agent treats injected content as data; either does not attempt the call, approval gate denies, or egress inspection blocks.",
740
+ "test_type": "exploit_replay"
741
+ },
742
+ {
743
+ "id": "auto-approve-disabled-on-high-impact",
744
+ "test": "Inspect the deployed agent config for any auto-approve flag on high-impact tools. Confirm none remain.",
745
+ "expected_result": "No auto-approve flag on file_write, shell_exec, network_post, mcp_call to non-allowlisted server.",
746
+ "test_type": "functional"
747
+ },
748
+ {
749
+ "id": "egress-payload-inspection-fires",
750
+ "test": "Submit a synthetic agent request with a high-entropy base64 payload in a non-API query parameter. Confirm the inspection layer flags + blocks.",
751
+ "expected_result": "Inspection layer fires + blocks; audit log records the decision.",
752
+ "test_type": "negative"
753
+ },
754
+ {
755
+ "id": "rag-source-inventory-conformance",
756
+ "test": "Enumerate every RAG ingestion source. Confirm each appears on the allowlist with integrity-control evidence.",
757
+ "expected_result": "100% of RAG sources have documented integrity controls or are operator-owned.",
758
+ "test_type": "functional"
759
+ },
760
+ {
761
+ "id": "no-regression-on-legitimate-agent-workflows",
762
+ "test": "Run the operator's standard agent workflow suite (typical developer tasks, expected RAG retrievals). Confirm no regression — legitimate workflows complete without spurious denials.",
763
+ "expected_result": "Legitimate workflows complete; spurious-denial rate within baseline.",
764
+ "test_type": "regression"
765
+ }
766
+ ],
767
+ "residual_risk_statement": {
768
+ "risk": "Agentic-AI tool-abuse-via-prompt-injection residual risk that cannot be closed within the jurisdiction's notification window.",
769
+ "why_remains": "Either (a) trust-zone refactoring requires a model + tool architecture migration the team has not yet completed, (b) egress-payload inspection is not feasible for the agent's deployment environment (edge / mobile / customer-premise), OR (c) RAG sources include externally-writable surfaces with no available alternative.",
770
+ "acceptance_level": "ciso",
771
+ "compensating_controls_in_place": [
772
+ "enhanced_transcript_review_daily",
773
+ "weekly_agent_red_team_battery",
774
+ "approval_latency_baseline_monitored"
775
+ ]
776
+ },
777
+ "evidence_requirements": [
778
+ {
779
+ "evidence_type": "config_diff",
780
+ "description": "Diff of agent config showing auto-approve removal + trust-zone introduction + RAG-source allowlist tightening, plus the change-management approval reference.",
781
+ "retention_period": "audit_cycle",
782
+ "framework_satisfied": [
783
+ "eu-ai-act-art15",
784
+ "iso-27001-2022-A.5.16",
785
+ "soc2-cc6.7"
786
+ ]
787
+ },
788
+ {
789
+ "evidence_type": "exploit_replay_negative",
790
+ "description": "Indirect-injection-payload-blocked test results showing the agent does not execute injected instructions post-remediation.",
791
+ "retention_period": "1_year",
792
+ "framework_satisfied": [
793
+ "eu-ai-act-art15",
794
+ "soc2-cc6.7"
795
+ ]
796
+ },
797
+ {
798
+ "evidence_type": "scan_report",
799
+ "description": "Agent inventory + tool-allowlist + RAG-source inventory + egress-allowlist snapshot pre + post remediation.",
800
+ "retention_period": "1_year",
801
+ "framework_satisfied": [
802
+ "eu-ai-act-art15",
803
+ "iso-27001-2022-A.5.16"
804
+ ]
805
+ },
806
+ {
807
+ "evidence_type": "attestation",
808
+ "description": "Signed exceptd attestation file with evidence_hash, fired indicators, RWEP at detection, RWEP post-remediation, residual risk acceptance.",
809
+ "retention_period": "7_years",
810
+ "framework_satisfied": [
811
+ "nist-800-53-CA-7",
812
+ "iso-27001-2022-A.5.36",
813
+ "eu-ai-act-art15"
814
+ ]
815
+ }
816
+ ],
817
+ "regression_trigger": [
818
+ {
819
+ "condition": "new_agent_deployed == true",
820
+ "interval": "on_event"
821
+ },
822
+ {
823
+ "condition": "new_tool_added_to_allowlist == true",
824
+ "interval": "on_event"
825
+ },
826
+ {
827
+ "condition": "new_rag_source_added == true",
828
+ "interval": "on_event"
829
+ },
830
+ {
831
+ "condition": "model_provider_version_bump == true",
832
+ "interval": "on_event"
833
+ },
834
+ {
835
+ "condition": "weekly",
836
+ "interval": "7d"
837
+ }
838
+ ]
839
+ },
840
+ "close": {
841
+ "evidence_package": {
842
+ "bundle_format": "csaf-2.0",
843
+ "contents": [
844
+ "all_validation_tests_passed",
845
+ "agent_config_diff",
846
+ "tool_allowlist_pre_post",
847
+ "rag_source_inventory_pre_post",
848
+ "exploit_replay_negative",
849
+ "residual_risk_statement",
850
+ "framework_gap_mapping",
851
+ "compliance_theater_verdict",
852
+ "attestation"
853
+ ],
854
+ "destination": "local_only",
855
+ "signed": true
856
+ },
857
+ "learning_loop": {
858
+ "enabled": true,
859
+ "lesson_template": {
860
+ "attack_vector": "Agentic-AI tool abuse via prompt injection — $finding_class (e.g. auto-approve-on-high-impact-tool, instruction-coercion-in-tool-response, agent-egress-to-non-allowlisted-destination, credential-shadow-in-tool-args).",
861
+ "control_gap": "Per-model conformity + per-tool allowlist + per-destination egress allowlist all satisfied the literal controls, while the per-deployment composition of model + tools + RAG sources permitted exfil through legitimate-auth tools.",
862
+ "framework_gap": "NIST CSF 2.0 PR.DS-02 + PR.DS-10, EU AI Act Art.15 + Art.55, ISO 27001 A.5.16 + A.5.17, SOC 2 CC6.7, NIS2 Art.21(2)(e) collectively underspecify agentic-AI tool abuse. Per-model assessment, per-tool allowlist, per-destination egress allowlist all pass while the composition permits exfil.",
863
+ "new_control_requirement": "Per-deployment composition assessment for agentic systems: operator-specific tool-injection battery, trust-zone segregation between retrieved content + model instructions, egress-payload inspection (not just destination allowlist), agent-call provenance attribution linking each tool call to the user-conversation turn that authorised it."
864
+ },
865
+ "feeds_back_to_skills": [
866
+ "ai-attack-surface",
867
+ "rag-pipeline-security",
868
+ "mcp-agent-trust",
869
+ "ai-c2-detection",
870
+ "framework-gap-analysis",
871
+ "zeroday-gap-learn"
872
+ ]
873
+ },
874
+ "notification_actions": [
875
+ {
876
+ "obligation_ref": "EU/EU AI Act Art.73 (serious incident) 360h",
877
+ "deadline": "computed_at_runtime",
878
+ "recipient": "internal_legal",
879
+ "evidence_attached": [
880
+ "incident_description",
881
+ "ai_system_inventory",
882
+ "affected_users_estimate"
883
+ ],
884
+ "draft_notification": "EU AI Act Art.73 serious-incident notification: agentic-AI tool-abuse incident affecting AI system ${ai_system_inventory}. Fired indicators: ${fired_indicator_ids}. Affected users estimate: ${affected_users_estimate}. Containment in place: ${containment_record}. Full incident description attached."
885
+ },
886
+ {
887
+ "obligation_ref": "EU/GDPR Art.33 72h",
888
+ "deadline": "computed_at_runtime",
889
+ "recipient": "internal_legal",
890
+ "evidence_attached": [
891
+ "exfiltrated_data_categories",
892
+ "data_subject_impact_assessment",
893
+ "containment_record"
894
+ ],
895
+ "draft_notification": "GDPR Art.33 notification: personal data breach via agentic-AI tool-abuse exfiltration. Data categories: ${exfiltrated_data_categories}. Data-subject impact: ${data_subject_impact_assessment}. Containment: ${containment_record}."
896
+ },
897
+ {
898
+ "obligation_ref": "EU/NIS2 Art.23 24h",
899
+ "deadline": "computed_at_runtime",
900
+ "recipient": "internal_legal",
901
+ "evidence_attached": [
902
+ "exfiltration_scope_estimate",
903
+ "agent_tool_chain_inventory",
904
+ "containment_record"
905
+ ],
906
+ "draft_notification": "Initial NIS2 Art.23 24-hour early-warning notification: agentic-AI tool-abuse incident. Exfiltration scope estimate: ${exfiltration_scope_estimate}. Agent tool chain inventory: ${agent_tool_chain_inventory}. Containment in place: ${containment_record}. Full incident assessment to follow within 72 hours per Art.23(4)."
907
+ }
908
+ ],
909
+ "exception_generation": {
910
+ "trigger_condition": "remediation_blocked == true OR (trust_zone_refactor_eta > jurisdiction_window AND egress_payload_inspection_infeasible_in_deployment_environment == true)",
911
+ "exception_template": {
912
+ "scope": "Agentic-AI tool-abuse residual risk across ${affected_agent_count} deployed agent(s); remediation paths 1-4 partially blocked.",
913
+ "duration": "until_architecture_migration_or_30d",
914
+ "compensating_controls": [
915
+ "enhanced_transcript_review_daily",
916
+ "weekly_agent_red_team_battery",
917
+ "approval_latency_baseline_monitored",
918
+ "rag_source_external_writable_quarantine"
919
+ ],
920
+ "risk_acceptance_owner": "ciso",
921
+ "auditor_ready_language": "Pursuant to ${framework_id} ${control_id}, the organisation documents a time-bound risk acceptance for agentic-AI tool-abuse residual risk across ${affected_agent_count} deployed agent(s). Trust-zone refactor ETA: ${refactor_eta}. Egress-payload inspection feasibility: ${inspection_feasibility}. Compensating controls in place: ${compensating_controls}. Residual RWEP post-compensation: ${rwep_post_compensation}. Risk accepted by ${ciso_name} on ${acceptance_date}. Time-bound until ${duration_expiry}. The exception will be re-evaluated on (a) architecture migration completion, (b) the listed expiry date, OR (c) a new indicator firing under the manual-review compensating control — whichever is first."
922
+ }
923
+ },
924
+ "regression_schedule": {
925
+ "next_run": "computed_at_runtime",
926
+ "trigger": "both",
927
+ "notify_on_skip": true
928
+ }
929
+ }
930
+ },
931
+ "directives": [
932
+ {
933
+ "id": "all-deployed-agents-and-tool-surfaces",
934
+ "title": "Inventory + behaviour-test every deployed agent, tool surface, and RAG source",
935
+ "applies_to": {
936
+ "always": true
937
+ }
938
+ },
939
+ {
940
+ "id": "yolo-mode-class-sweep",
941
+ "title": "Targeted sweep for CVE-2025-53773 YOLO-mode-class auto-approve exposure",
942
+ "applies_to": {
943
+ "cve": "CVE-2025-53773"
944
+ },
945
+ "phase_overrides": {
946
+ "direct": {
947
+ "rwep_threshold": {
948
+ "escalate": 75,
949
+ "monitor": 50,
950
+ "close": 25
951
+ }
952
+ }
953
+ }
954
+ },
955
+ {
956
+ "id": "indirect-injection-via-rag-targeted",
957
+ "title": "Targeted indirect-injection-via-RAG sweep (AML.T0024 + AML.T0051 chained)",
958
+ "applies_to": {
959
+ "atlas_ttp": "AML.T0051"
960
+ }
961
+ }
962
+ ]
963
+ }