@blamejs/exceptd-skills 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. package/AGENTS.md +232 -0
  2. package/ARCHITECTURE.md +267 -0
  3. package/CHANGELOG.md +616 -0
  4. package/CONTEXT.md +203 -0
  5. package/LICENSE +200 -0
  6. package/NOTICE +82 -0
  7. package/README.md +307 -0
  8. package/SECURITY.md +73 -0
  9. package/agents/README.md +81 -0
  10. package/agents/report-generator.md +156 -0
  11. package/agents/skill-updater.md +102 -0
  12. package/agents/source-validator.md +119 -0
  13. package/agents/threat-researcher.md +149 -0
  14. package/bin/exceptd.js +183 -0
  15. package/data/_indexes/_meta.json +88 -0
  16. package/data/_indexes/activity-feed.json +362 -0
  17. package/data/_indexes/catalog-summaries.json +229 -0
  18. package/data/_indexes/chains.json +7135 -0
  19. package/data/_indexes/currency.json +359 -0
  20. package/data/_indexes/did-ladders.json +451 -0
  21. package/data/_indexes/frequency.json +2072 -0
  22. package/data/_indexes/handoff-dag.json +476 -0
  23. package/data/_indexes/jurisdiction-clocks.json +967 -0
  24. package/data/_indexes/jurisdiction-map.json +536 -0
  25. package/data/_indexes/recipes.json +319 -0
  26. package/data/_indexes/section-offsets.json +3656 -0
  27. package/data/_indexes/stale-content.json +14 -0
  28. package/data/_indexes/summary-cards.json +1736 -0
  29. package/data/_indexes/theater-fingerprints.json +381 -0
  30. package/data/_indexes/token-budget.json +2137 -0
  31. package/data/_indexes/trigger-table.json +1374 -0
  32. package/data/_indexes/xref.json +818 -0
  33. package/data/atlas-ttps.json +282 -0
  34. package/data/cve-catalog.json +496 -0
  35. package/data/cwe-catalog.json +1017 -0
  36. package/data/d3fend-catalog.json +738 -0
  37. package/data/dlp-controls.json +1039 -0
  38. package/data/exploit-availability.json +67 -0
  39. package/data/framework-control-gaps.json +1255 -0
  40. package/data/global-frameworks.json +2913 -0
  41. package/data/rfc-references.json +324 -0
  42. package/data/zeroday-lessons.json +377 -0
  43. package/keys/public.pem +3 -0
  44. package/lib/framework-gap.js +328 -0
  45. package/lib/job-queue.js +195 -0
  46. package/lib/lint-skills.js +536 -0
  47. package/lib/prefetch.js +372 -0
  48. package/lib/refresh-external.js +713 -0
  49. package/lib/schemas/cve-catalog.schema.json +151 -0
  50. package/lib/schemas/manifest.schema.json +106 -0
  51. package/lib/schemas/skill-frontmatter.schema.json +113 -0
  52. package/lib/scoring.js +149 -0
  53. package/lib/sign.js +197 -0
  54. package/lib/ttp-mapper.js +80 -0
  55. package/lib/validate-catalog-meta.js +198 -0
  56. package/lib/validate-cve-catalog.js +213 -0
  57. package/lib/validate-indexes.js +83 -0
  58. package/lib/validate-package.js +162 -0
  59. package/lib/validate-vendor.js +85 -0
  60. package/lib/verify.js +216 -0
  61. package/lib/worker-pool.js +84 -0
  62. package/manifest-snapshot.json +1833 -0
  63. package/manifest.json +2108 -0
  64. package/orchestrator/README.md +124 -0
  65. package/orchestrator/dispatcher.js +140 -0
  66. package/orchestrator/event-bus.js +146 -0
  67. package/orchestrator/index.js +874 -0
  68. package/orchestrator/pipeline.js +201 -0
  69. package/orchestrator/scanner.js +327 -0
  70. package/orchestrator/scheduler.js +137 -0
  71. package/package.json +113 -0
  72. package/sbom.cdx.json +158 -0
  73. package/scripts/audit-cross-skill.js +261 -0
  74. package/scripts/audit-perf.js +160 -0
  75. package/scripts/bootstrap.js +205 -0
  76. package/scripts/build-indexes.js +721 -0
  77. package/scripts/builders/activity-feed.js +79 -0
  78. package/scripts/builders/catalog-summaries.js +67 -0
  79. package/scripts/builders/currency.js +109 -0
  80. package/scripts/builders/cwe-chains.js +105 -0
  81. package/scripts/builders/did-ladders.js +149 -0
  82. package/scripts/builders/frequency.js +89 -0
  83. package/scripts/builders/jurisdiction-clocks.js +126 -0
  84. package/scripts/builders/recipes.js +159 -0
  85. package/scripts/builders/section-offsets.js +162 -0
  86. package/scripts/builders/stale-content.js +171 -0
  87. package/scripts/builders/summary-cards.js +166 -0
  88. package/scripts/builders/theater-fingerprints.js +198 -0
  89. package/scripts/builders/token-budget.js +96 -0
  90. package/scripts/check-manifest-snapshot.js +217 -0
  91. package/scripts/predeploy.js +267 -0
  92. package/scripts/refresh-manifest-snapshot.js +57 -0
  93. package/scripts/refresh-sbom.js +222 -0
  94. package/skills/age-gates-child-safety/skill.md +456 -0
  95. package/skills/ai-attack-surface/skill.md +282 -0
  96. package/skills/ai-c2-detection/skill.md +440 -0
  97. package/skills/ai-risk-management/skill.md +311 -0
  98. package/skills/api-security/skill.md +287 -0
  99. package/skills/attack-surface-pentest/skill.md +381 -0
  100. package/skills/cloud-security/skill.md +384 -0
  101. package/skills/compliance-theater/skill.md +365 -0
  102. package/skills/container-runtime-security/skill.md +379 -0
  103. package/skills/coordinated-vuln-disclosure/skill.md +473 -0
  104. package/skills/defensive-countermeasure-mapping/skill.md +300 -0
  105. package/skills/dlp-gap-analysis/skill.md +337 -0
  106. package/skills/email-security-anti-phishing/skill.md +206 -0
  107. package/skills/exploit-scoring/skill.md +331 -0
  108. package/skills/framework-gap-analysis/skill.md +374 -0
  109. package/skills/fuzz-testing-strategy/skill.md +313 -0
  110. package/skills/global-grc/skill.md +564 -0
  111. package/skills/identity-assurance/skill.md +272 -0
  112. package/skills/incident-response-playbook/skill.md +546 -0
  113. package/skills/kernel-lpe-triage/skill.md +303 -0
  114. package/skills/mcp-agent-trust/skill.md +326 -0
  115. package/skills/mlops-security/skill.md +325 -0
  116. package/skills/ot-ics-security/skill.md +340 -0
  117. package/skills/policy-exception-gen/skill.md +437 -0
  118. package/skills/pqc-first/skill.md +546 -0
  119. package/skills/rag-pipeline-security/skill.md +294 -0
  120. package/skills/researcher/skill.md +310 -0
  121. package/skills/sector-energy/skill.md +409 -0
  122. package/skills/sector-federal-government/skill.md +302 -0
  123. package/skills/sector-financial/skill.md +398 -0
  124. package/skills/sector-healthcare/skill.md +373 -0
  125. package/skills/security-maturity-tiers/skill.md +464 -0
  126. package/skills/skill-update-loop/skill.md +463 -0
  127. package/skills/supply-chain-integrity/skill.md +318 -0
  128. package/skills/threat-model-currency/skill.md +404 -0
  129. package/skills/threat-modeling-methodology/skill.md +312 -0
  130. package/skills/webapp-security/skill.md +281 -0
  131. package/skills/zeroday-gap-learn/skill.md +350 -0
  132. package/vendor/blamejs/LICENSE +201 -0
  133. package/vendor/blamejs/README.md +54 -0
  134. package/vendor/blamejs/_PROVENANCE.json +54 -0
  135. package/vendor/blamejs/retry.js +335 -0
  136. package/vendor/blamejs/worker-pool.js +418 -0
@@ -0,0 +1,1039 @@
1
+ {
2
+ "_meta": {
3
+ "schema_version": "1.0.0",
4
+ "last_updated": "2026-05-11",
5
+ "skill_refs_field": "dlp_refs",
6
+ "note": "DLP controls relevant to mid-2026 threat reality. Legacy controls (email/web/USB) are kept because compliance frameworks still cite them; modern controls (LLM prompt, MCP tool-arg, embedding-store) are the actual operational gap. The dlp-gap-analysis skill consumes this catalog.",
7
+ "tlp": "CLEAR",
8
+ "source_confidence": {
9
+ "scheme": "Admiralty (A-F + 1-6)",
10
+ "default": "B2",
11
+ "note": "B = usually reliable; 2 = probably true. Per-entry overrides via entry-level source_confidence field. Public-record catalogs (NVD, ATLAS, CWE, RFC, framework publishers) get A1 (completely reliable, confirmed). Project-curated catalogs (zeroday-lessons, exploit-availability) default to B2 with source citations."
12
+ },
13
+ "freshness_policy": {
14
+ "default_review_cadence_days": 90,
15
+ "stale_after_days": 180,
16
+ "rebuild_after_days": 365,
17
+ "note": "Per-entry last_verified governs decay. Skills depending on this catalog must check entry freshness before high-stakes use."
18
+ }
19
+ },
20
+ "DLP-CHAN-EMAIL-OUT": {
21
+ "id": "DLP-CHAN-EMAIL-OUT",
22
+ "name": "Outbound email DLP",
23
+ "category": "channel",
24
+ "modern_or_legacy": "legacy",
25
+ "description": "Inspection of outbound SMTP/Exchange/M365/Workspace mail flow for content matching protected classes (PII, PHI, PCI, source code, classified markings). The canonical DLP channel since the early 2000s.",
26
+ "covers": [
27
+ "outbound corporate email body and attachments",
28
+ "BCC/forward to personal addresses",
29
+ "auto-forwarding rules"
30
+ ],
31
+ "does_not_cover": [
32
+ "mail sent from unmanaged personal accounts on managed endpoints",
33
+ "encrypted attachments without key escrow",
34
+ "content typed into LLM web UIs"
35
+ ],
36
+ "attack_techniques_addressed": [
37
+ "T1048.003",
38
+ "T1567.002"
39
+ ],
40
+ "framework_controls_partially_mapping": [
41
+ "NIST-800-53-AC-4",
42
+ "NIST-800-53-SC-7",
43
+ "ISO-27001-2022-A.8.12",
44
+ "PCI-DSS-4.0-3.4",
45
+ "GDPR-Art32"
46
+ ],
47
+ "vendor_categories": [
48
+ "Microsoft Purview DLP",
49
+ "Google Workspace DLP",
50
+ "Symantec DLP",
51
+ "Forcepoint DLP",
52
+ "Proofpoint Information Protection"
53
+ ],
54
+ "ai_pipeline_applicability": "Not applicable as a primary control point. Email DLP sees prompts only if a user emails them; the LLM channel bypasses it entirely.",
55
+ "lag_notes": "Frameworks treat email as the canonical egress channel. In mid-2026 telemetry, LLM-prompt egress outpaces email egress for sensitive content in dev-heavy orgs.",
56
+ "evidence_examples": [
57
+ "Purview DLP policy hit logs",
58
+ "Symantec DLP incident records with SMTP channel tag"
59
+ ],
60
+ "privacy_regimes": [
61
+ "GDPR",
62
+ "CCPA",
63
+ "LGPD",
64
+ "PIPEDA",
65
+ "POPIA"
66
+ ],
67
+ "last_verified": "2026-05-11"
68
+ },
69
+ "DLP-CHAN-WEB-UPLOAD": {
70
+ "id": "DLP-CHAN-WEB-UPLOAD",
71
+ "name": "Web upload / HTTP(S) egress DLP",
72
+ "category": "channel",
73
+ "modern_or_legacy": "legacy",
74
+ "description": "Proxy or endpoint inspection of HTTP(S) file uploads to web destinations (file-sharing, webmail, social, generic POST). Implemented in SWG / SASE / CASB.",
75
+ "covers": [
76
+ "multipart/form-data file uploads",
77
+ "POST bodies to known categories (file share, paste sites)",
78
+ "managed-browser uploads"
79
+ ],
80
+ "does_not_cover": [
81
+ "uploads inside TLS-pinned native apps without endpoint agent",
82
+ "content pasted into web LLM UIs (technically a POST, but classification rarely tuned for prompt text)",
83
+ "WebRTC data channels"
84
+ ],
85
+ "attack_techniques_addressed": [
86
+ "T1567",
87
+ "T1041"
88
+ ],
89
+ "framework_controls_partially_mapping": [
90
+ "NIST-800-53-SC-7",
91
+ "NIST-800-53-AC-4",
92
+ "ISO-27001-2022-A.8.20",
93
+ "ISO-27001-2022-A.8.23"
94
+ ],
95
+ "vendor_categories": [
96
+ "Secure Web Gateway (Zscaler, Netskope, iboss, Cloudflare Gateway)",
97
+ "CASB (Microsoft Defender for Cloud Apps, Netskope CASB)",
98
+ "Endpoint DLP (Trellix, Forcepoint)"
99
+ ],
100
+ "ai_pipeline_applicability": "Partial. SWG/CASB can see POSTs to chat.openai.com / claude.ai / gemini.google.com, but content classification was tuned for file uploads, not free-form prompt text — high false-negative rate on prompts.",
101
+ "lag_notes": "NIST AC-4 and ISO A.8.23 treat 'web' as a uniform channel. In practice the LLM-prompt sub-channel needs distinct classification logic.",
102
+ "evidence_examples": [
103
+ "SWG block logs by URL category 'Generative AI'",
104
+ "CASB shadow-IT discovery reports with AI-tool tag"
105
+ ],
106
+ "privacy_regimes": [
107
+ "GDPR",
108
+ "CCPA",
109
+ "LGPD"
110
+ ],
111
+ "last_verified": "2026-05-11"
112
+ },
113
+ "DLP-CHAN-USB-REMOVABLE": {
114
+ "id": "DLP-CHAN-USB-REMOVABLE",
115
+ "name": "Removable media / USB DLP",
116
+ "category": "channel",
117
+ "modern_or_legacy": "legacy",
118
+ "description": "Endpoint control over data written to USB mass storage, MTP devices, and other removable media. Block, read-only, or content-classify-then-allow.",
119
+ "covers": [
120
+ "USB mass-storage write",
121
+ "MTP transfers to phones",
122
+ "SD cards, optical media"
123
+ ],
124
+ "does_not_cover": [
125
+ "device-to-device wireless (AirDrop, Quick Share)",
126
+ "tethered phone hotspot exfil",
127
+ "any AI-pipeline channel"
128
+ ],
129
+ "attack_techniques_addressed": [
130
+ "T1052.001",
131
+ "T1025"
132
+ ],
133
+ "framework_controls_partially_mapping": [
134
+ "NIST-800-53-MP-7",
135
+ "NIST-800-53-SC-41",
136
+ "ISO-27001-2022-A.7.10",
137
+ "PCI-DSS-4.0-9.4.6"
138
+ ],
139
+ "vendor_categories": [
140
+ "Endpoint DLP (Microsoft Purview Endpoint DLP, Trellix DLP Endpoint, Forcepoint DLP Endpoint)",
141
+ "EDR with device-control (CrowdStrike Falcon Device Control, SentinelOne)"
142
+ ],
143
+ "ai_pipeline_applicability": "Not applicable. USB controls are orthogonal to AI egress.",
144
+ "lag_notes": "PCI-DSS 9.4.6 still explicitly cites removable media. Coverage of this channel is required for paper compliance even though it is a shrinking fraction of real exfil.",
145
+ "evidence_examples": [
146
+ "Endpoint DLP USB block events",
147
+ "Device control allow-list configuration"
148
+ ],
149
+ "privacy_regimes": [
150
+ "GDPR",
151
+ "CCPA",
152
+ "HIPAA"
153
+ ],
154
+ "last_verified": "2026-05-11"
155
+ },
156
+ "DLP-CHAN-LLM-PROMPT": {
157
+ "id": "DLP-CHAN-LLM-PROMPT",
158
+ "name": "LLM prompt-text egress detection",
159
+ "category": "channel",
160
+ "modern_or_legacy": "modern",
161
+ "description": "Inspection of free-form text submitted to LLM endpoints (Anthropic, OpenAI, Google, Meta, third-party hosted, on-prem) for protected content. Implemented at SDK, proxy, or browser-isolation layer.",
162
+ "covers": [
163
+ "text typed or pasted into LLM web/desktop UIs",
164
+ "API prompts sent via SDK",
165
+ "system-prompt leakage via prompt injection responses"
166
+ ],
167
+ "does_not_cover": [
168
+ "binary attachments to LLM (separate control)",
169
+ "RAG retrievals embedded in context (separate control)",
170
+ "MCP tool arguments (separate control)"
171
+ ],
172
+ "attack_techniques_addressed": [
173
+ "AML.T0024",
174
+ "AML.T0051",
175
+ "AML.T0057"
176
+ ],
177
+ "framework_controls_partially_mapping": [
178
+ "NIST-800-53-AC-4",
179
+ "NIST-800-53-SC-7",
180
+ "NIST-AI-RMF-MAP-4.1",
181
+ "ISO-27001-2022-A.8.12",
182
+ "ISO-42001-A.6.2.4",
183
+ "EU-AI-Act-Art15"
184
+ ],
185
+ "vendor_categories": [
186
+ "AI-aware DLP (Nightfall, Microsoft Purview AI Hub, Symantec DLP 2026, Forcepoint AI Security, Netskope GenAI)",
187
+ "Browser-isolation prompt inspection (Island, Talon, Menlo Security)",
188
+ "LLM gateway with policy (Portkey, LiteLLM-proxy, Cloudflare AI Gateway)"
189
+ ],
190
+ "ai_pipeline_applicability": "Primary control point. Without SDK-level prompt logging or an LLM gateway, this control has no visibility into prompt traffic at all — TLS-pinned native apps bypass network DLP.",
191
+ "lag_notes": "Legacy frameworks (NIST SC-7, AC-4; ISO A.8.12) assume the egress channel set is email/web/USB. The LLM prompt is a fourth channel with no existing framework requirement. EU AI Act Art 15 cites robustness but not DLP. ISO/IEC 42001:2023 A.6.2.4 references information classification for AI but is not prescriptive on prompt-egress controls.",
192
+ "evidence_examples": [
193
+ "SDK-level prompt logging on Anthropic / OpenAI SDKs",
194
+ "LLM gateway audit logs (Portkey, Cloudflare AI Gateway)",
195
+ "Microsoft Defender for Cloud Apps AI policies with prompt-content alerts"
196
+ ],
197
+ "privacy_regimes": [
198
+ "GDPR (Art 5 minimisation, Art 32 security)",
199
+ "CCPA/CPRA",
200
+ "LGPD",
201
+ "EU AI Act Art 10 (data governance)"
202
+ ],
203
+ "last_verified": "2026-05-11"
204
+ },
205
+ "DLP-CHAN-LLM-CONTEXT": {
206
+ "id": "DLP-CHAN-LLM-CONTEXT",
207
+ "name": "LLM context-window egress detection",
208
+ "category": "channel",
209
+ "modern_or_legacy": "modern",
210
+ "description": "Inspection of non-prompt content placed into the model context: file attachments, RAG retrievals, tool outputs, document summarization inputs. Distinct from the user-prompt channel because content origin and classification path differ.",
211
+ "covers": [
212
+ "file attachments to LLM (PDF, code, images with embedded text)",
213
+ "RAG retrievals injected into context",
214
+ "tool/function outputs returned to the model",
215
+ "long-document summarization payloads"
216
+ ],
217
+ "does_not_cover": [
218
+ "the user-typed prompt itself (separate control)",
219
+ "model output / response data (separate control)"
220
+ ],
221
+ "attack_techniques_addressed": [
222
+ "AML.T0024",
223
+ "AML.T0048",
224
+ "AML.T0057"
225
+ ],
226
+ "framework_controls_partially_mapping": [
227
+ "NIST-800-53-AC-4",
228
+ "NIST-800-53-SI-12",
229
+ "NIST-AI-RMF-MEASURE-2.10",
230
+ "ISO-27001-2022-A.5.34",
231
+ "ISO-42001-A.7.4",
232
+ "EU-AI-Act-Art10"
233
+ ],
234
+ "vendor_categories": [
235
+ "AI-aware DLP (Microsoft Purview AI Hub, Nightfall AI, Forcepoint AI Security)",
236
+ "RAG-aware gateways (Portkey, Cloudflare AI Gateway with content rules)",
237
+ "Document-classification engines feeding RAG (Microsoft Purview Information Protection labels propagated to RAG corpus)"
238
+ ],
239
+ "ai_pipeline_applicability": "Primary control point for RAG-heavy deployments. Without content classification at retrieval time, the RAG layer becomes a confused-deputy egress channel — sensitive corpus content can be returned to under-cleared users.",
240
+ "lag_notes": "ISO A.5.34 covers privacy in PII processing but does not contemplate retrieval-time classification enforcement. NIST AI-RMF MEASURE 2.10 references information integrity but not retrieval-time egress filtering.",
241
+ "evidence_examples": [
242
+ "RAG retrieval audit logs with sensitivity labels",
243
+ "Purview labels visible on documents in vector store",
244
+ "Retrieval-time access decisions logged with user clearance vs. label"
245
+ ],
246
+ "privacy_regimes": [
247
+ "GDPR (Art 5, Art 32)",
248
+ "CCPA/CPRA",
249
+ "LGPD",
250
+ "HIPAA (where PHI enters RAG)"
251
+ ],
252
+ "last_verified": "2026-05-11"
253
+ },
254
+ "DLP-CHAN-MCP-TOOL-ARG": {
255
+ "id": "DLP-CHAN-MCP-TOOL-ARG",
256
+ "name": "MCP tool-call argument DLP",
257
+ "category": "channel",
258
+ "modern_or_legacy": "modern",
259
+ "description": "Inspection of arguments passed by an LLM-driven agent to an MCP (Model Context Protocol) tool call. The arguments may contain protected content extracted from the model context and routed to a third-party tool (search, file write, HTTP fetch).",
260
+ "covers": [
261
+ "tool-call argument payloads (JSON) sent to MCP servers",
262
+ "URL parameters in tool-initiated HTTP fetches",
263
+ "filesystem write paths and content"
264
+ ],
265
+ "does_not_cover": [
266
+ "the prompt that produced the tool call (separate control)",
267
+ "MCP server-side response data (separate control)",
268
+ "non-MCP plugin frameworks (OpenAI plugins, custom function-calling) — same idea, different implementation"
269
+ ],
270
+ "attack_techniques_addressed": [
271
+ "AML.T0051",
272
+ "AML.T0053",
273
+ "AML.T0010"
274
+ ],
275
+ "framework_controls_partially_mapping": [
276
+ "NIST-800-53-AC-4",
277
+ "NIST-800-53-CA-3",
278
+ "NIST-AI-RMF-GOVERN-1.3",
279
+ "ISO-27001-2022-A.5.14",
280
+ "ISO-42001-A.6.2.6"
281
+ ],
282
+ "vendor_categories": [
283
+ "MCP gateway / proxy with content inspection (emerging — Portkey MCP support, Anthropic enterprise MCP gateway preview)",
284
+ "Agent observability platforms (LangSmith, Helicone, Langfuse) with policy plugins",
285
+ "AI-aware DLP applied at the LLM gateway layer (Cloudflare AI Gateway with tool-arg rules)"
286
+ ],
287
+ "ai_pipeline_applicability": "Primary control point for agentic workflows. Tool calls are the agent's egress hands; uninspected tool-arg traffic is the modern equivalent of unauthenticated SMTP relay. Per CVE-2026-30615, the MCP trust surface is also an inbound vector.",
288
+ "lag_notes": "No major framework names MCP or tool-call argument inspection as a control. ISO/IEC 42001 references AI system component governance abstractly. The control category is operationally required but compliance-invisible — DR-1 risk.",
289
+ "evidence_examples": [
290
+ "MCP gateway audit logs with tool-name + arg-hash",
291
+ "Agent observability traces (LangSmith / Langfuse) with arg content",
292
+ "LLM-gateway tool-call policy hits"
293
+ ],
294
+ "privacy_regimes": [
295
+ "GDPR",
296
+ "CCPA/CPRA",
297
+ "LGPD"
298
+ ],
299
+ "last_verified": "2026-05-11"
300
+ },
301
+ "DLP-CHAN-CLIPBOARD-AI": {
302
+ "id": "DLP-CHAN-CLIPBOARD-AI",
303
+ "name": "Clipboard-to-AI-tool egress detection",
304
+ "category": "channel",
305
+ "modern_or_legacy": "modern",
306
+ "description": "Detection of clipboard content being pasted into AI tool surfaces (LLM web UIs, IDE AI assistants, AI-enabled desktop apps). Implemented at endpoint or via managed-browser hooks.",
307
+ "covers": [
308
+ "paste into chat.openai.com / claude.ai / gemini.google.com / copilot.microsoft.com",
309
+ "paste into IDE AI sidebars (Cursor, Windsurf, Copilot Chat)",
310
+ "paste into ChatGPT desktop app, Claude desktop app"
311
+ ],
312
+ "does_not_cover": [
313
+ "clipboard content used outside AI tools (general clipboard DLP, separate control)",
314
+ "drag-and-drop into AI tools (file-handling path)"
315
+ ],
316
+ "attack_techniques_addressed": [
317
+ "AML.T0024",
318
+ "T1115"
319
+ ],
320
+ "framework_controls_partially_mapping": [
321
+ "NIST-800-53-AC-4",
322
+ "NIST-800-53-SC-7",
323
+ "ISO-27001-2022-A.8.12"
324
+ ],
325
+ "vendor_categories": [
326
+ "Endpoint DLP with clipboard awareness (Microsoft Purview Endpoint DLP, Forcepoint DLP Endpoint, Trellix DLP)",
327
+ "Managed enterprise browser (Island, Talon, Chrome Enterprise Premium, Edge for Business with policy)",
328
+ "Endpoint AI guardrails (Polymer, Zscaler AI Protection)"
329
+ ],
330
+ "ai_pipeline_applicability": "Primary control point for unmanaged-LLM use. Paste is the dominant ingress path for prompt content; SDK logging doesn't see it because it happens before the LLM call.",
331
+ "lag_notes": "Frameworks have no clipboard-specific control. ISO A.8.12 covers data leakage generally; clipboard→AI is a sub-channel that needs explicit instrumentation.",
332
+ "evidence_examples": [
333
+ "Endpoint DLP clipboard-paste event with destination process name",
334
+ "Managed-browser paste-blocked events on Generative AI URL category"
335
+ ],
336
+ "privacy_regimes": [
337
+ "GDPR",
338
+ "CCPA/CPRA",
339
+ "LGPD"
340
+ ],
341
+ "last_verified": "2026-05-11"
342
+ },
343
+ "DLP-CHAN-CODE-COMPLETION": {
344
+ "id": "DLP-CHAN-CODE-COMPLETION",
345
+ "name": "Code-completion context exfiltration DLP",
346
+ "category": "channel",
347
+ "modern_or_legacy": "modern",
348
+ "description": "Inspection or suppression of source code, secrets, and proprietary content sent to AI code-completion services (GitHub Copilot, Cursor, Codeium, Tabnine, Amazon Q Developer) as completion context.",
349
+ "covers": [
350
+ "surrounding-file context windows shipped with completion requests",
351
+ "secrets present in source that get included in context",
352
+ "proprietary algorithms / IP shipped as completion context"
353
+ ],
354
+ "does_not_cover": [
355
+ "user-typed chat to the assistant (separate prompt-egress control)",
356
+ "training-time data ingestion by the vendor (contractual control, not technical)"
357
+ ],
358
+ "attack_techniques_addressed": [
359
+ "AML.T0024",
360
+ "AML.T0048",
361
+ "T1213.003"
362
+ ],
363
+ "framework_controls_partially_mapping": [
364
+ "NIST-800-53-AC-4",
365
+ "NIST-800-53-SC-7",
366
+ "NIST-800-53-SA-15",
367
+ "ISO-27001-2022-A.8.12",
368
+ "ISO-27001-2022-A.8.28",
369
+ "EU-AI-Act-Art10"
370
+ ],
371
+ "vendor_categories": [
372
+ "Code-assistant vendor enterprise controls (GitHub Copilot Business/Enterprise content exclusions, Cursor Privacy Mode, Codeium enterprise context-filtering)",
373
+ "Pre-commit / pre-context secret scanners (gitleaks, trufflehog, GitHub secret scanning) feeding exclusion lists",
374
+ "AI-aware DLP at the network egress (Nightfall for code, Polymer)"
375
+ ],
376
+ "ai_pipeline_applicability": "Primary control point. Once context leaves the IDE, no downstream DLP sees it. Vendor-side exclusions are the only practical control unless the assistant is self-hosted.",
377
+ "lag_notes": "NIST SA-15 (development process security) and ISO A.8.28 (secure coding) predate AI code assistants and do not address vendor-side context retention. Frameworks treat this as a 'third-party data sharing' question, which is contractual, not technical.",
378
+ "evidence_examples": [
379
+ "Copilot Business content-exclusion policy with file globs",
380
+ "Cursor Privacy Mode org setting confirmation",
381
+ "Pre-commit secret-scan blocks preventing secrets entering repo (preventive)"
382
+ ],
383
+ "privacy_regimes": [
384
+ "GDPR (where source contains personal data)",
385
+ "trade-secret regimes (US DTSA, EU Trade Secrets Directive 2016/943)"
386
+ ],
387
+ "last_verified": "2026-05-11"
388
+ },
389
+ "DLP-CHAN-IDE-TELEMETRY": {
390
+ "id": "DLP-CHAN-IDE-TELEMETRY",
391
+ "name": "IDE / dev-tool telemetry DLP",
392
+ "category": "channel",
393
+ "modern_or_legacy": "modern",
394
+ "description": "Control over telemetry emitted by IDEs and dev tools (VS Code, JetBrains, Visual Studio, terminal tools) that may include file paths, repository names, error payloads containing snippets, and crash dumps with memory content.",
395
+ "covers": [
396
+ "IDE crash dumps and error reports",
397
+ "extension marketplace telemetry",
398
+ "AI-extension usage telemetry with prompt previews",
399
+ "terminal command telemetry in instrumented shells"
400
+ ],
401
+ "does_not_cover": [
402
+ "primary AI assistant context (separate control DLP-CHAN-CODE-COMPLETION)",
403
+ "git push to vendor-hosted repos (covered by source-control egress controls)"
404
+ ],
405
+ "attack_techniques_addressed": [
406
+ "T1213.003",
407
+ "T1119"
408
+ ],
409
+ "framework_controls_partially_mapping": [
410
+ "NIST-800-53-AC-4",
411
+ "NIST-800-53-SI-12",
412
+ "ISO-27001-2022-A.8.16",
413
+ "ISO-27001-2022-A.5.34"
414
+ ],
415
+ "vendor_categories": [
416
+ "IDE policy management (JetBrains policy server, VS Code enterprise policy, Visual Studio enterprise admin)",
417
+ "Extension allow-list and telemetry-suppression GPO/MDM",
418
+ "Network egress controls suppressing telemetry endpoints (typically SWG)"
419
+ ],
420
+ "ai_pipeline_applicability": "Partial. AI extensions emit telemetry separate from their primary inference traffic; suppressing this catches secondary leakage paths but not the primary prompt/context channels.",
421
+ "lag_notes": "ISO A.5.34 and GDPR Art 25 (data-protection by design) require minimisation; vendor telemetry defaults rarely meet this without configuration.",
422
+ "evidence_examples": [
423
+ "GPO/MDM telemetry settings deployed",
424
+ "SWG block rules on vendor telemetry domains",
425
+ "JetBrains data-sharing-disabled policy confirmation"
426
+ ],
427
+ "privacy_regimes": [
428
+ "GDPR (Art 5 minimisation, Art 25 PbD)",
429
+ "CCPA/CPRA"
430
+ ],
431
+ "last_verified": "2026-05-11"
432
+ },
433
+ "DLP-CLASS-REGEX-PII": {
434
+ "id": "DLP-CLASS-REGEX-PII",
435
+ "name": "Regex / keyword / dictionary classification",
436
+ "category": "classification",
437
+ "modern_or_legacy": "legacy",
438
+ "description": "Pattern-based content classification (regex, exact-data-match, keyword lists, document fingerprinting). Required by every prescriptive privacy regime that names specific data types.",
439
+ "covers": [
440
+ "structured PII (SSN, IBAN, credit card with Luhn, passport)",
441
+ "PHI identifiers (NPI, MRN, DEA)",
442
+ "PCI cardholder data",
443
+ "exact-data-match for customer DBs"
444
+ ],
445
+ "does_not_cover": [
446
+ "semantically equivalent paraphrases",
447
+ "obfuscated content (base64, leetspeak, ROT)",
448
+ "unstructured trade-secret content"
449
+ ],
450
+ "attack_techniques_addressed": [
451
+ "T1048",
452
+ "T1567",
453
+ "T1213"
454
+ ],
455
+ "framework_controls_partially_mapping": [
456
+ "PCI-DSS-4.0-3.4",
457
+ "HIPAA-164.312(e)",
458
+ "GDPR-Art32",
459
+ "NIST-800-53-SI-12",
460
+ "ISO-27001-2022-A.8.12"
461
+ ],
462
+ "vendor_categories": [
463
+ "Every commercial DLP (Microsoft Purview, Symantec, Forcepoint, Trellix, Proofpoint, Netskope, Zscaler)",
464
+ "Open-source (Apache Tika + custom regex, Microsoft Presidio)"
465
+ ],
466
+ "ai_pipeline_applicability": "Partial. Regex on LLM prompts catches structured identifiers but misses the dominant AI-pipeline risk (unstructured proprietary content). Necessary but insufficient.",
467
+ "lag_notes": "PCI-DSS and HIPAA still require pattern-based detection. Frameworks treat regex coverage as table stakes; it remains required for compliance even though it is the weakest classifier.",
468
+ "evidence_examples": [
469
+ "DLP policy with PCI cardholder-data classifier",
470
+ "Presidio analyzer config in prompt-inspection pipeline"
471
+ ],
472
+ "privacy_regimes": [
473
+ "GDPR",
474
+ "CCPA/CPRA",
475
+ "LGPD",
476
+ "PIPEDA",
477
+ "POPIA",
478
+ "HIPAA",
479
+ "PCI-DSS"
480
+ ],
481
+ "last_verified": "2026-05-11"
482
+ },
483
+ "DLP-CLASS-ML-CLASSIFIER": {
484
+ "id": "DLP-CLASS-ML-CLASSIFIER",
485
+ "name": "ML-based content classification",
486
+ "category": "classification",
487
+ "modern_or_legacy": "modern",
488
+ "description": "Supervised or zero-shot ML classifiers that label content (financial, legal, source-code, M&A-confidential) without explicit pattern rules. Used to catch unstructured proprietary content regex cannot match.",
489
+ "covers": [
490
+ "unstructured proprietary content categories",
491
+ "intent-based classifications ('this looks like an M&A document')",
492
+ "code vs. natural language discrimination"
493
+ ],
494
+ "does_not_cover": [
495
+ "adversarial-paraphrase bypasses",
496
+ "novel content classes the model wasn't trained on",
497
+ "low-resource languages with sparse training data"
498
+ ],
499
+ "attack_techniques_addressed": [
500
+ "AML.T0024",
501
+ "T1567",
502
+ "T1213"
503
+ ],
504
+ "framework_controls_partially_mapping": [
505
+ "NIST-800-53-SI-12",
506
+ "NIST-AI-RMF-MEASURE-2.7",
507
+ "ISO-27001-2022-A.8.12",
508
+ "ISO-42001-A.7.4",
509
+ "EU-AI-Act-Art15"
510
+ ],
511
+ "vendor_categories": [
512
+ "AI-classifier DLP (Microsoft Purview trainable classifiers, Nightfall AI ML detectors, Symantec DLP ML, Forcepoint Risk-Adaptive Protection)",
513
+ "Custom (sentence-transformers + scikit-learn / lightgbm pipelines)"
514
+ ],
515
+ "ai_pipeline_applicability": "Primary control point for unstructured AI-pipeline egress. The only practical answer for 'is this prompt revealing our roadmap?'",
516
+ "lag_notes": "Frameworks reference 'data classification' abstractly. EU AI Act Art 15 cites robustness requirements but does not name DLP classifiers as a control category. NIST AI-RMF MEASURE 2.7 covers performance / robustness of AI systems used in controls — applies recursively when the DLP classifier itself is AI.",
517
+ "evidence_examples": [
518
+ "Trainable-classifier accuracy report (P/R/F1) on labeled validation set",
519
+ "Adversarial-paraphrase robustness test results",
520
+ "False-positive rate by content category"
521
+ ],
522
+ "privacy_regimes": [
523
+ "GDPR (where classifier outputs are decisional)",
524
+ "EU AI Act (where DLP classifier qualifies as high-risk AI system used in workplace monitoring — Annex III consideration)"
525
+ ],
526
+ "last_verified": "2026-05-11"
527
+ },
528
+ "DLP-CLASS-EMBEDDING-MATCH": {
529
+ "id": "DLP-CLASS-EMBEDDING-MATCH",
530
+ "name": "Embedding-similarity match to protected corpus",
531
+ "category": "classification",
532
+ "modern_or_legacy": "modern",
533
+ "description": "Compute embedding of candidate egress content, compare to embedding index of protected corpus (source repos, sensitive documents, customer data). High similarity triggers DLP action.",
534
+ "covers": [
535
+ "paraphrased restatements of protected documents",
536
+ "code fragments that semantically match private repos",
537
+ "leakage of training-data-equivalent content"
538
+ ],
539
+ "does_not_cover": [
540
+ "content with no representative in the protected corpus",
541
+ "adversarially perturbed embeddings (rare in practice for short text)"
542
+ ],
543
+ "attack_techniques_addressed": [
544
+ "AML.T0024",
545
+ "AML.T0048",
546
+ "AML.T0057"
547
+ ],
548
+ "framework_controls_partially_mapping": [
549
+ "NIST-800-53-SI-12",
550
+ "NIST-AI-RMF-MEASURE-2.10",
551
+ "ISO-27001-2022-A.8.12",
552
+ "ISO-42001-A.7.4"
553
+ ],
554
+ "vendor_categories": [
555
+ "AI-aware DLP with embedding match (Nightfall AI, Microsoft Purview AI Hub semantic match preview, Forcepoint AI Security)",
556
+ "Custom (pgvector / Qdrant / Pinecone over protected corpus + cosine threshold)"
557
+ ],
558
+ "ai_pipeline_applicability": "Primary control point for trade-secret and code leakage. The only classifier that can catch 'they explained our private algorithm in their own words.'",
559
+ "lag_notes": "No framework names embedding-similarity as a control. Compliance auditors will not ask for it; security teams should require it for high-IP-value organizations.",
560
+ "evidence_examples": [
561
+ "Embedding index manifest with corpus inventory",
562
+ "Threshold tuning report (cosine similarity vs. FP/FN rate)",
563
+ "Per-event similarity score in DLP incident log"
564
+ ],
565
+ "privacy_regimes": [
566
+ "trade-secret regimes (US DTSA, EU Directive 2016/943)",
567
+ "GDPR (where corpus contains personal data and embeddings are themselves processing)"
568
+ ],
569
+ "last_verified": "2026-05-11"
570
+ },
571
+ "DLP-CLASS-WATERMARK": {
572
+ "id": "DLP-CLASS-WATERMARK",
573
+ "name": "Output watermarking and provenance signals",
574
+ "category": "classification",
575
+ "modern_or_legacy": "modern",
576
+ "description": "Embed detectable provenance markers (steganographic watermarks, statistical watermarks for LLM outputs, C2PA content credentials) so that subsequent appearance of protected content can be traced.",
577
+ "covers": [
578
+ "AI-generated text watermarking (statistical token-distribution marks)",
579
+ "image / video provenance (C2PA, Content Credentials)",
580
+ "document watermarks (visible + invisible)"
581
+ ],
582
+ "does_not_cover": [
583
+ "paraphrased text that breaks statistical watermarks",
584
+ "screenshotted content that bypasses document watermarks",
585
+ "re-encoded media without provenance preservation"
586
+ ],
587
+ "attack_techniques_addressed": [
588
+ "AML.T0024",
589
+ "T1567"
590
+ ],
591
+ "framework_controls_partially_mapping": [
592
+ "EU-AI-Act-Art50",
593
+ "NIST-AI-RMF-MEASURE-3.1",
594
+ "ISO-42001-A.6.2.5"
595
+ ],
596
+ "vendor_categories": [
597
+ "LLM watermarking research / vendor APIs (Google SynthID for text/image, OpenAI watermarking work)",
598
+ "C2PA content credentials (Adobe, Microsoft, Sony, Leica camera firmware)",
599
+ "Document watermarking (Adobe Acrobat, Microsoft Information Protection labels with visible marks)"
600
+ ],
601
+ "ai_pipeline_applicability": "Partial. Provides post-hoc forensic value when content reappears. Does not prevent egress in real time. EU AI Act Art 50 makes AI-output marking a legal obligation for general-purpose AI systems serving EU users from 2026.",
602
+ "lag_notes": "EU AI Act Art 50 is ahead of NIST and ISO; US frameworks have no equivalent prescriptive watermarking requirement. Statistical text watermarks remain a research-grade control with documented bypass methods.",
603
+ "evidence_examples": [
604
+ "SynthID watermark detection report on suspicious content",
605
+ "C2PA manifest verification result",
606
+ "Document-watermark match in forensic review"
607
+ ],
608
+ "privacy_regimes": [
609
+ "EU AI Act Art 50 (transparency)",
610
+ "EU DSA (where applicable)"
611
+ ],
612
+ "last_verified": "2026-05-11"
613
+ },
614
+ "DLP-SURFACE-RAG-CORPUS": {
615
+ "id": "DLP-SURFACE-RAG-CORPUS",
616
+ "name": "RAG corpus as protected surface",
617
+ "category": "surface",
618
+ "modern_or_legacy": "modern",
619
+ "description": "Treat the document corpus indexed for RAG as a protected data surface. Includes ingest-time classification, label propagation, retrieval-time access control, and per-document retention.",
620
+ "covers": [
621
+ "sensitivity labeling of corpus documents",
622
+ "retrieval-time access-control evaluation (user clearance vs. document label)",
623
+ "ingest exclusions for documents that should never enter the index",
624
+ "right-to-erasure propagation to corpus and to derived embeddings"
625
+ ],
626
+ "does_not_cover": [
627
+ "the embedding store as a separate surface (DLP-SURFACE-EMBEDDING-STORE)",
628
+ "model training data (DLP-SURFACE-TRAINING-DATA)"
629
+ ],
630
+ "attack_techniques_addressed": [
631
+ "AML.T0024",
632
+ "AML.T0048",
633
+ "AML.T0057"
634
+ ],
635
+ "framework_controls_partially_mapping": [
636
+ "NIST-800-53-AC-3",
637
+ "NIST-800-53-AC-4",
638
+ "NIST-800-53-SC-28",
639
+ "NIST-AI-RMF-MEASURE-2.10",
640
+ "ISO-27001-2022-A.5.34",
641
+ "ISO-42001-A.7.4",
642
+ "GDPR-Art5",
643
+ "GDPR-Art17",
644
+ "EU-AI-Act-Art10"
645
+ ],
646
+ "vendor_categories": [
647
+ "RAG platforms with label-aware retrieval (Microsoft Copilot for M365 with Purview label honoring, Glean with permission mirroring, Vertex AI Search with IAM propagation)",
648
+ "Document classification engines feeding RAG (Microsoft Purview Information Protection, BigID, OneTrust DSAR)"
649
+ ],
650
+ "ai_pipeline_applicability": "Primary control point. RAG without label-aware retrieval is the confused-deputy default state — under-cleared users get over-cleared retrievals.",
651
+ "lag_notes": "GDPR Art 17 (right to erasure) does not anticipate that erasure must propagate to a vector store derived from the source document. Most RAG implementations cannot answer 'have you forgotten this person?' Until major frameworks codify retrieval-time access control, the operational requirement is documented but compliance-invisible.",
652
+ "evidence_examples": [
653
+ "Permission-mirroring configuration in RAG platform",
654
+ "Retrieval audit log with user clearance + document label",
655
+ "Erasure-propagation procedure with vector-store re-index step"
656
+ ],
657
+ "privacy_regimes": [
658
+ "GDPR (Art 5, Art 17, Art 32)",
659
+ "CCPA/CPRA (deletion)",
660
+ "LGPD",
661
+ "HIPAA"
662
+ ],
663
+ "last_verified": "2026-05-11"
664
+ },
665
+ "DLP-SURFACE-EMBEDDING-STORE": {
666
+ "id": "DLP-SURFACE-EMBEDDING-STORE",
667
+ "name": "Vector / embedding store as protected surface",
668
+ "category": "surface",
669
+ "modern_or_legacy": "modern",
670
+ "description": "Treat the vector database / embedding store as an independent protected data surface. Embeddings can be inverted to recover content (embedding-inversion attacks); the store must be access-controlled, encrypted, and audited.",
671
+ "covers": [
672
+ "IAM and network controls on vector DB (Pinecone, Qdrant, Weaviate, pgvector, OpenSearch k-NN, Milvus)",
673
+ "encryption at rest of embedding vectors",
674
+ "audit logging of vector queries and dumps",
675
+ "embedding-inversion resistance considerations"
676
+ ],
677
+ "does_not_cover": [
678
+ "the underlying corpus (DLP-SURFACE-RAG-CORPUS)",
679
+ "the model weights themselves"
680
+ ],
681
+ "attack_techniques_addressed": [
682
+ "AML.T0044",
683
+ "AML.T0048",
684
+ "T1530"
685
+ ],
686
+ "framework_controls_partially_mapping": [
687
+ "NIST-800-53-AC-3",
688
+ "NIST-800-53-SC-28",
689
+ "NIST-800-53-AU-2",
690
+ "ISO-27001-2022-A.8.20",
691
+ "ISO-27001-2022-A.8.24",
692
+ "GDPR-Art32"
693
+ ],
694
+ "vendor_categories": [
695
+ "Managed vector DB with enterprise controls (Pinecone enterprise, Qdrant Cloud, Weaviate Cloud, Azure AI Search, Vertex AI Vector Search)",
696
+ "Self-hosted with standard data-plane controls (pgvector inside Postgres with row-level security)"
697
+ ],
698
+ "ai_pipeline_applicability": "Primary control point. The embedding store is a derived dataset that inherits the sensitivity of its source corpus but is rarely scoped into DLP inventories.",
699
+ "lag_notes": "GDPR processors-and-controllers framing does not contemplate embeddings as a separate processing artifact. NIST 800-53 SC-28 (protection at rest) applies but is not specific to vector representations. Embedding-inversion as an attack class is not named in any current framework.",
700
+ "evidence_examples": [
701
+ "Vector-DB IAM policy snapshot",
702
+ "Encryption-at-rest configuration on vector store",
703
+ "Audit log of administrative dumps or large query exfil"
704
+ ],
705
+ "privacy_regimes": [
706
+ "GDPR (Art 32 security of processing; embedding-inversion considerations for personal data)",
707
+ "CCPA/CPRA",
708
+ "LGPD"
709
+ ],
710
+ "last_verified": "2026-05-11"
711
+ },
712
+ "DLP-SURFACE-TRAINING-DATA": {
713
+ "id": "DLP-SURFACE-TRAINING-DATA",
714
+ "name": "Model training data as protected surface",
715
+ "category": "surface",
716
+ "modern_or_legacy": "modern",
717
+ "description": "Treat data used for model fine-tuning, distillation, or pre-training as a protected surface, with governance over lineage, consent, sensitivity, and post-training extraction risk.",
718
+ "covers": [
719
+ "training-set lineage and consent records",
720
+ "membership-inference and training-data-extraction risk assessment",
721
+ "differential-privacy or de-identification of training data",
722
+ "opt-out propagation from upstream sources"
723
+ ],
724
+ "does_not_cover": [
725
+ "the corpus used at retrieval time (DLP-SURFACE-RAG-CORPUS)",
726
+ "the live prompt traffic (DLP-CHAN-LLM-PROMPT)"
727
+ ],
728
+ "attack_techniques_addressed": [
729
+ "AML.T0024",
730
+ "AML.T0044",
731
+ "AML.T0048"
732
+ ],
733
+ "framework_controls_partially_mapping": [
734
+ "NIST-AI-RMF-MAP-4.1",
735
+ "NIST-AI-RMF-MEASURE-2.10",
736
+ "ISO-42001-A.7.4",
737
+ "EU-AI-Act-Art10",
738
+ "GDPR-Art5",
739
+ "GDPR-Art6"
740
+ ],
741
+ "vendor_categories": [
742
+ "Data governance platforms (Collibra, BigID, OneTrust DataDiscovery)",
743
+ "ML lineage / observability (Weights & Biases, MLflow, Comet)",
744
+ "Differential-privacy training tooling (Google DP libraries, Opacus for PyTorch)"
745
+ ],
746
+ "ai_pipeline_applicability": "Primary control point for any organization fine-tuning or distilling models. Once data enters training, removal is not free — the model is the artifact.",
747
+ "lag_notes": "EU AI Act Art 10 (data and data governance) is the most prescriptive existing framework; ISO/IEC 42001:2023 A.7.4 references training data quality but is not prescriptive on extraction-risk testing. NIST AI-RMF treats this under MAP-4 and MEASURE-2.10. GDPR right-to-erasure interacts uncomfortably with trained model weights.",
748
+ "evidence_examples": [
749
+ "Training-set lineage manifest",
750
+ "Membership-inference attack test report",
751
+ "Differential-privacy epsilon budget configuration"
752
+ ],
753
+ "privacy_regimes": [
754
+ "GDPR (Art 5, Art 6, Art 17)",
755
+ "EU AI Act Art 10",
756
+ "CCPA/CPRA",
757
+ "LGPD"
758
+ ],
759
+ "last_verified": "2026-05-11"
760
+ },
761
+ "DLP-ENFORCE-BLOCK": {
762
+ "id": "DLP-ENFORCE-BLOCK",
763
+ "name": "Hard block enforcement",
764
+ "category": "enforcement",
765
+ "modern_or_legacy": "legacy",
766
+ "description": "Terminate or refuse the egress action when classification triggers. The strongest enforcement mode; produces the highest disruption and forces the highest classifier quality.",
767
+ "covers": [
768
+ "block on confirmed PCI, PHI, secrets, source-code patterns",
769
+ "block on embedding-similarity to protected corpus above threshold",
770
+ "block on regulated cross-border transfer"
771
+ ],
772
+ "does_not_cover": [
773
+ "false-positive cases — block converts FP into business-impact incident",
774
+ "asynchronous channels where 'block' is meaningless (after-the-fact analytics)"
775
+ ],
776
+ "attack_techniques_addressed": [
777
+ "T1048",
778
+ "T1567",
779
+ "AML.T0024"
780
+ ],
781
+ "framework_controls_partially_mapping": [
782
+ "NIST-800-53-AC-4",
783
+ "PCI-DSS-4.0-3.4",
784
+ "HIPAA-164.312(e)",
785
+ "ISO-27001-2022-A.8.12"
786
+ ],
787
+ "vendor_categories": [
788
+ "Every commercial DLP enforcement mode (Microsoft Purview, Symantec, Forcepoint, Trellix, Proofpoint, Netskope, Zscaler, Nightfall)"
789
+ ],
790
+ "ai_pipeline_applicability": "Applicable to channels with inline control (LLM gateway, managed browser, SDK proxy). Not applicable where DLP is observational only.",
791
+ "lag_notes": "Block enforcement is mandated implicitly by prescriptive privacy regimes (PCI-DSS, HIPAA). For AI-pipeline channels, inline blocking requires the org to operate through a gateway or proxy — not architecturally guaranteed.",
792
+ "evidence_examples": [
793
+ "DLP policy action='block' configuration",
794
+ "Block-event incident records with user, content class, channel"
795
+ ],
796
+ "privacy_regimes": [
797
+ "GDPR Art 32 (technical measures)",
798
+ "PCI-DSS",
799
+ "HIPAA",
800
+ "CCPA/CPRA",
801
+ "LGPD"
802
+ ],
803
+ "last_verified": "2026-05-11"
804
+ },
805
+ "DLP-ENFORCE-REDACT": {
806
+ "id": "DLP-ENFORCE-REDACT",
807
+ "name": "Inline redaction enforcement",
808
+ "category": "enforcement",
809
+ "modern_or_legacy": "modern",
810
+ "description": "Modify the egress payload in-flight to remove or pseudonymise classified content (replace SSNs with tokens, strip secret tokens, mask names). Lower disruption than block; preserves business workflow.",
811
+ "covers": [
812
+ "regex-defined PII tokens replaced with format-preserving placeholders",
813
+ "secrets stripped from prompts",
814
+ "named-entity redaction in LLM-bound text"
815
+ ],
816
+ "does_not_cover": [
817
+ "unstructured proprietary content that cannot be cleanly redacted",
818
+ "redaction-reversibility — once a name is replaced the LLM cannot reason about the entity correctly",
819
+ "false positives, which over-redact and degrade output quality"
820
+ ],
821
+ "attack_techniques_addressed": [
822
+ "AML.T0024",
823
+ "T1567"
824
+ ],
825
+ "framework_controls_partially_mapping": [
826
+ "NIST-800-53-AC-4",
827
+ "NIST-800-53-SI-12",
828
+ "ISO-27001-2022-A.8.11",
829
+ "GDPR-Art5",
830
+ "GDPR-Art32"
831
+ ],
832
+ "vendor_categories": [
833
+ "LLM-aware redaction (Microsoft Presidio + custom, Nightfall AI redaction mode, Forcepoint AI Security redact action, Cloudflare AI Gateway with regex rewrite)",
834
+ "Pseudonymisation tooling at LLM gateway (Portkey transforms)"
835
+ ],
836
+ "ai_pipeline_applicability": "Primary modern enforcement mode for LLM prompts. Preserves usability while honoring minimisation (GDPR Art 5). Critical for cross-border AI-pipeline use where raw PII transfer is restricted.",
837
+ "lag_notes": "GDPR Art 5 minimisation arguably requires redaction-or-equivalent on any prompt containing personal data leaving the controller — but no enforcement decision has yet tested this for LLM use.",
838
+ "evidence_examples": [
839
+ "Redaction-policy configuration with token format",
840
+ "Pre/post redaction sample diff in audit log",
841
+ "Round-trip quality test confirming LLM output usefulness after redaction"
842
+ ],
843
+ "privacy_regimes": [
844
+ "GDPR (Art 5 minimisation, Art 32)",
845
+ "CCPA/CPRA (de-identification standard)",
846
+ "LGPD",
847
+ "HIPAA (Safe Harbor de-identification)"
848
+ ],
849
+ "last_verified": "2026-05-11"
850
+ },
851
+ "DLP-ENFORCE-COACH": {
852
+ "id": "DLP-ENFORCE-COACH",
853
+ "name": "User-coaching / justification enforcement",
854
+ "category": "enforcement",
855
+ "modern_or_legacy": "modern",
856
+ "description": "On classification trigger, present a coaching prompt to the user explaining the policy and either allowing with logged justification or routing to a higher-friction approval. Trades enforcement strength for false-positive tolerance and culture-building.",
857
+ "covers": [
858
+ "paste into AI tool with confirmation prompt",
859
+ "outbound mail with 'are you sure?' coaching",
860
+ "approval-with-justification workflow for unusual but legitimate cases"
861
+ ],
862
+ "does_not_cover": [
863
+ "malicious insider (will simply provide false justification)",
864
+ "channels with no UI to coach into (programmatic SDK use)"
865
+ ],
866
+ "attack_techniques_addressed": [
867
+ "AML.T0024",
868
+ "T1567"
869
+ ],
870
+ "framework_controls_partially_mapping": [
871
+ "NIST-800-53-AT-2",
872
+ "NIST-800-53-AC-4",
873
+ "ISO-27001-2022-A.6.3",
874
+ "ISO-27001-2022-A.8.12"
875
+ ],
876
+ "vendor_categories": [
877
+ "Microsoft Purview adaptive policies with coaching",
878
+ "Forcepoint Risk-Adaptive Protection with user-coaching",
879
+ "Managed-browser coaching (Island, Talon)",
880
+ "Polymer DLP coaching prompts"
881
+ ],
882
+ "ai_pipeline_applicability": "Primary control point for non-malicious LLM misuse. Coaching scales without per-event review and provides the audit trail that maturing AI-use policies need.",
883
+ "lag_notes": "Frameworks treat user training (NIST AT-2, ISO A.6.3) as periodic; just-in-time coaching is a stronger control unrecognised in current standards.",
884
+ "evidence_examples": [
885
+ "Coaching-event log with user, content class, justification text",
886
+ "Conversion rate (coached -> proceed vs. coached -> abort) as a culture signal"
887
+ ],
888
+ "privacy_regimes": [
889
+ "GDPR (lawful basis documentation through justification)",
890
+ "workplace-monitoring regimes (works-council notification in EU, employer notice in CA/IL)"
891
+ ],
892
+ "last_verified": "2026-05-11"
893
+ },
894
+ "DLP-EVIDENCE-AUDIT": {
895
+ "id": "DLP-EVIDENCE-AUDIT",
896
+ "name": "DLP audit trail for compliance evidence",
897
+ "category": "evidence",
898
+ "modern_or_legacy": "legacy",
899
+ "description": "Tamper-evident logging of DLP events (policy hits, blocks, allows, coaching responses) suitable for compliance attestation and post-incident review. Required by every prescriptive privacy framework.",
900
+ "covers": [
901
+ "per-event logs (user, time, channel, classifier, action)",
902
+ "retention aligned to regulatory requirement",
903
+ "log integrity (hash-chained or signed)"
904
+ ],
905
+ "does_not_cover": [
906
+ "forensic-grade payload capture (DLP-EVIDENCE-FORENSICS)",
907
+ "real-time alerting (operational, not evidence)"
908
+ ],
909
+ "attack_techniques_addressed": [
910
+ "T1070",
911
+ "T1562"
912
+ ],
913
+ "framework_controls_partially_mapping": [
914
+ "NIST-800-53-AU-2",
915
+ "NIST-800-53-AU-6",
916
+ "ISO-27001-2022-A.8.15",
917
+ "ISO-27001-2022-A.8.16",
918
+ "PCI-DSS-4.0-10",
919
+ "HIPAA-164.312(b)",
920
+ "GDPR-Art30"
921
+ ],
922
+ "vendor_categories": [
923
+ "Every DLP vendor (Microsoft Purview, Symantec, Forcepoint, Trellix, Proofpoint, Nightfall) with SIEM export",
924
+ "SIEM (Splunk, Sentinel, Elastic, Chronicle) ingesting DLP feeds"
925
+ ],
926
+ "ai_pipeline_applicability": "Primary control point for AI use evidence. Without DLP audit on prompt channels, EU AI Act Art 12 (record-keeping) is non-attestable for any high-risk AI use case.",
927
+ "lag_notes": "PCI-DSS Requirement 10 and HIPAA 164.312(b) presume audit logs on classical channels. EU AI Act Art 12 extends record-keeping to AI systems but is not specific about DLP integration.",
928
+ "evidence_examples": [
929
+ "DLP-to-SIEM data pipeline with retention policy",
930
+ "Log-integrity verification (hash chain) result",
931
+ "Sample event extract for an attestation"
932
+ ],
933
+ "privacy_regimes": [
934
+ "GDPR (Art 30 records of processing)",
935
+ "EU AI Act Art 12 (record-keeping)",
936
+ "HIPAA",
937
+ "PCI-DSS",
938
+ "CCPA/CPRA",
939
+ "LGPD"
940
+ ],
941
+ "last_verified": "2026-05-11"
942
+ },
943
+ "DLP-EVIDENCE-FORENSICS": {
944
+ "id": "DLP-EVIDENCE-FORENSICS",
945
+ "name": "Per-event forensic capture",
946
+ "category": "evidence",
947
+ "modern_or_legacy": "modern",
948
+ "description": "On DLP trigger, capture the full payload (with privacy-aware handling) for forensic reconstruction. Distinct from audit logs which carry metadata; forensics carries content.",
949
+ "covers": [
950
+ "full prompt + tool-arg payload on AI-channel triggers",
951
+ "full mail / file payload on classical-channel triggers",
952
+ "chain-of-custody handling for evidentiary use"
953
+ ],
954
+ "does_not_cover": [
955
+ "routine audit (DLP-EVIDENCE-AUDIT — metadata only)",
956
+ "indefinite retention — forensics retention should be incident-scoped, not bulk"
957
+ ],
958
+ "attack_techniques_addressed": [
959
+ "T1567",
960
+ "T1213",
961
+ "AML.T0024"
962
+ ],
963
+ "framework_controls_partially_mapping": [
964
+ "NIST-800-53-IR-4",
965
+ "NIST-800-53-AU-10",
966
+ "ISO-27001-2022-A.5.27",
967
+ "ISO-27001-2022-A.5.28",
968
+ "PCI-DSS-4.0-12.10"
969
+ ],
970
+ "vendor_categories": [
971
+ "DLP forensic capture modes (Symantec DLP, Forcepoint DLP, Microsoft Purview Activity Explorer with content)",
972
+ "DFIR platforms (Magnet AXIOM, Cellebrite Enterprise) for follow-on analysis"
973
+ ],
974
+ "ai_pipeline_applicability": "Primary control point for incident response on AI channels. Without prompt-payload capture, post-incident reconstruction of an AI-mediated data leak is forensically blind.",
975
+ "lag_notes": "Privacy-by-design constraints (GDPR Art 25) create tension with forensic capture; the resolution is incident-scoped retention with strict access. No framework prescribes capture rules for AI channels specifically.",
976
+ "evidence_examples": [
977
+ "Forensic-capture policy with retention and access-control rules",
978
+ "Chain-of-custody record for a recent incident",
979
+ "DPIA covering forensic-capture processing"
980
+ ],
981
+ "privacy_regimes": [
982
+ "GDPR (Art 25 PbD; Art 32 security; balanced against Art 6(1)(f) legitimate interest)",
983
+ "works-council and labor-law restrictions in EU member states (DE, FR, NL)",
984
+ "CCPA/CPRA"
985
+ ],
986
+ "last_verified": "2026-05-11"
987
+ },
988
+ "DLP-LAG-LEGACY-SCOPE": {
989
+ "id": "DLP-LAG-LEGACY-SCOPE",
990
+ "name": "Legacy DLP scope gap (meta-control)",
991
+ "category": "framework_lag",
992
+ "modern_or_legacy": "modern",
993
+ "description": "Meta-entry: explicitly names the gap between the channel set legacy frameworks cite (email / web / USB) and the operational channel set of 2026 (LLM prompt, LLM context, MCP tool-arg, clipboard-to-AI, code-completion context, IDE telemetry). Used by dlp-gap-analysis to flag framework-as-truth drift.",
994
+ "covers": [
995
+ "surfacing the gap during framework-gap-analysis",
996
+ "rejecting compliance attestations that scope DLP to legacy channels only",
997
+ "ratcheting policy scope when new AI channels are deployed"
998
+ ],
999
+ "does_not_cover": [
1000
+ "any specific technical control — this entry is purely a gap declaration"
1001
+ ],
1002
+ "attack_techniques_addressed": [
1003
+ "AML.T0024",
1004
+ "AML.T0048",
1005
+ "AML.T0051",
1006
+ "AML.T0057"
1007
+ ],
1008
+ "framework_controls_partially_mapping": [
1009
+ "NIST-800-53-AC-4",
1010
+ "NIST-800-53-SC-7",
1011
+ "ISO-27001-2022-A.8.12",
1012
+ "PCI-DSS-4.0-3.4",
1013
+ "HIPAA-164.312(e)",
1014
+ "NIS2-Art21",
1015
+ "EU-AI-Act-Art15",
1016
+ "ISO-42001-A.7.4"
1017
+ ],
1018
+ "vendor_categories": [
1019
+ "Not applicable — this is a meta-control, not a product category"
1020
+ ],
1021
+ "ai_pipeline_applicability": "Primary control point conceptually. The whole purpose of this entry is to insist that AI-pipeline channels are in scope when frameworks were written assuming they were not.",
1022
+ "lag_notes": "This entry exists because every prescriptive privacy framework (NIST 800-53, ISO 27001:2022, PCI-DSS 4.0, HIPAA, GDPR Art 32) cites generic 'data leakage prevention' or specific legacy channels. None enumerates the AI channel set as of mid-2026. Treating any of these frameworks as exhaustive scope is DR-1 framework-as-truth drift.",
1023
+ "evidence_examples": [
1024
+ "dlp-gap-analysis skill output flagging legacy-only scope",
1025
+ "Policy ratchet record adding LLM prompt channel to in-scope DLP"
1026
+ ],
1027
+ "privacy_regimes": [
1028
+ "GDPR",
1029
+ "EU AI Act",
1030
+ "CCPA/CPRA",
1031
+ "LGPD",
1032
+ "PIPEDA",
1033
+ "POPIA",
1034
+ "HIPAA",
1035
+ "PCI-DSS"
1036
+ ],
1037
+ "last_verified": "2026-05-11"
1038
+ }
1039
+ }