npm - genai-security-crosswalk - Versions diffs - 2.0.0 - Mend

genai-security-crosswalk 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

package/LICENSE.md +28 -0
package/README.md +618 -0
package/data/entries/ASI01.json +911 -0
package/data/entries/ASI02.json +850 -0
package/data/entries/ASI03.json +854 -0
package/data/entries/ASI04.json +759 -0
package/data/entries/ASI05.json +764 -0
package/data/entries/ASI06.json +817 -0
package/data/entries/ASI07.json +789 -0
package/data/entries/ASI08.json +788 -0
package/data/entries/ASI09.json +754 -0
package/data/entries/ASI10.json +833 -0
package/data/entries/DSGAI01.json +779 -0
package/data/entries/DSGAI02.json +728 -0
package/data/entries/DSGAI03.json +671 -0
package/data/entries/DSGAI04.json +752 -0
package/data/entries/DSGAI05.json +689 -0
package/data/entries/DSGAI06.json +673 -0
package/data/entries/DSGAI07.json +680 -0
package/data/entries/DSGAI08.json +698 -0
package/data/entries/DSGAI09.json +687 -0
package/data/entries/DSGAI10.json +627 -0
package/data/entries/DSGAI11.json +663 -0
package/data/entries/DSGAI12.json +695 -0
package/data/entries/DSGAI13.json +688 -0
package/data/entries/DSGAI14.json +703 -0
package/data/entries/DSGAI15.json +655 -0
package/data/entries/DSGAI16.json +716 -0
package/data/entries/DSGAI17.json +690 -0
package/data/entries/DSGAI18.json +613 -0
package/data/entries/DSGAI19.json +638 -0
package/data/entries/DSGAI20.json +671 -0
package/data/entries/DSGAI21.json +881 -0
package/data/entries/LLM01.json +975 -0
package/data/entries/LLM02.json +868 -0
package/data/entries/LLM03.json +817 -0
package/data/entries/LLM04.json +797 -0
package/data/entries/LLM05.json +761 -0
package/data/entries/LLM06.json +848 -0
package/data/entries/LLM07.json +749 -0
package/data/entries/LLM08.json +750 -0
package/data/entries/LLM09.json +760 -0
package/data/entries/LLM10.json +763 -0
package/data/incidents-schema.json +121 -0
package/data/incidents.json +1484 -0
package/data/schema.json +134 -0
package/dist/index.d.ts +97 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +124 -0
package/dist/index.js.map +1 -0
package/dist/index.test.d.ts +2 -0
package/dist/index.test.d.ts.map +1 -0
package/dist/index.test.js +97 -0
package/dist/index.test.js.map +1 -0
package/package.json +62 -0

package/data/entries/DSGAI10.json ADDED Viewed

@@ -0,0 +1,627 @@
+{
+  "id": "DSGAI10",
+  "name": "Synthetic Data and Anonymization Pitfalls",
+  "source_list": "DSGAI-2026",
+  "version": "2026-Q1",
+  "severity": "Medium",
+  "aivss_score": null,
+  "audience": [
+    "red-teamer",
+    "security-engineer",
+    "ciso",
+    "compliance",
+    "ml-engineer",
+    "ot-engineer",
+    "auditor",
+    "developer",
+    "data-engineer"
+  ],
+  "mappings": [
+    {
+      "framework": "MITRE ATLAS",
+      "control_id": "AML.T0024.000",
+      "control_name": "Membership Inference",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Adversary probes synthetic dataset to determine whether specific individuals are re-identifiable"
+    },
+    {
+      "framework": "MITRE ATLAS",
+      "control_id": "AML.T0025",
+      "control_name": "Exfiltrate via Cyber Means",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Re-identified individuals from synthetic data extracted via standard exfiltration paths"
+    },
+    {
+      "framework": "MITRE ATLAS",
+      "control_id": "AML.T0035",
+      "control_name": "Exfiltrate via ML Inference API",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Synthetic training data membership confirmed and specific records reconstructed through inference API"
+    },
+    {
+      "framework": "NIST AI RMF 1.0",
+      "control_id": "GV-1.6",
+      "control_name": "Policies for data privacy",
+      "tier": "Hardening",
+      "scope": "Build",
+      "notes": "Anonymisation governance policy — legal standard, not technical checkbox"
+    },
+    {
+      "framework": "NIST AI RMF 1.0",
+      "control_id": "MS-2.6",
+      "control_name": "Testing — data leakage",
+      "tier": "Hardening",
+      "scope": "Build",
+      "notes": "Re-identification risk testing and membership inference testing on synthetic datasets"
+    },
+    {
+      "framework": "NIST AI RMF 1.0",
+      "control_id": "MS-3.3",
+      "control_name": "Data quality",
+      "tier": "Hardening",
+      "scope": "Build",
+      "notes": "Data quality measurement of synthetic datasets — statistical fidelity vs privacy tradeoffs"
+    },
+    {
+      "framework": "NIST AI RMF 1.0",
+      "control_id": "MG-3.2",
+      "control_name": "Residual risk",
+      "tier": "Hardening",
+      "scope": "Build",
+      "notes": "Residual re-identification risk documented and accepted formally before dataset release or use"
+    },
+    {
+      "framework": "EU AI Act",
+      "control_id": "Training data must be subject to appropriate data governance — privacy measures must be effective",
+      "control_name": "Art. 10 — Data and data governance",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Anonymisation effectiveness is an Art. 10 data governance requirement, not a self-certification"
+    },
+    {
+      "framework": "EU AI Act",
+      "control_id": "GPAI training data governance documented — privacy measures included",
+      "control_name": "Art. 53(1)(a) — GPAI documentation",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Synthetic data generation methodology and re-identification risk assessment documented for GPAI"
+    },
+    {
+      "framework": "ISO/IEC 27001:2022",
+      "control_id": "A.5.34",
+      "control_name": "Privacy and PII protection",
+      "tier": "Hardening",
+      "scope": "Build",
+      "notes": "Anonymisation must meet the standard required by applicable privacy law — not just technical anonymisation"
+    },
+    {
+      "framework": "ISO/IEC 27001:2022",
+      "control_id": "A.8.11",
+      "control_name": "Data masking",
+      "tier": "Hardening",
+      "scope": "Build",
+      "notes": "Technical anonymisation and pseudonymisation controls applied to synthetic data generation"
+    },
+    {
+      "framework": "ISO/IEC 27001:2022",
+      "control_id": "A.5.12",
+      "control_name": "Classification of information",
+      "tier": "Hardening",
+      "scope": "Build",
+      "notes": "Synthetic datasets classified based on re-identification risk, not assumed to be non-personal"
+    },
+    {
+      "framework": "ISO/IEC 27001:2022",
+      "control_id": "A.8.33",
+      "control_name": "Test information",
+      "tier": "Hardening",
+      "scope": "Build",
+      "notes": "Appropriate protection of test and synthetic data used in AI development"
+    },
+    {
+      "framework": "ISO/IEC 42001:2023",
+      "control_id": "A.7.2",
+      "control_name": "Data — acquisition",
+      "tier": "Hardening",
+      "scope": "Build",
+      "notes": "Hardening"
+    },
+    {
+      "framework": "ISO/IEC 42001:2023",
+      "control_id": "A.5.2",
+      "control_name": "Impact assessment",
+      "tier": "Hardening",
+      "scope": "Build",
+      "notes": "Hardening"
+    },
+    {
+      "framework": "ISO/IEC 42001:2023",
+      "control_id": "A.7.3",
+      "control_name": "Data — preparation",
+      "tier": "Hardening",
+      "scope": "Build",
+      "notes": "Hardening"
+    },
+    {
+      "framework": "ISO/IEC 42001:2023",
+      "control_id": "Cl.6.1",
+      "control_name": "Planning — risk",
+      "tier": "Hardening",
+      "scope": "Build",
+      "notes": "Hardening"
+    },
+    {
+      "framework": "CIS Controls v8.1",
+      "control_id": "CIS 3",
+      "control_name": "3.7 — Establish data classification scheme",
+      "tier": "Hardening",
+      "scope": "Build"
+    },
+    {
+      "framework": "CIS Controls v8.1",
+      "control_id": "CIS 18",
+      "control_name": "18.3 — Remediate penetration test findings",
+      "tier": "Hardening",
+      "scope": "Build"
+    },
+    {
+      "framework": "OWASP ASVS 4.0.3",
+      "control_id": "V8 Data Protection",
+      "control_name": "V8.3.4 — Sensitive data classified",
+      "tier": "Hardening",
+      "scope": "Build"
+    },
+    {
+      "framework": "OWASP ASVS 4.0.3",
+      "control_id": "V5 Validation",
+      "control_name": "V5.2.6 — Defined output structure",
+      "tier": "Hardening",
+      "scope": "Build"
+    },
+    {
+      "framework": "ISA/IEC 62443",
+      "control_id": "SR 4.1",
+      "control_name": "Data confidentiality",
+      "tier": "Foundational",
+      "scope": "Both",
+      "notes": "Synthetic OT datasets not automatically excluded from protection — assessed before classification change"
+    },
+    {
+      "framework": "ISA/IEC 62443",
+      "control_id": "SR 3.3",
+      "control_name": "Software and information integrity",
+      "tier": "Foundational",
+      "scope": "Both",
+      "notes": "Synthetic data generation pipeline validated — source OT data integrity maintained"
+    },
+    {
+      "framework": "ISA/IEC 62443",
+      "control_id": "62443-2-1",
+      "control_name": "Security management",
+      "tier": "Foundational",
+      "scope": "Both",
+      "notes": "OT synthetic data governance policy — when synthetic data removes OT classification obligation"
+    },
+    {
+      "framework": "NIST SP 800-82 Rev 3",
+      "control_id": "§5.3",
+      "control_name": "ICS vulnerabilities",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "In-context manipulation of OT decision support"
+    },
+    {
+      "framework": "NIST SP 800-82 Rev 3",
+      "control_id": "§6.2",
+      "control_name": "Risk assessment",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Context poisoning risk for OT data feeds"
+    },
+    {
+      "framework": "NIST CSF 2.0",
+      "control_id": "GV.RM-06",
+      "control_name": "Risk Management Strategy",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Risk tolerance defined for re-identification risk in synthetic datasets — legal standard, not technical checkbox"
+    },
+    {
+      "framework": "NIST CSF 2.0",
+      "control_id": "ID.RA-01",
+      "control_name": "Risk Assessment",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Re-identification risk assessed for all synthetic datasets before use or distribution"
+    },
+    {
+      "framework": "NIST CSF 2.0",
+      "control_id": "PR.DS-01",
+      "control_name": "Data Security",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Synthetic datasets not automatically excluded from data protection — protected until re-identification risk formally assessed"
+    },
+    {
+      "framework": "NIST CSF 2.0",
+      "control_id": "DE.CM-09",
+      "control_name": "Continuous Monitoring",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Monitoring for re-identification attempts against synthetic datasets"
+    },
+    {
+      "framework": "SOC 2",
+      "control_id": "Synthetic datasets that are not truly anonymous subject to same retention obligations as source personal data",
+      "control_name": "P4.2 — Retention of personal information",
+      "tier": "Hardening",
+      "scope": "Both"
+    },
+    {
+      "framework": "SOC 2",
+      "control_id": "Synthetic data use must respect underlying privacy commitments if re-identification risk exists",
+      "control_name": "P5.1 — Personal information use",
+      "tier": "Hardening",
+      "scope": "Both"
+    },
+    {
+      "framework": "SOC 2",
+      "control_id": "Re-identification risk in synthetic datasets identified in risk assessment",
+      "control_name": "CC3.2 — Risk assessment",
+      "tier": "Hardening",
+      "scope": "Both"
+    },
+    {
+      "framework": "SOC 2",
+      "control_id": "Synthetic OT data and business data protected at source classification level until re-identification risk formally assessed",
+      "control_name": "C2.1 — Confidential information protection",
+      "tier": "Hardening",
+      "scope": "Both"
+    },
+    {
+      "framework": "PCI DSS v4.0",
+      "control_id": "Req 3.1.1",
+      "control_name": "Account data inventory",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Synthetic payment datasets that may be re-identifiable included in CHD inventory"
+    },
+    {
+      "framework": "PCI DSS v4.0",
+      "control_id": "Req 3.3.1",
+      "control_name": "SAD prohibition",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Synthetic data generated from SAD retains SAD classification — generation does not remove the prohibition"
+    },
+    {
+      "framework": "PCI DSS v4.0",
+      "control_id": "Req 3.4.1",
+      "control_name": "PAN rendering",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Synthetic PANs that pass Luhn check treated as CHD — format-preserving synthetic PANs are in PCI scope"
+    },
+    {
+      "framework": "PCI DSS v4.0",
+      "control_id": "Req 12.3.2",
+      "control_name": "Targeted risk analysis",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Re-identification risk in synthetic payment datasets documented in targeted risk analysis"
+    },
+    {
+      "framework": "ENISA Multilayer Framework",
+      "control_id": "L2",
+      "control_name": "Data and Model Security (DMS)",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Synthetic data generation validated against re-identification risk — formal anonymisation assessments documented as DMS evidence"
+    },
+    {
+      "framework": "ENISA Multilayer Framework",
+      "control_id": "L2",
+      "control_name": "Governance and Risk (GOV)",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Re-identification risk included in AI privacy impact assessment — treatment controls and residual risk accepted by data protection officer"
+    },
+    {
+      "framework": "ENISA Multilayer Framework",
+      "control_id": "L2",
+      "control_name": "Monitoring and Detection (MON)",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Quality and privacy metrics monitored for all synthetic datasets — drift in re-identification risk triggers re-assessment"
+    },
+    {
+      "framework": "ENISA Multilayer Framework",
+      "control_id": "L1",
+      "control_name": "General ICT — Data Protection",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Anonymisation requirements established at design time — not applied as an afterthought to an already-designed pipeline"
+    },
+    {
+      "framework": "OWASP SAMM v2.0",
+      "control_id": "D-TA",
+      "control_name": "Design / Threat Assessment",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Map all sources that contribute to context window; assess trust level per source"
+    },
+    {
+      "framework": "OWASP SAMM v2.0",
+      "control_id": "I-SB",
+      "control_name": "Implementation / Secure Build",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Validate and sanitise all content before context window assembly"
+    },
+    {
+      "framework": "OWASP SAMM v2.0",
+      "control_id": "V-ST",
+      "control_name": "Verification / Security Testing",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Adversarial tests injecting malicious content via each context source"
+    },
+    {
+      "framework": "OWASP SAMM v2.0",
+      "control_id": "O-IM",
+      "control_name": "Operations / Incident Management",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Alert on reasoning deviations correlated with external content in context"
+    },
+    {
+      "framework": "CWE/CVE",
+      "control_id": "CWE-359",
+      "control_name": "CWE-359",
+      "tier": "Hardening",
+      "scope": "Build",
+      "url": "https://cwe.mitre.org/data/definitions/359.html"
+    },
+    {
+      "framework": "CWE/CVE",
+      "control_id": "CWE-330",
+      "control_name": "CWE-330",
+      "tier": "Hardening",
+      "scope": "Build",
+      "url": "https://cwe.mitre.org/data/definitions/330.html"
+    },
+    {
+      "framework": "MAESTRO",
+      "control_id": "L2",
+      "control_name": "Data Operations",
+      "tier": "Hardening",
+      "scope": "Both"
+    },
+    {
+      "framework": "MAESTRO",
+      "control_id": "L1",
+      "control_name": "Foundation Models",
+      "tier": "Hardening",
+      "scope": "Both"
+    },
+    {
+      "framework": "AIUC-1",
+      "control_id": "B001",
+      "control_name": "Third-party adversarial robustness testing",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Foundational"
+    },
+    {
+      "framework": "AIUC-1",
+      "control_id": "B002",
+      "control_name": "Detect adversarial input",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Hardening"
+    },
+    {
+      "framework": "AIUC-1",
+      "control_id": "B005",
+      "control_name": "Implement real-time input filtering",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Foundational"
+    },
+    {
+      "framework": "OWASP NHI Top 10",
+      "control_id": "Data feed service accounts with access to sensitive data that should not enter context",
+      "control_name": "NHI-5 Over-Privileged NHI",
+      "tier": "Hardening",
+      "scope": "Both",
+      "notes": "Scope data feed credentials to approved data only"
+    },
+    {
+      "framework": "NIST SP 800-218A",
+      "control_id": "PW.7.2-PS",
+      "control_name": "Review for security vulnerabilities — synthetic data quality review",
+      "tier": "Foundational",
+      "scope": "Build",
+      "notes": "Review synthetic data for bias inheritance, privacy leakage, and statistical fidelity before use in training pipelines; verify privacy guarantees are meaningful"
+    },
+    {
+      "framework": "NIST SP 800-218A",
+      "control_id": "PW.8.2-PS",
+      "control_name": "Test for security vulnerabilities — synthetic data adversarial testing",
+      "tier": "Foundational",
+      "scope": "Build",
+      "notes": "Conduct adversarial testing of synthetic data for membership inference, attribute inference, and reconstruction attacks to validate privacy claims"
+    },
+    {
+      "framework": "NIST SP 800-218A",
+      "control_id": "RV.3.1-PS",
+      "control_name": "Analyse root causes — synthetic data failure analysis",
+      "tier": "Foundational",
+      "scope": "Build",
+      "notes": "When model failures trace to synthetic training data, conduct root cause analysis of the generation process, source data, and privacy mechanism"
+    },
+    {
+      "framework": "FedRAMP",
+      "control_id": "SI-4",
+      "control_name": "System Monitoring — synthetic data quality",
+      "tier": "Foundational",
+      "scope": "Build",
+      "notes": "Monitor synthetic data outputs for quality, privacy preservation, and absence of sensitive pattern leakage"
+    },
+    {
+      "framework": "FedRAMP",
+      "control_id": "CA-7",
+      "control_name": "Continuous Monitoring — synthetic data drift",
+      "tier": "Foundational",
+      "scope": "Build",
+      "notes": "Include synthetic data quality metrics in continuous monitoring; track for privacy degradation and bias drift"
+    },
+    {
+      "framework": "FedRAMP",
+      "control_id": "RA-5",
+      "control_name": "Vulnerability Scanning — synthetic data risks",
+      "tier": "Foundational",
+      "scope": "Build",
+      "notes": "Include synthetic data re-identification and pattern leakage in vulnerability assessment"
+    },
+    {
+      "framework": "DORA",
+      "control_id": "Art. 9",
+      "control_name": "Protection and Prevention — synthetic data privacy",
+      "tier": "Foundational",
+      "scope": "Build",
+      "notes": "Implement privacy controls for synthetic data generation; validate privacy preservation and absence of sensitive pattern leakage from source financial data"
+    },
+    {
+      "framework": "DORA",
+      "control_id": "Art. 24–27",
+      "control_name": "Resilience Testing — synthetic data testing",
+      "tier": "Foundational",
+      "scope": "Build",
+      "notes": "Include synthetic data re-identification and privacy testing in resilience testing programme; test for linkage attacks and attribute inference"
+    },
+    {
+      "framework": "DORA",
+      "control_id": "Art. 13",
+      "control_name": "Learning and Evolving — synthetic data improvement",
+      "tier": "Foundational",
+      "scope": "Build",
+      "notes": "Apply lessons learned from synthetic data privacy failures; update generation processes and validation controls"
+    }
+  ],
+  "tools": [
+    {
+      "name": "Synthetic Data Vault",
+      "type": "open-source",
+      "url": "https://sdv.dev"
+    },
+    {
+      "name": "ML Privacy Meter",
+      "type": "open-source",
+      "url": "https://github.com/privacytrustlab/ml_privacy_meter"
+    },
+    {
+      "name": "Gretel AI",
+      "type": "commercial",
+      "url": "https://gretel.ai"
+    },
+    {
+      "name": "ARX Anonymisation Tool",
+      "type": "open-source",
+      "url": "https://arx.deidentifier.org"
+    },
+    {
+      "name": "ARX Data Anonymization Tool",
+      "type": "open-source",
+      "url": "https://arx.deidentifier.org"
+    },
+    {
+      "name": "TensorFlow Privacy",
+      "type": "open-source",
+      "url": "https://github.com/tensorflow/privacy"
+    },
+    {
+      "name": "OpenDP",
+      "type": "open-source",
+      "url": "https://github.com/opendp/opendp"
+    },
+    {
+      "name": "SDV (Synthetic Data Vault)",
+      "type": "open-source",
+      "url": "https://github.com/sdv-dev/SDV"
+    },
+    {
+      "name": "Gretel.ai",
+      "type": "commercial",
+      "url": "https://gretel.ai"
+    },
+    {
+      "name": "ARX",
+      "type": "open-source",
+      "url": "https://github.com/arx-deidentifier/arx"
+    },
+    {
+      "name": "SDV",
+      "type": "open-source",
+      "url": "https://github.com/sdv-dev/SDV"
+    },
+    {
+      "name": "Anonymeter",
+      "type": "open-source",
+      "url": "https://github.com/statice/anonymeter"
+    },
+    {
+      "name": "ARX Data Anonymization",
+      "type": "open-source",
+      "url": "https://arx.deidentifier.org"
+    },
+    {
+      "name": "Microsoft Presidio",
+      "type": "open-source",
+      "url": "https://github.com/microsoft/presidio"
+    }
+  ],
+  "incidents": [
+    {
+      "name": "Synthetic data re-identification — de-anonymized patients from synthetic health records",
+      "url": "https://github.com/emmanuelgjr/GenAI-Security-Crosswalk/blob/main/data/incidents.json",
+      "year": 2025,
+      "incident_id": "INC-040"
+    },
+    {
+      "name": "Stability AI synthetic CSAM generation — training data and output safety failures",
+      "url": "https://github.com/emmanuelgjr/GenAI-Security-Crosswalk/blob/main/data/incidents.json",
+      "year": 2024,
+      "incident_id": "INC-049"
+    }
+  ],
+  "crossrefs": {
+    "dsgai_2026": [
+      "DSGAI08",
+      "DSGAI18"
+    ],
+    "llm_top10": [
+      "LLM02",
+      "LLM01",
+      "LLM08",
+      "LLM09",
+      "LLM03"
+    ],
+    "agentic_top10": [
+      "ASI03",
+      "ASI06",
+      "ASI09"
+    ]
+  },
+  "changelog": [
+    {
+      "date": "2026-03-27",
+      "version": "1.0.0",
+      "change": "Initial entry — generated from GenAI Security Crosswalk v1.5.1 mapping files",
+      "author": "emmanuelgjr"
+    }
+  ]
+}