npm - agent-threat-rules - Versions diffs - 2.0.15 → 2.0.16 - Mend

agent-threat-rules 2.0.15 → 2.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agent-threat-rules",
-  "version": "2.0.15",
+  "version": "2.0.16",
   "type": "module",
   "description": "Open detection standard -- like Sigma, but for AI agents. 311 rules for prompt injection, tool poisoning, context exfiltration, and MCP attacks. Shipped in Cisco AI Defense. 97.1% recall on NVIDIA garak.",
   "main": "./dist/index.js",

package/rules/agent-manipulation/ATR-2026-00032-goal-hijacking.yaml CHANGED Viewed

@@ -26,6 +26,30 @@ references:
   mitre_atlas:
     - AML.T0051 - Prompt Injection
     - AML.T0043 - Craft Adversarial Data
+compliance:
+  eu_ai_act:
+    - article: "14"
+      context: "Goal hijacking redirects the agent away from its authorized objectives without human awareness, directly undermining the human oversight mechanisms Article 14 requires for high-risk AI systems."
+      strength: primary
+    - article: "15"
+      context: "Article 15 robustness requirements include resistance against adversarial redirection of agent objectives; this rule detects explicit and subtle goal-pivot patterns at runtime."
+      strength: secondary
+  nist_ai_rmf:
+    - subcategory: "GV.1.2"
+      context: "Goal hijacking attacks subvert the accountability roles assigned to AI agents by redirecting them from authorized objectives; GV.1.2 requires that defined accountability roles and their task boundaries are enforced at runtime."
+      strength: primary
+    - subcategory: "MG.2.3"
+      context: "Detecting goal hijacking patterns triggers the risk treatment plans required by MG.2.3 to contain adversarial task-redirection before the agent executes unauthorized actions."
+      strength: secondary
+  iso_42001:
+    - clause: "6.2"
+      context: "ISO 42001 clause 6.2 risk treatment plans must address task-redirection attacks; this rule implements the monitoring control that detects when an agent's objective is adversarially substituted."
+      strength: primary
+    - clause: "8.6"
+      context: "Clause 8.6 operational controls ensure AI systems operate within their authorized task boundaries; goal hijacking detection enforces that agents remain on their assigned objective despite redirection attempts."
+      strength: secondary
 tags:
   category: agent-manipulation
   subcategory: goal-hijacking

package/rules/agent-manipulation/ATR-2026-00074-cross-agent-privilege-escalation.yaml CHANGED Viewed

@@ -25,6 +25,30 @@ references:
   mitre_attack:
     - T1078 - Valid Accounts
     - T1550 - Use Alternate Authentication Material
+compliance:
+  eu_ai_act:
+    - article: "14"
+      context: "Cross-agent privilege escalation allows one agent to acquire capabilities from another agent's authorization scope without human approval, directly bypassing the role-based access controls that Article 14 human oversight requires."
+      strength: primary
+    - article: "9"
+      context: "Multi-agent privilege escalation via trust relationships is a documented attack pattern in the AI system risk register; Article 9 requires monitoring controls for this lateral movement risk."
+      strength: secondary
+  nist_ai_rmf:
+    - subcategory: "GV.1.2"
+      context: "Cross-agent privilege escalation exploits gaps in the accountability roles defined for each agent in a multi-agent system; GV.1.2 requires that agent role boundaries are defined and that lateral movement across those boundaries is detected."
+      strength: primary
+    - subcategory: "MG.2.3"
+      context: "Credential forwarding and orchestrator bypass between agents require immediate risk treatment to prevent lateral movement from escalating to full system compromise; MG.2.3 mandates that these response plans are pre-defined and activated on detection."
+      strength: secondary
+  iso_42001:
+    - clause: "6.2"
+      context: "ISO 42001 clause 6.2 risk treatment activities must cover inter-agent privilege escalation as a multi-agent architecture risk; this rule implements the detection control for credential forwarding and unauthorized role assumption between agents."
+      strength: primary
+    - clause: "8.6"
+      context: "Clause 8.6 operational controls ensure each agent executes within its authorized capability set; cross-agent privilege escalation detection enforces that no agent can assume the permissions of a higher-privilege agent without explicit authorization."
+      strength: secondary
 tags:
   category: agent-manipulation
   subcategory: cross-agent-privilege-escalation

package/rules/agent-manipulation/ATR-2026-00076-inter-agent-message-spoofing.yaml CHANGED Viewed

@@ -27,6 +27,30 @@ references:
   mitre_atlas:
     - AML.T0051 - Prompt Injection
     - AML.T0043 - Craft Adversarial Data
+compliance:
+  eu_ai_act:
+    - article: "15"
+      context: "Article 15 cybersecurity requirements for high-risk AI include protecting agent communication channels against spoofing, replay, and message integrity attacks; this rule detects structural communication failures between agents."
+      strength: primary
+    - article: "14"
+      context: "Spoofed inter-agent messages can trigger unauthorized agent actions without any human-visible audit trail, undermining the traceability required by Article 14 human oversight provisions."
+      strength: secondary
+  nist_ai_rmf:
+    - subcategory: "MP.5.1"
+      context: "Spoofed, replayed, and unauthenticated inter-agent messages are adversarial inputs that exploit trust assumptions in multi-agent communication channels; MP.5.1 requires that these structural communication integrity attacks are identified and tracked."
+      strength: primary
+    - subcategory: "MG.2.3"
+      context: "Detection of message spoofing, replay attacks, and null authentication in agent channels triggers risk treatment plans to quarantine compromised messages before they cause unauthorized actions downstream; MG.2.3 requires these treatment processes are defined."
+      strength: secondary
+  iso_42001:
+    - clause: "6.2"
+      context: "ISO 42001 clause 6.2 risk treatment plans must address inter-agent communication integrity failures including message spoofing and replay; this rule implements the detection control for structural authentication weaknesses in agent-to-agent channels."
+      strength: primary
+    - clause: "8.6"
+      context: "Clause 8.6 operational controls require that inter-agent communication operates with verified message authentication; detection of null tokens, forged IDs, and unauthenticated commands enforces the integrity requirements for correct agent pipeline execution."
+      strength: secondary
 tags:
   category: agent-manipulation
   subcategory: inter-agent-communication

package/rules/agent-manipulation/ATR-2026-00077-human-trust-exploitation.yaml CHANGED Viewed

@@ -33,6 +33,30 @@ references:
     - ASI09:2026 - Human-Agent Trust Exploitation
   mitre_atlas:
     - AML.T0048 - Adversarial Prompt Techniques
+compliance:
+  eu_ai_act:
+    - article: "13"
+      context: "Article 13 transparency obligations require that AI systems do not exploit informational asymmetries to manipulate human users; human-trust exploitation attacks weaponize the AI's perceived authority and credibility against its own users."
+      strength: primary
+    - article: "14"
+      context: "Trust exploitation techniques undermine the human's ability to critically evaluate agent outputs and decisions, eroding the informed oversight capability Article 14 requires to remain effective."
+      strength: secondary
+  nist_ai_rmf:
+    - subcategory: "GV.6.1"
+      context: "Human-agent trust exploitation weaponizes information asymmetry between the AI system and its operators; GV.6.1 data governance policies must ensure AI systems do not suppress or fabricate information to manipulate human decision-makers."
+      strength: primary
+    - subcategory: "MP.5.1"
+      context: "False certainty claims, manufactured urgency, and verification discouragement are adversarial input patterns targeting human operators rather than the AI system itself; MP.5.1 requires identifying and tracking these social-engineering attack vectors."
+      strength: secondary
+  iso_42001:
+    - clause: "8.4"
+      context: "ISO 42001 clause 8.4 requires documented AI system impact assessments; human-trust exploitation attacks that suppress warnings or manufacture urgency to bypass review represent a documented impact category requiring runtime detection controls."
+      strength: primary
+    - clause: "9.1"
+      context: "Clause 9.1 performance monitoring must evaluate whether AI systems support effective human oversight; detecting trust exploitation patterns provides the measurement signal that indicates the AI management system's oversight controls are being actively undermined."
+      strength: secondary
 tags:
   category: agent-manipulation
   subcategory: human-trust-exploitation

package/rules/agent-manipulation/ATR-2026-00108-consensus-sybil-attack.yaml CHANGED Viewed

@@ -24,6 +24,28 @@ references:
     - ASI01:2026 - Agent Goal Hijack
   mitre_atlas:
     - AML.T0043 - Craft Adversarial Data
+compliance:
+  eu_ai_act:
+    - article: "14"
+      context: "Consensus Sybil attacks allow compromised agents to manufacture false agreement in multi-agent decision systems, directly undermining the human oversight capability Article 14 requires for high-risk AI applications."
+      strength: primary
+    - article: "15"
+      context: "Article 15 robustness requirements mandate that high-risk AI systems resist adversarial manipulation; Sybil attacks on consensus mechanisms are a documented adversarial pattern requiring systematic detection."
+      strength: secondary
+  nist_ai_rmf:
+    - subcategory: "GV.1.2"
+      context: "Consensus Sybil attacks exploit undefined accountability for agent identity in multi-agent systems; GV.1.2 requires that accountability roles for AI risk management are defined and assigned to prevent Sybil-style identity fabrication."
+      strength: primary
+    - subcategory: "MG.2.3"
+      context: "Sybil attacks represent a documented risk treatment gap in multi-agent consensus deployments; MG.2.3 requires that risk treatment plans are implemented for identified AI risks, including detection controls for fake-identity flooding."
+      strength: secondary
+  iso_42001:
+    - clause: "6.2"
+      context: "Multi-agent systems deploying consensus mechanisms must include Sybil attack detection as a planned risk treatment activity under the AI objectives framework required by clause 6.2."
+      strength: primary
+    - clause: "8.6"
+      context: "Clause 8.6 operational controls must ensure that consensus decisions are made by verified agent identities and that fabricated voter identities are detected before they influence multi-agent outcomes."
+      strength: secondary
 tags:
   category: agent-manipulation
   subcategory: consensus-sybil-attack

package/rules/agent-manipulation/ATR-2026-00116-a2a-message-validation.yaml CHANGED Viewed

@@ -20,6 +20,28 @@ references:
     - ASI07:2026 - Insecure Inter-Agent Communication
   mitre_attack:
     - T1557 - Adversary-in-the-Middle
+compliance:
+  eu_ai_act:
+    - article: "15"
+      context: "Malicious agent-to-agent message injection exploits unvalidated inter-agent channels to execute unauthorized actions; Article 15 cybersecurity requirements mandate detection and resistance against such adversarial input manipulation."
+      strength: primary
+    - article: "14"
+      context: "A2A message injection can cause agents to take actions outside their authorized scope without human awareness, eroding the effective oversight capability Article 14 requires for high-risk AI systems."
+      strength: secondary
+  nist_ai_rmf:
+    - subcategory: "MP.5.1"
+      context: "Malicious A2A message injection is a documented adversarial input risk targeting inter-agent communication channels; MP.5.1 requires that adversarial input risks to AI systems are identified and tracked to enable detection of embedded payload attacks."
+      strength: primary
+    - subcategory: "MG.2.3"
+      context: "A2A message validation failures represent a risk requiring active treatment; MG.2.3 requires that risk treatment plans are implemented for identified AI risks, including validation controls on all inter-agent message channels."
+      strength: secondary
+  iso_42001:
+    - clause: "6.2"
+      context: "AI system plans under clause 6.2 must include risk treatment activities for inter-agent message injection, ensuring that A2A communication validation is a planned control rather than an afterthought."
+      strength: primary
+    - clause: "8.6"
+      context: "Clause 8.6 operational controls require that inter-agent messages are validated before execution, preventing injected instructions from executing in the receiving agent's security context."
+      strength: secondary
 tags:
   category: agent-manipulation
   subcategory: a2a-message-validation

package/rules/agent-manipulation/ATR-2026-00117-agent-identity-spoofing.yaml CHANGED Viewed

@@ -20,6 +20,28 @@ references:
     - ASI10:2026 - Rogue Agents
   mitre_attack:
     - T1036 - Masquerading
+compliance:
+  eu_ai_act:
+    - article: "13"
+      context: "Agent identity spoofing creates false impressions of authority and trustworthiness that users and other agents cannot distinguish from legitimate communications; Article 13 transparency obligations require that AI system identity and authority claims be verifiable."
+      strength: primary
+    - article: "15"
+      context: "Article 15 cybersecurity requirements include protection against masquerading attacks; identity spoofing in multi-agent systems represents a documented adversarial pattern targeting the authentication layer of agent architectures."
+      strength: secondary
+  nist_ai_rmf:
+    - subcategory: "GV.6.1"
+      context: "Agent identity spoofing exploits the absence of verified identity provenance in inter-agent data flows; GV.6.1 data governance policies must define how agent identity claims are authenticated to prevent masquerading attacks."
+      strength: primary
+    - subcategory: "MP.5.1"
+      context: "Impersonation of admin or supervisor agent roles is an adversarial input risk that must be tracked under MP.5.1 to ensure detection controls cover identity-claim-based privilege escalation patterns."
+      strength: secondary
+  iso_42001:
+    - clause: "8.4"
+      context: "Clause 8.4 AI system impact assessments must document the risk that unverified agent identity claims allow privilege escalation, and review controls that ensure identity spoofing is detectable before actions are taken."
+      strength: primary
+    - clause: "9.1"
+      context: "Clause 9.1 performance monitoring must include evaluation of whether agent identity verification controls are functioning correctly and catching masquerading attacks in operational deployments."
+      strength: secondary
 tags:
   category: agent-manipulation
   subcategory: agent-identity-spoofing

package/rules/agent-manipulation/ATR-2026-00119-social-engineering-via-agent.yaml CHANGED Viewed

@@ -19,6 +19,28 @@ references:
     - ASI09:2026 - Human Trust Exploitation
   mitre_attack:
     - T1566 - Phishing
+compliance:
+  eu_ai_act:
+    - article: "13"
+      context: "Social engineering delivered through agent output exploits the elevated trust users place in AI-generated content; Article 13 transparency obligations require that AI systems not be used as vectors for deception or manipulation against the users they serve."
+      strength: primary
+    - article: "14"
+      context: "Agents weaponized for social engineering undermine the informed human judgment that Article 14 oversight depends on; users manipulated through trusted AI channels cannot exercise effective oversight of subsequent decisions."
+      strength: secondary
+  nist_ai_rmf:
+    - subcategory: "GV.6.1"
+      context: "Social engineering delivered via agent output exploits user trust in AI-generated content to harvest credentials and personal data; GV.6.1 data governance policies must address how AI-generated communications are authenticated to prevent agent-mediated phishing."
+      strength: primary
+    - subcategory: "MP.5.1"
+      context: "Using agents as social engineering vectors is an adversarial input risk where poisoned context produces manipulative outputs; MP.5.1 requires this risk to be identified and tracked so that urgency and authority-impersonation patterns in agent output are monitored."
+      strength: secondary
+  iso_42001:
+    - clause: "8.4"
+      context: "Clause 8.4 impact assessments must document the elevated trust users place in AI-generated content and evaluate whether social engineering via agent output constitutes a significant harm requiring dedicated detection controls."
+      strength: primary
+    - clause: "9.1"
+      context: "Clause 9.1 performance monitoring must evaluate whether agent output monitoring detects social engineering patterns such as urgency language and credential-harvesting requests generated through poisoned agent context."
+      strength: secondary
 tags:
   category: agent-manipulation
   subcategory: social-engineering-via-agent

package/rules/agent-manipulation/ATR-2026-00132-casual-authority-escalation.yaml CHANGED Viewed

@@ -26,6 +26,28 @@ references:
     - ASI09:2026 - Human-Agent Trust Exploitation
 metadata_provenance:
   mitre_atlas: auto-generated
+compliance:
+  eu_ai_act:
+    - article: "14"
+      context: "Casual authority claims using conversational tone bypass detection while achieving the same effect as explicit privilege escalation — unauthorized actions without human approval; Article 14 oversight requirements apply regardless of the linguistic register used to circumvent them."
+      strength: primary
+    - article: "9"
+      context: "Rationalized bypasses and casual scope escalation are documented attack patterns that Article 9 risk management systems must account for; detection of informal social engineering is a required risk treatment for multi-agent deployments."
+      strength: secondary
+  nist_ai_rmf:
+    - subcategory: "GV.1.2"
+      context: "Casual authority escalation exploits gaps in accountability role definitions, allowing informal authority claims like 'the boss said' to bypass controls without challenge; GV.1.2 requires that accountability roles are formally assigned so that unverifiable casual authority claims are detectable."
+      strength: primary
+    - subcategory: "MG.2.3"
+      context: "Conversational-tone scope escalation is a documented risk requiring active treatment; MG.2.3 requires that risk treatment plans address informal social engineering techniques that evade formal injection detection in multi-agent deployments."
+      strength: secondary
+  iso_42001:
+    - clause: "6.2"
+      context: "Clause 6.2 AI objectives and plans must include risk treatment for casual authority claim patterns that bypass formal injection detection, as these represent a distinct attack surface from explicit privilege escalation."
+      strength: primary
+    - clause: "8.6"
+      context: "Clause 8.6 operational controls must ensure that agents require verified authorization before acting on scope expansions or control bypasses, regardless of whether the instruction is phrased formally or in casual conversational language."
+      strength: secondary
 tags:
   category: agent-manipulation
   subcategory: casual-authority

package/rules/agent-manipulation/ATR-2026-00139-casual-authority-redirect.yaml CHANGED Viewed

@@ -21,6 +21,28 @@ references:
     - ASI06 - Rogue Actions
 metadata_provenance:
   mitre_atlas: auto-generated
+compliance:
+  eu_ai_act:
+    - article: "14"
+      context: "Casual authority data redirect attacks redirect agent output to attacker-controlled endpoints by spoofing organizational authority, bypassing human review of where sensitive data flows; Article 14 oversight requires that data redirection decisions remain under human control."
+      strength: primary
+    - article: "15"
+      context: "Article 15 cybersecurity requirements include protection against social engineering attacks targeting agent output pipelines; casual authority redirect is a documented adversarial technique exploiting agents as exfiltration proxies."
+      strength: secondary
+  nist_ai_rmf:
+    - subcategory: "GV.1.2"
+      context: "Casual authority redirect attacks succeed when no formal accountability role exists to verify output redirection decisions; GV.1.2 requires that accountability roles for AI risk management are defined so that informal directives to redirect agent output can be challenged and blocked."
+      strength: primary
+    - subcategory: "MG.2.3"
+      context: "Data redirect via social engineering authority claims is a documented exfiltration risk requiring active treatment; MG.2.3 requires that risk treatment plans include detection controls for authority-impersonation-based output redirection attacks."
+      strength: secondary
+  iso_42001:
+    - clause: "6.2"
+      context: "Clause 6.2 AI objectives and plans must include controls for preventing agents from being redirected to attacker-controlled endpoints through casual authority claims that spoof organizational hierarchy."
+      strength: primary
+    - clause: "8.6"
+      context: "Clause 8.6 operational controls must ensure that agent output destinations are validated against authorized endpoints and that casual authority directives to disable safety filters or redirect data are blocked before execution."
+      strength: secondary
 tags:
   category: agent-manipulation
   subcategory: casual-authority-redirect

package/rules/agent-manipulation/ATR-2026-00164-skill-scope-hijack.yaml CHANGED Viewed

@@ -17,6 +17,28 @@ references:
     - 'LLM06:2025 - Excessive Agency'
   owasp_agentic:
     - 'ASI03:2026 - Cross-Agent Escalation'
+compliance:
+  eu_ai_act:
+    - article: "14"
+      context: "Skill scope hijacking embeds privilege escalation instructions within SKILL.md files to expand agent capabilities beyond authorized boundaries without human knowledge; Article 14 requires that agents operate within sanctioned scopes visible to human overseers."
+      strength: primary
+    - article: "15"
+      context: "Article 15 cybersecurity requirements mandate protection against supply chain attacks; malicious SKILL.md files represent a documented technique for injecting unauthorized capability expansion at the skill-definition layer."
+      strength: secondary
+  nist_ai_rmf:
+    - subcategory: "GV.1.2"
+      context: "Skill scope hijacking succeeds when no accountability role governs what capabilities a skill may claim; GV.1.2 requires that accountability roles for AI risk management are defined and assigned, ensuring that SKILL.md capability claims are reviewed against authorized scope boundaries."
+      strength: primary
+    - subcategory: "MP.2.3"
+      context: "Malicious SKILL.md files in external skill repositories represent an AI supply chain risk source; MP.2.3 requires that AI supply chain risk sources are identified and assessed, covering the skill-definition layer as a vector for privilege escalation injection."
+      strength: secondary
+  iso_42001:
+    - clause: "6.2"
+      context: "Clause 6.2 AI objectives and plans must include risk treatment for skill-definition-layer attacks where SKILL.md instructions expand agent scope beyond authorized boundaries without human knowledge."
+      strength: primary
+    - clause: "8.5"
+      context: "Clause 8.5 controls over externally-provided AI components must include validation of SKILL.md files to ensure that skill definitions do not embed cross-agent escalation or unauthorized scope expansion instructions."
+      strength: secondary
 tags:
   category: agent-manipulation
   subcategory: skill-scope-hijack

package/rules/agent-manipulation/ATR-2026-00269-fitd-escalation.yaml CHANGED Viewed

@@ -26,6 +26,28 @@ references:
   research:
     - "https://github.com/NVIDIA/garak/blob/main/garak/probes/continuation.py"
     - "https://jailbreaking-llms.github.io/"
+compliance:
+  eu_ai_act:
+    - article: "15"
+      context: "Foot-in-the-door escalation exploits LLM consistency bias to incrementally extract harmful outputs that would be refused if requested directly; Article 15 robustness requirements demand resistance against systematic adversarial probing techniques targeting model behavioral constraints."
+      strength: primary
+    - article: "9"
+      context: "FITD escalation is a documented multi-step adversarial pattern; Article 9 risk management systems must include detection controls for incremental jailbreak techniques that bypass single-turn safety evaluation."
+      strength: secondary
+  nist_ai_rmf:
+    - subcategory: "MP.5.1"
+      context: "Foot-in-the-door escalation is an adversarial input technique that exploits LLM consistency bias to incrementally bypass safety constraints across multiple turns; MP.5.1 requires that such multi-step adversarial input risks are identified and tracked to enable cross-turn detection."
+      strength: primary
+    - subcategory: "MG.2.3"
+      context: "FITD escalation represents an identified risk requiring active treatment through detection controls that monitor prior-compliance anchors and escalation signals across conversation history; MG.2.3 requires such risk treatment plans are implemented."
+      strength: secondary
+  iso_42001:
+    - clause: "6.2"
+      context: "Clause 6.2 AI objectives and plans must include risk treatment for multi-step jailbreak techniques like FITD escalation that exploit model consistency bias to extract harmful outputs that would be refused if requested directly."
+      strength: primary
+    - clause: "8.6"
+      context: "Clause 8.6 operational controls must ensure that agents evaluate escalating requests in context of prior conversation turns rather than in isolation, preventing foot-in-the-door compliance anchoring from lowering refusal thresholds."
+      strength: secondary
 tags:
   category: agent-manipulation
   subcategory: fitd-escalation

package/rules/agent-manipulation/ATR-2026-00271-grandma-roleplay-jailbreak.yaml CHANGED Viewed

@@ -26,6 +26,28 @@ references:
   research:
     - "https://github.com/NVIDIA/garak/blob/main/garak/probes/grandma.py"
     - "https://www.independent.co.uk/tech/chatgpt-microsoft-windows-11-grandma-exploit-b2360213.html"
+compliance:
+  eu_ai_act:
+    - article: "15"
+      context: "Grandma roleplay jailbreaks use emotional manipulation through grief and nostalgia framing to circumvent safety constraints; Article 15 robustness requirements include resistance against social engineering techniques that exploit psychological vulnerabilities in human-AI interaction."
+      strength: primary
+    - article: "9"
+      context: "Persona-based jailbreaks exploiting emotional framing are documented high-risk attack patterns; Article 9 risk management requires systematic controls for emotional manipulation techniques used to extract harmful content from high-risk AI systems."
+      strength: secondary
+  nist_ai_rmf:
+    - subcategory: "MP.5.1"
+      context: "Grandma roleplay jailbreaks are adversarial inputs that exploit grief and nostalgia emotional framing to bypass safety constraints; MP.5.1 requires that this class of psychological manipulation risk is identified and tracked as a distinct adversarial input vector."
+      strength: primary
+    - subcategory: "MG.2.3"
+      context: "Emotional-framing persona jailbreaks represent a known risk requiring active treatment through detection controls for deceased-relative roleplay patterns; MG.2.3 requires that risk treatment plans are implemented for identified AI risks including this garak-documented attack family."
+      strength: secondary
+  iso_42001:
+    - clause: "6.2"
+      context: "Clause 6.2 AI objectives and plans must include risk treatment for emotional manipulation techniques such as grandma roleplay that lower the model's refusal threshold by exploiting grief and nostalgia framing."
+      strength: primary
+    - clause: "8.6"
+      context: "Clause 8.6 operational controls must ensure that harmful procedural content is not generated regardless of the emotional or persona framing used in the request, blocking grandma roleplay jailbreak patterns before harmful content is produced."
+      strength: secondary
 tags:
   category: agent-manipulation
   subcategory: grandma-roleplay-jailbreak

package/rules/agent-manipulation/ATR-2026-00273-dan-developer-mode-persona.yaml CHANGED Viewed

@@ -27,6 +27,28 @@ references:
   research:
     - "https://github.com/NVIDIA/garak/blob/main/garak/probes/dan.py"
     - "https://github.com/NVIDIA/garak/blob/main/garak/data/dan/"
+compliance:
+  eu_ai_act:
+    - article: "15"
+      context: "DAN and Developer Mode persona jailbreaks attempt to replace the model's safety-trained identity with an unrestricted persona; Article 15 robustness and cybersecurity requirements mandate that high-risk AI systems resist identity replacement attacks that disable safety constraints."
+      strength: primary
+    - article: "9"
+      context: "Named persona jailbreaks are a well-documented attack family with widespread deployment in the wild; Article 9 risk management systems for high-risk AI must include detection controls for this established technique for bypassing content restrictions."
+      strength: secondary
+  nist_ai_rmf:
+    - subcategory: "MP.5.1"
+      context: "DAN, DUDE, and Developer Mode persona jailbreaks are adversarial inputs that attempt to replace the model's safety-trained identity with an unrestricted persona; MP.5.1 requires that this widely-deployed adversarial input risk is identified and tracked across model deployments."
+      strength: primary
+    - subcategory: "MG.2.3"
+      context: "Named persona jailbreak families represent a documented risk requiring active treatment through pattern-based detection of DAN/DUDE/STAN identity-replacement prompts; MG.2.3 requires that risk treatment plans are implemented for this established and widely-distributed attack technique."
+      strength: secondary
+  iso_42001:
+    - clause: "6.2"
+      context: "Clause 6.2 AI objectives and plans must include risk treatment for DAN and Developer Mode persona replacement attacks, which represent one of the most widely-deployed jailbreak families targeting AI system safety constraints."
+      strength: primary
+    - clause: "8.6"
+      context: "Clause 8.6 operational controls must ensure that the AI system's safety identity cannot be replaced by named unrestricted personas such as DAN or DUDE, blocking persona-replacement jailbreaks before they disable safety constraints."
+      strength: secondary
 tags:
   category: agent-manipulation
   subcategory: dan-developer-mode-persona

package/rules/context-exfiltration/ATR-2026-00075-agent-memory-manipulation.yaml CHANGED Viewed

@@ -23,6 +23,28 @@ references:
     - AML.T0051 - LLM Prompt Injection
   mitre_attack:
     - "T1565.001 - Data Manipulation: Stored Data Manipulation"
+compliance:
+  eu_ai_act:
+    - article: "14"
+      context: "Agent memory poisoning corrupts persistent context across sessions, allowing attackers to establish behavioral backdoors that survive individual-turn oversight checks; Article 14 requires that human oversight mechanisms remain effective across the full lifecycle of agent interactions, including persistent state."
+      strength: primary
+    - article: "15"
+      context: "Article 15 robustness requirements mandate resistance against adversarial inputs that modify system behavior; memory injection attacks that alter an agent's long-term behavioral state represent a critical robustness failure requiring detection controls."
+      strength: secondary
+  nist_ai_rmf:
+    - subcategory: "GV.1.2"
+      context: "Memory manipulation attacks that inject false behavioral directives require clearly assigned accountability roles to ensure that persistent agent state is monitored and that any unauthorized modification is escalated to a responsible party."
+      strength: primary
+    - subcategory: "MG.2.3"
+      context: "Risk treatment plans for memory poisoning must include controls that detect and block cross-session behavioral injection before poisoned directives propagate into future interactions."
+      strength: secondary
+  iso_42001:
+    - clause: "6.2"
+      context: "AI objectives and risk treatment plans must explicitly address memory manipulation threats to prevent attackers from establishing persistent behavioral backdoors that survive context resets."
+      strength: primary
+    - clause: "8.6"
+      context: "Operational controls for AI systems must include integrity checks on persistent memory stores to ensure that agent behavioral state has not been tampered with between sessions."
+      strength: secondary
 tags:
   category: context-exfiltration
   subcategory: memory-manipulation

package/rules/context-exfiltration/ATR-2026-00102-disguised-analytics-exfiltration.yaml CHANGED Viewed

@@ -24,6 +24,28 @@ references:
     - ASI06:2026 - Insufficient Access Controls
 metadata_provenance:
   mitre_atlas: auto-generated
+compliance:
+  eu_ai_act:
+    - article: "13"
+      context: "Disguised analytics exfiltration exploits the assumption that telemetry collection is harmless to covertly transmit full conversation context to attacker-controlled endpoints; Article 13 transparency obligations require that AI systems and their tools disclose all data collection practices clearly to users."
+      strength: primary
+    - article: "15"
+      context: "Article 15 cybersecurity requirements include protection against tools that misrepresent their data handling to facilitate unauthorized data extraction; detection of analytics-disguised exfiltration is a required control for high-risk AI deployments."
+      strength: secondary
+  nist_ai_rmf:
+    - subcategory: "GV.6.1"
+      context: "Disguised analytics exfiltration violates data governance boundaries by covertly transmitting full conversation context under the cover of routine telemetry, bypassing the data handling policies that GV.6.1 requires AI systems to enforce."
+      strength: primary
+    - subcategory: "MS.2.5"
+      context: "Detection of analytics-disguised data exfiltration is a data protection control measurement required under MS.2.5 to verify that AI system tool invocations do not silently leak user conversation data to external endpoints."
+      strength: secondary
+  iso_42001:
+    - clause: "8.4"
+      context: "The documented AI system impact assessment required under clause 8.4 must identify tools claiming benign analytics roles as a covert data exfiltration vector affecting user privacy, so that disguised analytics exfiltration is covered."
+      strength: primary
+    - clause: "9.1"
+      context: "Ongoing monitoring and evaluation under clause 9.1 must include checks that tool-level data transmission claims are consistent with stated telemetry policies, detecting analytics-disguised exfiltration during operational review."
+      strength: secondary
 tags:
   category: context-exfiltration
   subcategory: disguised-analytics-collection

package/rules/context-exfiltration/ATR-2026-00113-credential-theft.yaml CHANGED Viewed

@@ -19,6 +19,28 @@ references:
     - ASI03:2026 - Agent Identity and Access Abuse
   mitre_attack:
     - T1552.001 - Credentials In Files
+compliance:
+  eu_ai_act:
+    - article: "15"
+      context: "Credential file theft through agent tool access compromises every downstream service connected to the stolen credentials; Article 15 cybersecurity requirements mandate that high-risk AI systems include controls preventing agents from accessing credential stores outside their authorized scope."
+      strength: primary
+    - article: "12"
+      context: "Credential theft undermines the integrity of audit logs and access records that Article 12 logging obligations depend on; an attacker who obtains credentials can falsify or erase audit trails, making Article 12 compliance unverifiable."
+      strength: secondary
+  nist_ai_rmf:
+    - subcategory: "MP.5.1"
+      context: "Adversarial tool calls targeting credential files such as ~/.aws/credentials and SSH private keys represent a direct exploitation of the agent's file-access capabilities that must be identified and tracked as a critical AI system risk."
+      strength: primary
+    - subcategory: "GV.6.1"
+      context: "Data governance policies must define and enforce scope restrictions on agent access to credential stores, preventing AI tools from reading authentication files outside their authorized operational boundary."
+      strength: secondary
+  iso_42001:
+    - clause: "6.2"
+      context: "Risk treatment activities must include controls that block agent access to well-known credential file paths, reducing the attack surface for credential theft through AI tool invocations."
+      strength: primary
+    - clause: "8.4"
+      context: "AI system impact assessments must document and review the potential consequences of credential file theft, including lateral movement across all downstream services connected to stolen credentials."
+      strength: secondary
 tags:
   category: context-exfiltration
   subcategory: credential-theft

package/rules/context-exfiltration/ATR-2026-00114-oauth-token-abuse.yaml CHANGED Viewed

@@ -19,6 +19,28 @@ references:
     - ASI03:2026 - Agent Identity and Access Abuse
   mitre_attack:
     - T1528 - Steal Application Access Token
+compliance:
+  eu_ai_act:
+    - article: "15"
+      context: "OAuth token interception and redirect_uri manipulation enable agent-assisted account takeover across all services using the stolen token; Article 15 cybersecurity requirements mandate that high-risk AI systems detect and block token theft patterns in agent-handled authentication flows."
+      strength: primary
+    - article: "12"
+      context: "Stolen OAuth tokens allow attackers to impersonate legitimate users in audit logs, corrupting the record integrity that Article 12 requires; token interception detection is a prerequisite for maintaining trustworthy audit trails."
+      strength: secondary
+  nist_ai_rmf:
+    - subcategory: "MP.5.1"
+      context: "OAuth token interception and malicious redirect_uri manipulation are adversarial input risks that exploit the agent's participation in authentication flows to capture credentials intended for legitimate services."
+      strength: primary
+    - subcategory: "GV.6.1"
+      context: "Data governance policies must restrict agent handling of bearer tokens, refresh tokens, and client secrets to prevent AI-assisted exfiltration of authorization credentials via forwarding or redirect manipulation."
+      strength: secondary
+  iso_42001:
+    - clause: "6.2"
+      context: "Risk treatment plans must address OAuth token interception scenarios by requiring agents to validate redirect URIs and prohibiting forwarding of authorization tokens to non-allowlisted destinations."
+      strength: primary
+    - clause: "8.4"
+      context: "Impact assessments must document the account-takeover blast radius of OAuth token theft through agent-mediated flows, covering all services that accept the stolen bearer token."
+      strength: secondary
 tags:
   category: context-exfiltration
   subcategory: oauth-token-abuse

package/rules/context-exfiltration/ATR-2026-00115-env-var-harvesting.yaml CHANGED Viewed

@@ -20,6 +20,28 @@ references:
     - ASI03:2026 - Agent Identity and Access Abuse
   mitre_attack:
     - T1082 - System Information Discovery
+compliance:
+  eu_ai_act:
+    - article: "15"
+      context: "Bulk environment variable harvesting exposes every secret in the agent runtime in a single operation, providing an attacker with complete access to all connected services; Article 15 cybersecurity requirements mandate that AI systems resist systematic environment enumeration attacks."
+      strength: primary
+    - article: "12"
+      context: "Exfiltrated environment variables typically include logging and monitoring credentials; Article 12 record-keeping integrity depends on keeping the secrets that protect audit infrastructure safe from compromise via environment harvesting."
+      strength: secondary
+  nist_ai_rmf:
+    - subcategory: "MP.5.1"
+      context: "Bulk environment variable harvesting via printenv, os.environ, or .env file reads is an adversarial technique that exploits agent runtime access to extract every secret in a single operation, and must be tracked as a critical AI system attack vector."
+      strength: primary
+    - subcategory: "GV.6.1"
+      context: "Data governance policies must explicitly prohibit agent tools from accessing the full process environment or .env files, as these sources aggregate all application secrets and database credentials into a single exfiltration target."
+      strength: secondary
+  iso_42001:
+    - clause: "6.2"
+      context: "Risk treatment activities must implement least-privilege environment access controls that prevent agent tools from reading bulk environment state and combining it with network exfiltration calls."
+      strength: primary
+    - clause: "8.4"
+      context: "Impact assessments must document the total credential exposure scope of a successful environment harvesting attack, as compromising the full environment grants access to every connected service simultaneously."
+      strength: secondary
 tags:
   category: context-exfiltration
   subcategory: env-var-harvesting

package/rules/context-exfiltration/ATR-2026-00136-tool-response-data-piggyback.yaml CHANGED Viewed

@@ -23,6 +23,28 @@ references:
     - AML.T0054
   safe_mcp:
     - SMCP-T012
+compliance:
+  eu_ai_act:
+    - article: "15"
+      context: "Tool response piggybacking embeds credential theft or social engineering within legitimate-looking tool outputs, exploiting the agent's inherent trust in tool responses to exfiltrate sensitive data; Article 15 cybersecurity requirements mandate that AI systems validate tool output integrity and detect embedded exfiltration payloads."
+      strength: primary
+    - article: "13"
+      context: "Piggybacked data theft disguised as normal tool responses undermines the transparency Article 13 requires between AI system behavior and what users or operators understand to be occurring; users cannot exercise informed oversight when tool responses silently exfiltrate their data."
+      strength: secondary
+  nist_ai_rmf:
+    - subcategory: "MP.5.1"
+      context: "Tool response piggybacking is an adversarial input risk where legitimate-looking MCP outputs embed credential theft payloads; MP.5.1 requires that this class of adversarial tool-output attack be identified and tracked in the AI system risk inventory."
+      strength: primary
+    - subcategory: "GV.6.1"
+      context: "Piggybacked credential exfiltration in tool responses breaches data governance controls by transmitting SSH keys and API tokens to attacker endpoints; GV.6.1 data governance policies must cover tool output integrity as a data handling boundary."
+      strength: secondary
+  iso_42001:
+    - clause: "6.2"
+      context: "Risk treatment plans under clause 6.2 must include controls for tool response piggybacking attacks, where malicious MCP servers embed credential theft within normal-appearing tool result payloads to exploit agent trust."
+      strength: primary
+    - clause: "8.4"
+      context: "Impact assessment under clause 8.4 must document the tool-response piggybacking vector — where credential theft is hidden in legitimate tool outputs — as a high-impact data exfiltration pathway for AI systems using MCP tool exchanges."
+      strength: secondary
 tags:
   category: context-exfiltration
   subcategory: tool-response-piggyback