agent-threat-rules 2.0.15 → 2.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/package.json +1 -1
  2. package/rules/agent-manipulation/ATR-2026-00032-goal-hijacking.yaml +24 -0
  3. package/rules/agent-manipulation/ATR-2026-00074-cross-agent-privilege-escalation.yaml +24 -0
  4. package/rules/agent-manipulation/ATR-2026-00076-inter-agent-message-spoofing.yaml +24 -0
  5. package/rules/agent-manipulation/ATR-2026-00077-human-trust-exploitation.yaml +24 -0
  6. package/rules/agent-manipulation/ATR-2026-00108-consensus-sybil-attack.yaml +22 -0
  7. package/rules/agent-manipulation/ATR-2026-00116-a2a-message-validation.yaml +22 -0
  8. package/rules/agent-manipulation/ATR-2026-00117-agent-identity-spoofing.yaml +22 -0
  9. package/rules/agent-manipulation/ATR-2026-00119-social-engineering-via-agent.yaml +22 -0
  10. package/rules/agent-manipulation/ATR-2026-00132-casual-authority-escalation.yaml +22 -0
  11. package/rules/agent-manipulation/ATR-2026-00139-casual-authority-redirect.yaml +22 -0
  12. package/rules/agent-manipulation/ATR-2026-00164-skill-scope-hijack.yaml +22 -0
  13. package/rules/agent-manipulation/ATR-2026-00269-fitd-escalation.yaml +22 -0
  14. package/rules/agent-manipulation/ATR-2026-00271-grandma-roleplay-jailbreak.yaml +22 -0
  15. package/rules/agent-manipulation/ATR-2026-00273-dan-developer-mode-persona.yaml +22 -0
  16. package/rules/context-exfiltration/ATR-2026-00075-agent-memory-manipulation.yaml +22 -0
  17. package/rules/context-exfiltration/ATR-2026-00102-disguised-analytics-exfiltration.yaml +22 -0
  18. package/rules/context-exfiltration/ATR-2026-00113-credential-theft.yaml +22 -0
  19. package/rules/context-exfiltration/ATR-2026-00114-oauth-token-abuse.yaml +22 -0
  20. package/rules/context-exfiltration/ATR-2026-00115-env-var-harvesting.yaml +22 -0
  21. package/rules/context-exfiltration/ATR-2026-00136-tool-response-data-piggyback.yaml +22 -0
  22. package/rules/context-exfiltration/ATR-2026-00212-mcp-atlassian-credential-leak.yaml +165 -0
  23. package/rules/context-exfiltration/ATR-2026-00261-markdown-image-exfiltration.yaml +22 -0
  24. package/rules/data-poisoning/ATR-2026-00070-data-poisoning.yaml +9 -0
  25. package/rules/excessive-autonomy/ATR-2026-00051-resource-exhaustion.yaml +24 -0
  26. package/rules/excessive-autonomy/ATR-2026-00052-cascading-failure.yaml +24 -0
  27. package/rules/excessive-autonomy/ATR-2026-00098-unauthorized-financial-action.yaml +31 -0
  28. package/rules/excessive-autonomy/ATR-2026-00099-high-risk-tool-gate.yaml +22 -0
  29. package/rules/model-abuse/ATR-2026-00279-harmful-completion-continuation.yaml +22 -0
  30. package/rules/model-abuse/ATR-2026-00281-eicar-gtube-malware-signature-request.yaml +22 -0
  31. package/rules/model-abuse/ATR-2026-00289-lmrc-harmful-content-elicitation.yaml +22 -0
  32. package/rules/model-abuse/ATR-2026-00292-self-harm-eating-disorder-facilitation.yaml +22 -0
  33. package/rules/model-abuse/ATR-2026-00298-malicious-use-illegal-activity-request.yaml +22 -0
  34. package/rules/model-abuse/ATR-2026-00299-harmbench-detailed-harmful-instruction.yaml +22 -0
  35. package/rules/model-security/ATR-2026-00072-model-behavior-extraction.yaml +24 -0
  36. package/rules/model-security/ATR-2026-00073-malicious-finetuning-data.yaml +24 -0
  37. package/rules/privilege-escalation/ATR-2026-00041-scope-creep.yaml +24 -0
  38. package/rules/privilege-escalation/ATR-2026-00107-delayed-execution-bypass.yaml +22 -0
  39. package/rules/privilege-escalation/ATR-2026-00110-eval-injection.yaml +22 -0
  40. package/rules/privilege-escalation/ATR-2026-00111-shell-escape.yaml +22 -0
  41. package/rules/privilege-escalation/ATR-2026-00112-dynamic-import-exploitation.yaml +22 -0
  42. package/rules/privilege-escalation/ATR-2026-00143-casual-privilege-escalation.yaml +22 -0
  43. package/rules/prompt-injection/ATR-2026-00003-jailbreak-attempt.yaml +23 -0
  44. package/rules/tool-poisoning/ATR-2026-00209-mcpwn-runaway-invocation.yaml +162 -0
  45. package/rules/tool-poisoning/ATR-2026-00210-flowise-system-message-override.yaml +162 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-threat-rules",
3
- "version": "2.0.15",
3
+ "version": "2.0.16",
4
4
  "type": "module",
5
5
  "description": "Open detection standard -- like Sigma, but for AI agents. 311 rules for prompt injection, tool poisoning, context exfiltration, and MCP attacks. Shipped in Cisco AI Defense. 97.1% recall on NVIDIA garak.",
6
6
  "main": "./dist/index.js",
package/rules/agent-manipulation/ATR-2026-00032-goal-hijacking.yaml CHANGED
@@ -26,6 +26,30 @@ references:
26
26
  mitre_atlas:
27
27
  - AML.T0051 - Prompt Injection
28
28
  - AML.T0043 - Craft Adversarial Data
29
+
30
+ compliance:
31
+ eu_ai_act:
32
+ - article: "14"
33
+ context: "Goal hijacking redirects the agent away from its authorized objectives without human awareness, directly undermining the human oversight mechanisms Article 14 requires for high-risk AI systems."
34
+ strength: primary
35
+ - article: "15"
36
+ context: "Article 15 robustness requirements include resistance against adversarial redirection of agent objectives; this rule detects explicit and subtle goal-pivot patterns at runtime."
37
+ strength: secondary
38
+ nist_ai_rmf:
39
+ - subcategory: "GV.1.2"
40
+ context: "Goal hijacking attacks subvert the accountability roles assigned to AI agents by redirecting them from authorized objectives; GV.1.2 requires that defined accountability roles and their task boundaries are enforced at runtime."
41
+ strength: primary
42
+ - subcategory: "MG.2.3"
43
+ context: "Detecting goal hijacking patterns triggers the risk treatment plans required by MG.2.3 to contain adversarial task-redirection before the agent executes unauthorized actions."
44
+ strength: secondary
45
+ iso_42001:
46
+ - clause: "6.2"
47
+ context: "ISO 42001 clause 6.2 risk treatment plans must address task-redirection attacks; this rule implements the monitoring control that detects when an agent's objective is adversarially substituted."
48
+ strength: primary
49
+ - clause: "8.6"
50
+ context: "Clause 8.6 operational controls ensure AI systems operate within their authorized task boundaries; goal hijacking detection enforces that agents remain on their assigned objective despite redirection attempts."
51
+ strength: secondary
52
+
29
53
  tags:
30
54
  category: agent-manipulation
31
55
  subcategory: goal-hijacking
package/rules/agent-manipulation/ATR-2026-00074-cross-agent-privilege-escalation.yaml CHANGED
@@ -25,6 +25,30 @@ references:
25
25
  mitre_attack:
26
26
  - T1078 - Valid Accounts
27
27
  - T1550 - Use Alternate Authentication Material
28
+
29
+ compliance:
30
+ eu_ai_act:
31
+ - article: "14"
32
+ context: "Cross-agent privilege escalation allows one agent to acquire capabilities from another agent's authorization scope without human approval, directly bypassing the role-based access controls that Article 14 human oversight requires."
33
+ strength: primary
34
+ - article: "9"
35
+ context: "Multi-agent privilege escalation via trust relationships is a documented attack pattern in the AI system risk register; Article 9 requires monitoring controls for this lateral movement risk."
36
+ strength: secondary
37
+ nist_ai_rmf:
38
+ - subcategory: "GV.1.2"
39
+ context: "Cross-agent privilege escalation exploits gaps in the accountability roles defined for each agent in a multi-agent system; GV.1.2 requires that agent role boundaries are defined and that lateral movement across those boundaries is detected."
40
+ strength: primary
41
+ - subcategory: "MG.2.3"
42
+ context: "Credential forwarding and orchestrator bypass between agents require immediate risk treatment to prevent lateral movement from escalating to full system compromise; MG.2.3 mandates that these response plans are pre-defined and activated on detection."
43
+ strength: secondary
44
+ iso_42001:
45
+ - clause: "6.2"
46
+ context: "ISO 42001 clause 6.2 risk treatment activities must cover inter-agent privilege escalation as a multi-agent architecture risk; this rule implements the detection control for credential forwarding and unauthorized role assumption between agents."
47
+ strength: primary
48
+ - clause: "8.6"
49
+ context: "Clause 8.6 operational controls ensure each agent executes within its authorized capability set; cross-agent privilege escalation detection enforces that no agent can assume the permissions of a higher-privilege agent without explicit authorization."
50
+ strength: secondary
51
+
28
52
  tags:
29
53
  category: agent-manipulation
30
54
  subcategory: cross-agent-privilege-escalation
package/rules/agent-manipulation/ATR-2026-00076-inter-agent-message-spoofing.yaml CHANGED
@@ -27,6 +27,30 @@ references:
27
27
  mitre_atlas:
28
28
  - AML.T0051 - Prompt Injection
29
29
  - AML.T0043 - Craft Adversarial Data
30
+
31
+ compliance:
32
+ eu_ai_act:
33
+ - article: "15"
34
+ context: "Article 15 cybersecurity requirements for high-risk AI include protecting agent communication channels against spoofing, replay, and message integrity attacks; this rule detects structural communication failures between agents."
35
+ strength: primary
36
+ - article: "14"
37
+ context: "Spoofed inter-agent messages can trigger unauthorized agent actions without any human-visible audit trail, undermining the traceability required by Article 14 human oversight provisions."
38
+ strength: secondary
39
+ nist_ai_rmf:
40
+ - subcategory: "MP.5.1"
41
+ context: "Spoofed, replayed, and unauthenticated inter-agent messages are adversarial inputs that exploit trust assumptions in multi-agent communication channels; MP.5.1 requires that these structural communication integrity attacks are identified and tracked."
42
+ strength: primary
43
+ - subcategory: "MG.2.3"
44
+ context: "Detection of message spoofing, replay attacks, and null authentication in agent channels triggers risk treatment plans to quarantine compromised messages before they cause unauthorized actions downstream; MG.2.3 requires these treatment processes are defined."
45
+ strength: secondary
46
+ iso_42001:
47
+ - clause: "6.2"
48
+ context: "ISO 42001 clause 6.2 risk treatment plans must address inter-agent communication integrity failures including message spoofing and replay; this rule implements the detection control for structural authentication weaknesses in agent-to-agent channels."
49
+ strength: primary
50
+ - clause: "8.6"
51
+ context: "Clause 8.6 operational controls require that inter-agent communication operates with verified message authentication; detection of null tokens, forged IDs, and unauthenticated commands enforces the integrity requirements for correct agent pipeline execution."
52
+ strength: secondary
53
+
30
54
  tags:
31
55
  category: agent-manipulation
32
56
  subcategory: inter-agent-communication
package/rules/agent-manipulation/ATR-2026-00077-human-trust-exploitation.yaml CHANGED
@@ -33,6 +33,30 @@ references:
33
33
  - ASI09:2026 - Human-Agent Trust Exploitation
34
34
  mitre_atlas:
35
35
  - AML.T0048 - Adversarial Prompt Techniques
36
+
37
+ compliance:
38
+ eu_ai_act:
39
+ - article: "13"
40
+ context: "Article 13 transparency obligations require that AI systems do not exploit informational asymmetries to manipulate human users; human-trust exploitation attacks weaponize the AI's perceived authority and credibility against its own users."
41
+ strength: primary
42
+ - article: "14"
43
+ context: "Trust exploitation techniques undermine the human's ability to critically evaluate agent outputs and decisions, eroding the informed oversight capability Article 14 requires to remain effective."
44
+ strength: secondary
45
+ nist_ai_rmf:
46
+ - subcategory: "GV.6.1"
47
+ context: "Human-agent trust exploitation weaponizes information asymmetry between the AI system and its operators; GV.6.1 data governance policies must ensure AI systems do not suppress or fabricate information to manipulate human decision-makers."
48
+ strength: primary
49
+ - subcategory: "MP.5.1"
50
+ context: "False certainty claims, manufactured urgency, and verification discouragement are adversarial input patterns targeting human operators rather than the AI system itself; MP.5.1 requires identifying and tracking these social-engineering attack vectors."
51
+ strength: secondary
52
+ iso_42001:
53
+ - clause: "8.4"
54
+ context: "ISO 42001 clause 8.4 requires documented AI system impact assessments; human-trust exploitation attacks that suppress warnings or manufacture urgency to bypass review represent a documented impact category requiring runtime detection controls."
55
+ strength: primary
56
+ - clause: "9.1"
57
+ context: "Clause 9.1 performance monitoring must evaluate whether AI systems support effective human oversight; detecting trust exploitation patterns provides the measurement signal that indicates the AI management system's oversight controls are being actively undermined."
58
+ strength: secondary
59
+
36
60
  tags:
37
61
  category: agent-manipulation
38
62
  subcategory: human-trust-exploitation
package/rules/agent-manipulation/ATR-2026-00108-consensus-sybil-attack.yaml CHANGED
@@ -24,6 +24,28 @@ references:
24
24
  - ASI01:2026 - Agent Goal Hijack
25
25
  mitre_atlas:
26
26
  - AML.T0043 - Craft Adversarial Data
27
+ compliance:
28
+ eu_ai_act:
29
+ - article: "14"
30
+ context: "Consensus Sybil attacks allow compromised agents to manufacture false agreement in multi-agent decision systems, directly undermining the human oversight capability Article 14 requires for high-risk AI applications."
31
+ strength: primary
32
+ - article: "15"
33
+ context: "Article 15 robustness requirements mandate that high-risk AI systems resist adversarial manipulation; Sybil attacks on consensus mechanisms are a documented adversarial pattern requiring systematic detection."
34
+ strength: secondary
35
+ nist_ai_rmf:
36
+ - subcategory: "GV.1.2"
37
+ context: "Consensus Sybil attacks exploit undefined accountability for agent identity in multi-agent systems; GV.1.2 requires that accountability roles for AI risk management are defined and assigned to prevent Sybil-style identity fabrication."
38
+ strength: primary
39
+ - subcategory: "MG.2.3"
40
+ context: "Sybil attacks represent a documented risk treatment gap in multi-agent consensus deployments; MG.2.3 requires that risk treatment plans are implemented for identified AI risks, including detection controls for fake-identity flooding."
41
+ strength: secondary
42
+ iso_42001:
43
+ - clause: "6.2"
44
+ context: "Multi-agent systems deploying consensus mechanisms must include Sybil attack detection as a planned risk treatment activity under the AI objectives framework required by clause 6.2."
45
+ strength: primary
46
+ - clause: "8.6"
47
+ context: "Clause 8.6 operational controls must ensure that consensus decisions are made by verified agent identities and that fabricated voter identities are detected before they influence multi-agent outcomes."
48
+ strength: secondary
27
49
  tags:
28
50
  category: agent-manipulation
29
51
  subcategory: consensus-sybil-attack
package/rules/agent-manipulation/ATR-2026-00116-a2a-message-validation.yaml CHANGED
@@ -20,6 +20,28 @@ references:
20
20
  - ASI07:2026 - Insecure Inter-Agent Communication
21
21
  mitre_attack:
22
22
  - T1557 - Adversary-in-the-Middle
23
+ compliance:
24
+ eu_ai_act:
25
+ - article: "15"
26
+ context: "Malicious agent-to-agent message injection exploits unvalidated inter-agent channels to execute unauthorized actions; Article 15 cybersecurity requirements mandate detection and resistance against such adversarial input manipulation."
27
+ strength: primary
28
+ - article: "14"
29
+ context: "A2A message injection can cause agents to take actions outside their authorized scope without human awareness, eroding the effective oversight capability Article 14 requires for high-risk AI systems."
30
+ strength: secondary
31
+ nist_ai_rmf:
32
+ - subcategory: "MP.5.1"
33
+ context: "Malicious A2A message injection is a documented adversarial input risk targeting inter-agent communication channels; MP.5.1 requires that adversarial input risks to AI systems are identified and tracked to enable detection of embedded payload attacks."
34
+ strength: primary
35
+ - subcategory: "MG.2.3"
36
+ context: "A2A message validation failures represent a risk requiring active treatment; MG.2.3 requires that risk treatment plans are implemented for identified AI risks, including validation controls on all inter-agent message channels."
37
+ strength: secondary
38
+ iso_42001:
39
+ - clause: "6.2"
40
+ context: "AI system plans under clause 6.2 must include risk treatment activities for inter-agent message injection, ensuring that A2A communication validation is a planned control rather than an afterthought."
41
+ strength: primary
42
+ - clause: "8.6"
43
+ context: "Clause 8.6 operational controls require that inter-agent messages are validated before execution, preventing injected instructions from executing in the receiving agent's security context."
44
+ strength: secondary
23
45
  tags:
24
46
  category: agent-manipulation
25
47
  subcategory: a2a-message-validation
package/rules/agent-manipulation/ATR-2026-00117-agent-identity-spoofing.yaml CHANGED
@@ -20,6 +20,28 @@ references:
20
20
  - ASI10:2026 - Rogue Agents
21
21
  mitre_attack:
22
22
  - T1036 - Masquerading
23
+ compliance:
24
+ eu_ai_act:
25
+ - article: "13"
26
+ context: "Agent identity spoofing creates false impressions of authority and trustworthiness that users and other agents cannot distinguish from legitimate communications; Article 13 transparency obligations require that AI system identity and authority claims be verifiable."
27
+ strength: primary
28
+ - article: "15"
29
+ context: "Article 15 cybersecurity requirements include protection against masquerading attacks; identity spoofing in multi-agent systems represents a documented adversarial pattern targeting the authentication layer of agent architectures."
30
+ strength: secondary
31
+ nist_ai_rmf:
32
+ - subcategory: "GV.6.1"
33
+ context: "Agent identity spoofing exploits the absence of verified identity provenance in inter-agent data flows; GV.6.1 data governance policies must define how agent identity claims are authenticated to prevent masquerading attacks."
34
+ strength: primary
35
+ - subcategory: "MP.5.1"
36
+ context: "Impersonation of admin or supervisor agent roles is an adversarial input risk that must be tracked under MP.5.1 to ensure detection controls cover identity-claim-based privilege escalation patterns."
37
+ strength: secondary
38
+ iso_42001:
39
+ - clause: "8.4"
40
+ context: "Clause 8.4 AI system impact assessments must document the risk that unverified agent identity claims allow privilege escalation, and review controls that ensure identity spoofing is detectable before actions are taken."
41
+ strength: primary
42
+ - clause: "9.1"
43
+ context: "Clause 9.1 performance monitoring must include evaluation of whether agent identity verification controls are functioning correctly and catching masquerading attacks in operational deployments."
44
+ strength: secondary
23
45
  tags:
24
46
  category: agent-manipulation
25
47
  subcategory: agent-identity-spoofing
package/rules/agent-manipulation/ATR-2026-00119-social-engineering-via-agent.yaml CHANGED
@@ -19,6 +19,28 @@ references:
19
19
  - ASI09:2026 - Human Trust Exploitation
20
20
  mitre_attack:
21
21
  - T1566 - Phishing
22
+ compliance:
23
+ eu_ai_act:
24
+ - article: "13"
25
+ context: "Social engineering delivered through agent output exploits the elevated trust users place in AI-generated content; Article 13 transparency obligations require that AI systems not be used as vectors for deception or manipulation against the users they serve."
26
+ strength: primary
27
+ - article: "14"
28
+ context: "Agents weaponized for social engineering undermine the informed human judgment that Article 14 oversight depends on; users manipulated through trusted AI channels cannot exercise effective oversight of subsequent decisions."
29
+ strength: secondary
30
+ nist_ai_rmf:
31
+ - subcategory: "GV.6.1"
32
+ context: "Social engineering delivered via agent output exploits user trust in AI-generated content to harvest credentials and personal data; GV.6.1 data governance policies must address how AI-generated communications are authenticated to prevent agent-mediated phishing."
33
+ strength: primary
34
+ - subcategory: "MP.5.1"
35
+ context: "Using agents as social engineering vectors is an adversarial input risk where poisoned context produces manipulative outputs; MP.5.1 requires this risk to be identified and tracked so that urgency and authority-impersonation patterns in agent output are monitored."
36
+ strength: secondary
37
+ iso_42001:
38
+ - clause: "8.4"
39
+ context: "Clause 8.4 impact assessments must document the elevated trust users place in AI-generated content and evaluate whether social engineering via agent output constitutes a significant harm requiring dedicated detection controls."
40
+ strength: primary
41
+ - clause: "9.1"
42
+ context: "Clause 9.1 performance monitoring must evaluate whether agent output monitoring detects social engineering patterns such as urgency language and credential-harvesting requests generated through poisoned agent context."
43
+ strength: secondary
22
44
  tags:
23
45
  category: agent-manipulation
24
46
  subcategory: social-engineering-via-agent
package/rules/agent-manipulation/ATR-2026-00132-casual-authority-escalation.yaml CHANGED
@@ -26,6 +26,28 @@ references:
26
26
  - ASI09:2026 - Human-Agent Trust Exploitation
27
27
  metadata_provenance:
28
28
  mitre_atlas: auto-generated
29
+ compliance:
30
+ eu_ai_act:
31
+ - article: "14"
32
+ context: "Casual authority claims using conversational tone bypass detection while achieving the same effect as explicit privilege escalation — unauthorized actions without human approval; Article 14 oversight requirements apply regardless of the linguistic register used to circumvent them."
33
+ strength: primary
34
+ - article: "9"
35
+ context: "Rationalized bypasses and casual scope escalation are documented attack patterns that Article 9 risk management systems must account for; detection of informal social engineering is a required risk treatment for multi-agent deployments."
36
+ strength: secondary
37
+ nist_ai_rmf:
38
+ - subcategory: "GV.1.2"
39
+ context: "Casual authority escalation exploits gaps in accountability role definitions, allowing informal authority claims like 'the boss said' to bypass controls without challenge; GV.1.2 requires that accountability roles are formally assigned so that unverifiable casual authority claims are detectable."
40
+ strength: primary
41
+ - subcategory: "MG.2.3"
42
+ context: "Conversational-tone scope escalation is a documented risk requiring active treatment; MG.2.3 requires that risk treatment plans address informal social engineering techniques that evade formal injection detection in multi-agent deployments."
43
+ strength: secondary
44
+ iso_42001:
45
+ - clause: "6.2"
46
+ context: "Clause 6.2 AI objectives and plans must include risk treatment for casual authority claim patterns that bypass formal injection detection, as these represent a distinct attack surface from explicit privilege escalation."
47
+ strength: primary
48
+ - clause: "8.6"
49
+ context: "Clause 8.6 operational controls must ensure that agents require verified authorization before acting on scope expansions or control bypasses, regardless of whether the instruction is phrased formally or in casual conversational language."
50
+ strength: secondary
29
51
  tags:
30
52
  category: agent-manipulation
31
53
  subcategory: casual-authority
package/rules/agent-manipulation/ATR-2026-00139-casual-authority-redirect.yaml CHANGED
@@ -21,6 +21,28 @@ references:
21
21
  - ASI06 - Rogue Actions
22
22
  metadata_provenance:
23
23
  mitre_atlas: auto-generated
24
+ compliance:
25
+ eu_ai_act:
26
+ - article: "14"
27
+ context: "Casual authority data redirect attacks redirect agent output to attacker-controlled endpoints by spoofing organizational authority, bypassing human review of where sensitive data flows; Article 14 oversight requires that data redirection decisions remain under human control."
28
+ strength: primary
29
+ - article: "15"
30
+ context: "Article 15 cybersecurity requirements include protection against social engineering attacks targeting agent output pipelines; casual authority redirect is a documented adversarial technique exploiting agents as exfiltration proxies."
31
+ strength: secondary
32
+ nist_ai_rmf:
33
+ - subcategory: "GV.1.2"
34
+ context: "Casual authority redirect attacks succeed when no formal accountability role exists to verify output redirection decisions; GV.1.2 requires that accountability roles for AI risk management are defined so that informal directives to redirect agent output can be challenged and blocked."
35
+ strength: primary
36
+ - subcategory: "MG.2.3"
37
+ context: "Data redirect via social engineering authority claims is a documented exfiltration risk requiring active treatment; MG.2.3 requires that risk treatment plans include detection controls for authority-impersonation-based output redirection attacks."
38
+ strength: secondary
39
+ iso_42001:
40
+ - clause: "6.2"
41
+ context: "Clause 6.2 AI objectives and plans must include controls for preventing agents from being redirected to attacker-controlled endpoints through casual authority claims that spoof organizational hierarchy."
42
+ strength: primary
43
+ - clause: "8.6"
44
+ context: "Clause 8.6 operational controls must ensure that agent output destinations are validated against authorized endpoints and that casual authority directives to disable safety filters or redirect data are blocked before execution."
45
+ strength: secondary
24
46
  tags:
25
47
  category: agent-manipulation
26
48
  subcategory: casual-authority-redirect
package/rules/agent-manipulation/ATR-2026-00164-skill-scope-hijack.yaml CHANGED
@@ -17,6 +17,28 @@ references:
17
17
  - 'LLM06:2025 - Excessive Agency'
18
18
  owasp_agentic:
19
19
  - 'ASI03:2026 - Cross-Agent Escalation'
20
+ compliance:
21
+ eu_ai_act:
22
+ - article: "14"
23
+ context: "Skill scope hijacking embeds privilege escalation instructions within SKILL.md files to expand agent capabilities beyond authorized boundaries without human knowledge; Article 14 requires that agents operate within sanctioned scopes visible to human overseers."
24
+ strength: primary
25
+ - article: "15"
26
+ context: "Article 15 cybersecurity requirements mandate protection against supply chain attacks; malicious SKILL.md files represent a documented technique for injecting unauthorized capability expansion at the skill-definition layer."
27
+ strength: secondary
28
+ nist_ai_rmf:
29
+ - subcategory: "GV.1.2"
30
+ context: "Skill scope hijacking succeeds when no accountability role governs what capabilities a skill may claim; GV.1.2 requires that accountability roles for AI risk management are defined and assigned, ensuring that SKILL.md capability claims are reviewed against authorized scope boundaries."
31
+ strength: primary
32
+ - subcategory: "MP.2.3"
33
+ context: "Malicious SKILL.md files in external skill repositories represent an AI supply chain risk source; MP.2.3 requires that AI supply chain risk sources are identified and assessed, covering the skill-definition layer as a vector for privilege escalation injection."
34
+ strength: secondary
35
+ iso_42001:
36
+ - clause: "6.2"
37
+ context: "Clause 6.2 AI objectives and plans must include risk treatment for skill-definition-layer attacks where SKILL.md instructions expand agent scope beyond authorized boundaries without human knowledge."
38
+ strength: primary
39
+ - clause: "8.5"
40
+ context: "Clause 8.5 controls over externally-provided AI components must include validation of SKILL.md files to ensure that skill definitions do not embed cross-agent escalation or unauthorized scope expansion instructions."
41
+ strength: secondary
20
42
  tags:
21
43
  category: agent-manipulation
22
44
  subcategory: skill-scope-hijack
package/rules/agent-manipulation/ATR-2026-00269-fitd-escalation.yaml CHANGED
@@ -26,6 +26,28 @@ references:
26
26
  research:
27
27
  - "https://github.com/NVIDIA/garak/blob/main/garak/probes/continuation.py"
28
28
  - "https://jailbreaking-llms.github.io/"
29
+ compliance:
30
+ eu_ai_act:
31
+ - article: "15"
32
+ context: "Foot-in-the-door escalation exploits LLM consistency bias to incrementally extract harmful outputs that would be refused if requested directly; Article 15 robustness requirements demand resistance against systematic adversarial probing techniques targeting model behavioral constraints."
33
+ strength: primary
34
+ - article: "9"
35
+ context: "FITD escalation is a documented multi-step adversarial pattern; Article 9 risk management systems must include detection controls for incremental jailbreak techniques that bypass single-turn safety evaluation."
36
+ strength: secondary
37
+ nist_ai_rmf:
38
+ - subcategory: "MP.5.1"
39
+ context: "Foot-in-the-door escalation is an adversarial input technique that exploits LLM consistency bias to incrementally bypass safety constraints across multiple turns; MP.5.1 requires that such multi-step adversarial input risks are identified and tracked to enable cross-turn detection."
40
+ strength: primary
41
+ - subcategory: "MG.2.3"
42
+ context: "FITD escalation represents an identified risk requiring active treatment through detection controls that monitor prior-compliance anchors and escalation signals across conversation history; MG.2.3 requires such risk treatment plans are implemented."
43
+ strength: secondary
44
+ iso_42001:
45
+ - clause: "6.2"
46
+ context: "Clause 6.2 AI objectives and plans must include risk treatment for multi-step jailbreak techniques like FITD escalation that exploit model consistency bias to extract harmful outputs that would be refused if requested directly."
47
+ strength: primary
48
+ - clause: "8.6"
49
+ context: "Clause 8.6 operational controls must ensure that agents evaluate escalating requests in context of prior conversation turns rather than in isolation, preventing foot-in-the-door compliance anchoring from lowering refusal thresholds."
50
+ strength: secondary
29
51
  tags:
30
52
  category: agent-manipulation
31
53
  subcategory: fitd-escalation
package/rules/agent-manipulation/ATR-2026-00271-grandma-roleplay-jailbreak.yaml CHANGED
@@ -26,6 +26,28 @@ references:
26
26
  research:
27
27
  - "https://github.com/NVIDIA/garak/blob/main/garak/probes/grandma.py"
28
28
  - "https://www.independent.co.uk/tech/chatgpt-microsoft-windows-11-grandma-exploit-b2360213.html"
29
+ compliance:
30
+ eu_ai_act:
31
+ - article: "15"
32
+ context: "Grandma roleplay jailbreaks use emotional manipulation through grief and nostalgia framing to circumvent safety constraints; Article 15 robustness requirements include resistance against social engineering techniques that exploit psychological vulnerabilities in human-AI interaction."
33
+ strength: primary
34
+ - article: "9"
35
+ context: "Persona-based jailbreaks exploiting emotional framing are documented high-risk attack patterns; Article 9 risk management requires systematic controls for emotional manipulation techniques used to extract harmful content from high-risk AI systems."
36
+ strength: secondary
37
+ nist_ai_rmf:
38
+ - subcategory: "MP.5.1"
39
+ context: "Grandma roleplay jailbreaks are adversarial inputs that exploit grief and nostalgia emotional framing to bypass safety constraints; MP.5.1 requires that this class of psychological manipulation risk is identified and tracked as a distinct adversarial input vector."
40
+ strength: primary
41
+ - subcategory: "MG.2.3"
42
+ context: "Emotional-framing persona jailbreaks represent a known risk requiring active treatment through detection controls for deceased-relative roleplay patterns; MG.2.3 requires that risk treatment plans are implemented for identified AI risks including this garak-documented attack family."
43
+ strength: secondary
44
+ iso_42001:
45
+ - clause: "6.2"
46
+ context: "Clause 6.2 AI objectives and plans must include risk treatment for emotional manipulation techniques such as grandma roleplay that lower the model's refusal threshold by exploiting grief and nostalgia framing."
47
+ strength: primary
48
+ - clause: "8.6"
49
+ context: "Clause 8.6 operational controls must ensure that harmful procedural content is not generated regardless of the emotional or persona framing used in the request, blocking grandma roleplay jailbreak patterns before harmful content is produced."
50
+ strength: secondary
29
51
  tags:
30
52
  category: agent-manipulation
31
53
  subcategory: grandma-roleplay-jailbreak
@@ -27,6 +27,28 @@ references:
27
27
  research:
28
28
  - "https://github.com/NVIDIA/garak/blob/main/garak/probes/dan.py"
29
29
  - "https://github.com/NVIDIA/garak/blob/main/garak/data/dan/"
30
+ compliance:
31
+ eu_ai_act:
32
+ - article: "15"
33
+ context: "DAN and Developer Mode persona jailbreaks attempt to replace the model's safety-trained identity with an unrestricted persona; Article 15 robustness and cybersecurity requirements mandate that high-risk AI systems resist identity replacement attacks that disable safety constraints."
34
+ strength: primary
35
+ - article: "9"
36
+ context: "Named persona jailbreaks are a well-documented attack family with widespread deployment in the wild; Article 9 risk management systems for high-risk AI must include detection controls for this established technique for bypassing content restrictions."
37
+ strength: secondary
38
+ nist_ai_rmf:
39
+ - subcategory: "MP.5.1"
40
+ context: "DAN, DUDE, and Developer Mode persona jailbreaks are adversarial inputs that attempt to replace the model's safety-trained identity with an unrestricted persona; MP.5.1 requires that this widely deployed adversarial input risk is identified and tracked across model deployments."
41
+ strength: primary
42
+ - subcategory: "MG.2.3"
43
+ context: "Named persona jailbreak families represent a documented risk requiring active treatment through pattern-based detection of DAN/DUDE/STAN identity-replacement prompts; MG.2.3 requires that risk treatment plans are implemented for this established and widely distributed attack technique."
44
+ strength: secondary
45
+ iso_42001:
46
+ - clause: "6.2"
47
+ context: "Clause 6.2 AI objectives and plans must include risk treatment for DAN and Developer Mode persona replacement attacks, which represent one of the most widely deployed jailbreak families targeting AI system safety constraints."
48
+ strength: primary
49
+ - clause: "8.6"
50
+ context: "Clause 8.6 operational controls must ensure that the AI system's safety identity cannot be replaced by named unrestricted personas such as DAN or DUDE, blocking persona-replacement jailbreaks before they disable safety constraints."
51
+ strength: secondary
30
52
  tags:
31
53
  category: agent-manipulation
32
54
  subcategory: dan-developer-mode-persona
@@ -23,6 +23,28 @@ references:
23
23
  - AML.T0051 - LLM Prompt Injection
24
24
  mitre_attack:
25
25
  - "T1565.001 - Data Manipulation: Stored Data Manipulation"
26
+ compliance:
27
+ eu_ai_act:
28
+ - article: "14"
29
+ context: "Agent memory poisoning corrupts persistent context across sessions, allowing attackers to establish behavioral backdoors that survive individual-turn oversight checks; Article 14 requires that human oversight mechanisms remain effective across the full lifecycle of agent interactions, including persistent state."
30
+ strength: primary
31
+ - article: "15"
32
+ context: "Article 15 robustness requirements mandate resistance against adversarial inputs that modify system behavior; memory injection attacks that alter an agent's long-term behavioral state represent a critical robustness failure requiring detection controls."
33
+ strength: secondary
34
+ nist_ai_rmf:
35
+ - subcategory: "GV.1.2"
36
+ context: "Memory manipulation attacks that inject false behavioral directives require clearly assigned accountability roles to ensure that persistent agent state is monitored and that any unauthorized modification is escalated to a responsible party."
37
+ strength: primary
38
+ - subcategory: "MG.2.3"
39
+ context: "Risk treatment plans for memory poisoning must include controls that detect and block cross-session behavioral injection before poisoned directives propagate into future interactions."
40
+ strength: secondary
41
+ iso_42001:
42
+ - clause: "6.2"
43
+ context: "AI objectives and risk treatment plans must explicitly address memory manipulation threats to prevent attackers from establishing persistent behavioral backdoors that survive context resets."
44
+ strength: primary
45
+ - clause: "8.6"
46
+ context: "Operational controls for AI systems must include integrity checks on persistent memory stores to ensure that agent behavioral state has not been tampered with between sessions."
47
+ strength: secondary
26
48
  tags:
27
49
  category: context-exfiltration
28
50
  subcategory: memory-manipulation
@@ -24,6 +24,28 @@ references:
24
24
  - ASI06:2026 - Insufficient Access Controls
25
25
  metadata_provenance:
26
26
  mitre_atlas: auto-generated
27
+ compliance:
28
+ eu_ai_act:
29
+ - article: "13"
30
+ context: "Disguised analytics exfiltration exploits the assumption that telemetry collection is harmless to covertly transmit full conversation context to attacker-controlled endpoints; Article 13 transparency obligations require that AI systems and their tools disclose all data collection practices clearly to users."
31
+ strength: primary
32
+ - article: "15"
33
+ context: "Article 15 cybersecurity requirements include protection against tools that misrepresent their data handling to facilitate unauthorized data extraction; detection of analytics-disguised exfiltration is a required control for high-risk AI deployments."
34
+ strength: secondary
35
+ nist_ai_rmf:
36
+ - subcategory: "GV.6.1"
37
+ context: "Disguised analytics exfiltration violates data governance boundaries by covertly transmitting full conversation context under the cover of routine telemetry, bypassing the data handling policies that GV.6.1 requires AI systems to enforce."
38
+ strength: primary
39
+ - subcategory: "MS.2.5"
40
+ context: "Detection of analytics-disguised data exfiltration is a data protection control measurement required under MS.2.5 to verify that AI system tool invocations do not silently leak user conversation data to external endpoints."
41
+ strength: secondary
42
+ iso_42001:
43
+ - clause: "8.4"
44
+ context: "Disguised analytics exfiltration requires a documented AI system impact assessment under clause 8.4 that identifies tools claiming benign analytics roles as a covert data exfiltration vector affecting user privacy."
45
+ strength: primary
46
+ - clause: "9.1"
47
+ context: "Ongoing monitoring and evaluation under clause 9.1 must include checks that tool-level data transmission claims are consistent with stated telemetry policies, detecting analytics-disguised exfiltration during operational review."
48
+ strength: secondary
27
49
  tags:
28
50
  category: context-exfiltration
29
51
  subcategory: disguised-analytics-collection
@@ -19,6 +19,28 @@ references:
19
19
  - ASI03:2026 - Agent Identity and Access Abuse
20
20
  mitre_attack:
21
21
  - T1552.001 - Credentials In Files
22
+ compliance:
23
+ eu_ai_act:
24
+ - article: "15"
25
+ context: "Credential file theft through agent tool access compromises every downstream service connected to the stolen credentials; Article 15 cybersecurity requirements mandate that high-risk AI systems include controls preventing agents from accessing credential stores outside their authorized scope."
26
+ strength: primary
27
+ - article: "12"
28
+ context: "Credential theft undermines the integrity of audit logs and access records that Article 12 logging obligations depend on; an attacker who obtains credentials can falsify or erase audit trails, making Article 12 compliance unverifiable."
29
+ strength: secondary
30
+ nist_ai_rmf:
31
+ - subcategory: "MP.5.1"
32
+ context: "Adversarial tool calls targeting credential files such as ~/.aws/credentials and SSH private keys represent a direct exploitation of the agent's file-access capabilities that must be identified and tracked as a critical AI system risk."
33
+ strength: primary
34
+ - subcategory: "GV.6.1"
35
+ context: "Data governance policies must define and enforce scope restrictions on agent access to credential stores, preventing AI tools from reading authentication files outside their authorized operational boundary."
36
+ strength: secondary
37
+ iso_42001:
38
+ - clause: "6.2"
39
+ context: "Risk treatment activities must include controls that block agent access to well-known credential file paths, reducing the attack surface for credential theft through AI tool invocations."
40
+ strength: primary
41
+ - clause: "8.4"
42
+ context: "AI system impact assessments must document and review the potential consequences of credential file theft, including lateral movement across all downstream services connected to stolen credentials."
43
+ strength: secondary
22
44
  tags:
23
45
  category: context-exfiltration
24
46
  subcategory: credential-theft
@@ -19,6 +19,28 @@ references:
19
19
  - ASI03:2026 - Agent Identity and Access Abuse
20
20
  mitre_attack:
21
21
  - T1528 - Steal Application Access Token
22
+ compliance:
23
+ eu_ai_act:
24
+ - article: "15"
25
+ context: "OAuth token interception and redirect_uri manipulation enable agent-assisted account takeover across all services using the stolen token; Article 15 cybersecurity requirements mandate that high-risk AI systems detect and block token theft patterns in agent-handled authentication flows."
26
+ strength: primary
27
+ - article: "12"
28
+ context: "Stolen OAuth tokens allow attackers to impersonate legitimate users in audit logs, corrupting the record integrity that Article 12 requires; token interception detection is a prerequisite for maintaining trustworthy audit trails."
29
+ strength: secondary
30
+ nist_ai_rmf:
31
+ - subcategory: "MP.5.1"
32
+ context: "OAuth token interception and malicious redirect_uri manipulation are adversarial input risks that exploit the agent's participation in authentication flows to capture credentials intended for legitimate services."
33
+ strength: primary
34
+ - subcategory: "GV.6.1"
35
+ context: "Data governance policies must restrict agent handling of bearer tokens, refresh tokens, and client secrets to prevent AI-assisted exfiltration of authorization credentials via forwarding or redirect manipulation."
36
+ strength: secondary
37
+ iso_42001:
38
+ - clause: "6.2"
39
+ context: "Risk treatment plans must address OAuth token interception scenarios by requiring agents to validate redirect URIs and prohibiting forwarding of authorization tokens to non-allowlisted destinations."
40
+ strength: primary
41
+ - clause: "8.4"
42
+ context: "Impact assessments must document the account-takeover blast radius of OAuth token theft through agent-mediated flows, covering all services that accept the stolen bearer token."
43
+ strength: secondary
22
44
  tags:
23
45
  category: context-exfiltration
24
46
  subcategory: oauth-token-abuse
@@ -20,6 +20,28 @@ references:
20
20
  - ASI03:2026 - Agent Identity and Access Abuse
21
21
  mitre_attack:
22
22
  - T1082 - System Information Discovery
23
+ compliance:
24
+ eu_ai_act:
25
+ - article: "15"
26
+ context: "Bulk environment variable harvesting exposes every secret in the agent runtime in a single operation, providing an attacker with complete access to all connected services; Article 15 cybersecurity requirements mandate that AI systems resist systematic environment enumeration attacks."
27
+ strength: primary
28
+ - article: "12"
29
+ context: "Exfiltrated environment variables typically include logging and monitoring credentials; Article 12 record-keeping integrity depends on protecting the secrets that secure audit infrastructure from compromise via environment harvesting."
30
+ strength: secondary
31
+ nist_ai_rmf:
32
+ - subcategory: "MP.5.1"
33
+ context: "Bulk environment variable harvesting via printenv, os.environ, or .env file reads is an adversarial technique that exploits agent runtime access to extract every secret in a single operation, and must be tracked as a critical AI system attack vector."
34
+ strength: primary
35
+ - subcategory: "GV.6.1"
36
+ context: "Data governance policies must explicitly prohibit agent tools from accessing the full process environment or .env files, as these sources aggregate all application secrets and database credentials into a single exfiltration target."
37
+ strength: secondary
38
+ iso_42001:
39
+ - clause: "6.2"
40
+ context: "Risk treatment activities must implement least-privilege environment access controls that prevent agent tools from reading bulk environment state and combining it with network exfiltration calls."
41
+ strength: primary
42
+ - clause: "8.4"
43
+ context: "Impact assessments must document the total credential exposure scope of a successful environment harvesting attack, as compromising the full environment grants access to every connected service simultaneously."
44
+ strength: secondary
23
45
  tags:
24
46
  category: context-exfiltration
25
47
  subcategory: env-var-harvesting
@@ -23,6 +23,28 @@ references:
23
23
  - AML.T0054
24
24
  safe_mcp:
25
25
  - SMCP-T012
26
+ compliance:
27
+ eu_ai_act:
28
+ - article: "15"
29
+ context: "Tool response piggybacking embeds credential theft or social engineering within legitimate-looking tool outputs, exploiting the agent's inherent trust in tool responses to exfiltrate sensitive data; Article 15 cybersecurity requirements mandate that AI systems validate tool output integrity and detect embedded exfiltration payloads."
30
+ strength: primary
31
+ - article: "13"
32
+ context: "Piggybacked data theft disguised as normal tool responses undermines the transparency Article 13 requires between AI system behavior and what users or operators understand to be occurring; users cannot exercise informed oversight when tool responses silently exfiltrate their data."
33
+ strength: secondary
34
+ nist_ai_rmf:
35
+ - subcategory: "MP.5.1"
36
+ context: "Tool response piggybacking is an adversarial input risk where legitimate-looking MCP outputs embed credential theft payloads; MP.5.1 requires that this class of adversarial tool-output attack be identified and tracked in the AI system risk inventory."
37
+ strength: primary
38
+ - subcategory: "GV.6.1"
39
+ context: "Piggybacked credential exfiltration in tool responses breaches data governance controls by transmitting SSH keys and API tokens to attacker endpoints; GV.6.1 data governance policies must cover tool output integrity as a data handling boundary."
40
+ strength: secondary
41
+ iso_42001:
42
+ - clause: "6.2"
43
+ context: "Risk treatment plans under clause 6.2 must include controls for tool response piggybacking attacks, where malicious MCP servers embed credential theft within normal-appearing tool result payloads to exploit agent trust."
44
+ strength: primary
45
+ - clause: "8.4"
46
+ context: "Impact assessment under clause 8.4 must document the tool-response piggybacking vector — where credential theft is hidden in legitimate tool outputs — as a high-impact data exfiltration pathway for AI systems using MCP tool exchanges."
47
+ strength: secondary
26
48
  tags:
27
49
  category: context-exfiltration
28
50
  subcategory: tool-response-piggyback