agent-threat-rules 2.0.14 → 2.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/rules/agent-manipulation/ATR-2026-00032-goal-hijacking.yaml +24 -0
- package/rules/agent-manipulation/ATR-2026-00074-cross-agent-privilege-escalation.yaml +24 -0
- package/rules/agent-manipulation/ATR-2026-00076-inter-agent-message-spoofing.yaml +24 -0
- package/rules/agent-manipulation/ATR-2026-00077-human-trust-exploitation.yaml +24 -0
- package/rules/agent-manipulation/ATR-2026-00108-consensus-sybil-attack.yaml +22 -0
- package/rules/agent-manipulation/ATR-2026-00116-a2a-message-validation.yaml +22 -0
- package/rules/agent-manipulation/ATR-2026-00117-agent-identity-spoofing.yaml +22 -0
- package/rules/agent-manipulation/ATR-2026-00119-social-engineering-via-agent.yaml +22 -0
- package/rules/agent-manipulation/ATR-2026-00132-casual-authority-escalation.yaml +22 -0
- package/rules/agent-manipulation/ATR-2026-00139-casual-authority-redirect.yaml +22 -0
- package/rules/agent-manipulation/ATR-2026-00164-skill-scope-hijack.yaml +22 -0
- package/rules/agent-manipulation/ATR-2026-00269-fitd-escalation.yaml +22 -0
- package/rules/agent-manipulation/ATR-2026-00271-grandma-roleplay-jailbreak.yaml +22 -0
- package/rules/agent-manipulation/ATR-2026-00273-dan-developer-mode-persona.yaml +22 -0
- package/rules/context-exfiltration/ATR-2026-00020-system-prompt-leak.yaml +39 -0
- package/rules/context-exfiltration/ATR-2026-00021-api-key-exposure.yaml +37 -0
- package/rules/context-exfiltration/ATR-2026-00075-agent-memory-manipulation.yaml +22 -0
- package/rules/context-exfiltration/ATR-2026-00102-disguised-analytics-exfiltration.yaml +22 -0
- package/rules/context-exfiltration/ATR-2026-00113-credential-theft.yaml +22 -0
- package/rules/context-exfiltration/ATR-2026-00114-oauth-token-abuse.yaml +22 -0
- package/rules/context-exfiltration/ATR-2026-00115-env-var-harvesting.yaml +22 -0
- package/rules/context-exfiltration/ATR-2026-00136-tool-response-data-piggyback.yaml +22 -0
- package/rules/context-exfiltration/ATR-2026-00212-mcp-atlassian-credential-leak.yaml +165 -0
- package/rules/context-exfiltration/ATR-2026-00261-markdown-image-exfiltration.yaml +22 -0
- package/rules/data-poisoning/ATR-2026-00070-data-poisoning.yaml +49 -0
- package/rules/excessive-autonomy/ATR-2026-00050-runaway-agent-loop.yaml +37 -0
- package/rules/excessive-autonomy/ATR-2026-00051-resource-exhaustion.yaml +24 -0
- package/rules/excessive-autonomy/ATR-2026-00052-cascading-failure.yaml +24 -0
- package/rules/excessive-autonomy/ATR-2026-00098-unauthorized-financial-action.yaml +31 -0
- package/rules/excessive-autonomy/ATR-2026-00099-high-risk-tool-gate.yaml +22 -0
- package/rules/model-abuse/ATR-2026-00279-harmful-completion-continuation.yaml +22 -0
- package/rules/model-abuse/ATR-2026-00281-eicar-gtube-malware-signature-request.yaml +22 -0
- package/rules/model-abuse/ATR-2026-00289-lmrc-harmful-content-elicitation.yaml +22 -0
- package/rules/model-abuse/ATR-2026-00292-self-harm-eating-disorder-facilitation.yaml +22 -0
- package/rules/model-abuse/ATR-2026-00298-malicious-use-illegal-activity-request.yaml +22 -0
- package/rules/model-abuse/ATR-2026-00299-harmbench-detailed-harmful-instruction.yaml +22 -0
- package/rules/model-security/ATR-2026-00072-model-behavior-extraction.yaml +24 -0
- package/rules/model-security/ATR-2026-00073-malicious-finetuning-data.yaml +24 -0
- package/rules/privilege-escalation/ATR-2026-00040-privilege-escalation.yaml +34 -0
- package/rules/privilege-escalation/ATR-2026-00041-scope-creep.yaml +24 -0
- package/rules/privilege-escalation/ATR-2026-00107-delayed-execution-bypass.yaml +22 -0
- package/rules/privilege-escalation/ATR-2026-00110-eval-injection.yaml +22 -0
- package/rules/privilege-escalation/ATR-2026-00111-shell-escape.yaml +22 -0
- package/rules/privilege-escalation/ATR-2026-00112-dynamic-import-exploitation.yaml +22 -0
- package/rules/privilege-escalation/ATR-2026-00143-casual-privilege-escalation.yaml +22 -0
- package/rules/prompt-injection/ATR-2026-00001-direct-prompt-injection.yaml +33 -0
- package/rules/prompt-injection/ATR-2026-00002-indirect-prompt-injection.yaml +36 -0
- package/rules/prompt-injection/ATR-2026-00003-jailbreak-attempt.yaml +23 -0
- package/rules/skill-compromise/ATR-2026-00060-skill-impersonation.yaml +37 -0
- package/rules/tool-poisoning/ATR-2026-00010-mcp-malicious-response.yaml +39 -0
- package/rules/tool-poisoning/ATR-2026-00209-mcpwn-runaway-invocation.yaml +162 -0
- package/rules/tool-poisoning/ATR-2026-00210-flowise-system-message-override.yaml +162 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-threat-rules",
|
|
3
|
-
"version": "2.0.14",
|
|
3
|
+
"version": "2.0.16",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Open detection standard -- like Sigma, but for AI agents. 311 rules for prompt injection, tool poisoning, context exfiltration, and MCP attacks. Shipped in Cisco AI Defense. 97.1% recall on NVIDIA garak.",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -26,6 +26,30 @@ references:
|
|
|
26
26
|
mitre_atlas:
|
|
27
27
|
- AML.T0051 - Prompt Injection
|
|
28
28
|
- AML.T0043 - Craft Adversarial Data
|
|
29
|
+
|
|
30
|
+
compliance:
|
|
31
|
+
eu_ai_act:
|
|
32
|
+
- article: "14"
|
|
33
|
+
context: "Goal hijacking redirects the agent away from its authorized objectives without human awareness, directly undermining the human oversight mechanisms Article 14 requires for high-risk AI systems."
|
|
34
|
+
strength: primary
|
|
35
|
+
- article: "15"
|
|
36
|
+
context: "Article 15 robustness requirements include resistance against adversarial redirection of agent objectives; this rule detects explicit and subtle goal-pivot patterns at runtime."
|
|
37
|
+
strength: secondary
|
|
38
|
+
nist_ai_rmf:
|
|
39
|
+
- subcategory: "GV.1.2"
|
|
40
|
+
context: "Goal hijacking attacks subvert the accountability roles assigned to AI agents by redirecting them from authorized objectives; GV.1.2 requires that defined accountability roles and their task boundaries are enforced at runtime."
|
|
41
|
+
strength: primary
|
|
42
|
+
- subcategory: "MG.2.3"
|
|
43
|
+
context: "Detecting goal hijacking patterns triggers the risk treatment plans required by MG.2.3 to contain adversarial task-redirection before the agent executes unauthorized actions."
|
|
44
|
+
strength: secondary
|
|
45
|
+
iso_42001:
|
|
46
|
+
- clause: "6.2"
|
|
47
|
+
context: "ISO 42001 clause 6.2 risk treatment plans must address task-redirection attacks; this rule implements the monitoring control that detects when an agent's objective is adversarially substituted."
|
|
48
|
+
strength: primary
|
|
49
|
+
- clause: "8.6"
|
|
50
|
+
context: "Clause 8.6 operational controls ensure AI systems operate within their authorized task boundaries; goal hijacking detection enforces that agents remain on their assigned objective despite redirection attempts."
|
|
51
|
+
strength: secondary
|
|
52
|
+
|
|
29
53
|
tags:
|
|
30
54
|
category: agent-manipulation
|
|
31
55
|
subcategory: goal-hijacking
|
|
@@ -25,6 +25,30 @@ references:
|
|
|
25
25
|
mitre_attack:
|
|
26
26
|
- T1078 - Valid Accounts
|
|
27
27
|
- T1550 - Use Alternate Authentication Material
|
|
28
|
+
|
|
29
|
+
compliance:
|
|
30
|
+
eu_ai_act:
|
|
31
|
+
- article: "14"
|
|
32
|
+
context: "Cross-agent privilege escalation allows one agent to acquire capabilities from another agent's authorization scope without human approval, directly bypassing the role-based access controls that Article 14 human oversight requires."
|
|
33
|
+
strength: primary
|
|
34
|
+
- article: "9"
|
|
35
|
+
context: "Multi-agent privilege escalation via trust relationships is a documented attack pattern in the AI system risk register; Article 9 requires monitoring controls for this lateral movement risk."
|
|
36
|
+
strength: secondary
|
|
37
|
+
nist_ai_rmf:
|
|
38
|
+
- subcategory: "GV.1.2"
|
|
39
|
+
context: "Cross-agent privilege escalation exploits gaps in the accountability roles defined for each agent in a multi-agent system; GV.1.2 requires that agent role boundaries are defined and that lateral movement across those boundaries is detected."
|
|
40
|
+
strength: primary
|
|
41
|
+
- subcategory: "MG.2.3"
|
|
42
|
+
context: "Credential forwarding and orchestrator bypass between agents require immediate risk treatment to prevent lateral movement from escalating to full system compromise; MG.2.3 mandates that these response plans are pre-defined and activated on detection."
|
|
43
|
+
strength: secondary
|
|
44
|
+
iso_42001:
|
|
45
|
+
- clause: "6.2"
|
|
46
|
+
context: "ISO 42001 clause 6.2 risk treatment activities must cover inter-agent privilege escalation as a multi-agent architecture risk; this rule implements the detection control for credential forwarding and unauthorized role assumption between agents."
|
|
47
|
+
strength: primary
|
|
48
|
+
- clause: "8.6"
|
|
49
|
+
context: "Clause 8.6 operational controls ensure each agent executes within its authorized capability set; cross-agent privilege escalation detection enforces that no agent can assume the permissions of a higher-privilege agent without explicit authorization."
|
|
50
|
+
strength: secondary
|
|
51
|
+
|
|
28
52
|
tags:
|
|
29
53
|
category: agent-manipulation
|
|
30
54
|
subcategory: cross-agent-privilege-escalation
|
|
@@ -27,6 +27,30 @@ references:
|
|
|
27
27
|
mitre_atlas:
|
|
28
28
|
- AML.T0051 - Prompt Injection
|
|
29
29
|
- AML.T0043 - Craft Adversarial Data
|
|
30
|
+
|
|
31
|
+
compliance:
|
|
32
|
+
eu_ai_act:
|
|
33
|
+
- article: "15"
|
|
34
|
+
context: "Article 15 cybersecurity requirements for high-risk AI include protecting agent communication channels against spoofing, replay, and message integrity attacks; this rule detects structural communication failures between agents."
|
|
35
|
+
strength: primary
|
|
36
|
+
- article: "14"
|
|
37
|
+
context: "Spoofed inter-agent messages can trigger unauthorized agent actions without any human-visible audit trail, undermining the traceability required by Article 14 human oversight provisions."
|
|
38
|
+
strength: secondary
|
|
39
|
+
nist_ai_rmf:
|
|
40
|
+
- subcategory: "MP.5.1"
|
|
41
|
+
context: "Spoofed, replayed, and unauthenticated inter-agent messages are adversarial inputs that exploit trust assumptions in multi-agent communication channels; MP.5.1 requires that these structural communication integrity attacks are identified and tracked."
|
|
42
|
+
strength: primary
|
|
43
|
+
- subcategory: "MG.2.3"
|
|
44
|
+
context: "Detection of message spoofing, replay attacks, and null authentication in agent channels triggers risk treatment plans to quarantine compromised messages before they cause unauthorized actions downstream; MG.2.3 requires these treatment processes are defined."
|
|
45
|
+
strength: secondary
|
|
46
|
+
iso_42001:
|
|
47
|
+
- clause: "6.2"
|
|
48
|
+
context: "ISO 42001 clause 6.2 risk treatment plans must address inter-agent communication integrity failures including message spoofing and replay; this rule implements the detection control for structural authentication weaknesses in agent-to-agent channels."
|
|
49
|
+
strength: primary
|
|
50
|
+
- clause: "8.6"
|
|
51
|
+
context: "Clause 8.6 operational controls require that inter-agent communication operates with verified message authentication; detection of null tokens, forged IDs, and unauthenticated commands enforces the integrity requirements for correct agent pipeline execution."
|
|
52
|
+
strength: secondary
|
|
53
|
+
|
|
30
54
|
tags:
|
|
31
55
|
category: agent-manipulation
|
|
32
56
|
subcategory: inter-agent-communication
|
|
@@ -33,6 +33,30 @@ references:
|
|
|
33
33
|
- ASI09:2026 - Human-Agent Trust Exploitation
|
|
34
34
|
mitre_atlas:
|
|
35
35
|
- AML.T0048 - Adversarial Prompt Techniques
|
|
36
|
+
|
|
37
|
+
compliance:
|
|
38
|
+
eu_ai_act:
|
|
39
|
+
- article: "13"
|
|
40
|
+
context: "Article 13 transparency obligations require that AI systems do not exploit informational asymmetries to manipulate human users; human-trust exploitation attacks weaponize the AI's perceived authority and credibility against its own users."
|
|
41
|
+
strength: primary
|
|
42
|
+
- article: "14"
|
|
43
|
+
context: "Trust exploitation techniques undermine the human's ability to critically evaluate agent outputs and decisions, eroding the informed oversight capability Article 14 requires to remain effective."
|
|
44
|
+
strength: secondary
|
|
45
|
+
nist_ai_rmf:
|
|
46
|
+
- subcategory: "GV.6.1"
|
|
47
|
+
context: "Human-agent trust exploitation weaponizes information asymmetry between the AI system and its operators; GV.6.1 data governance policies must ensure AI systems do not suppress or fabricate information to manipulate human decision-makers."
|
|
48
|
+
strength: primary
|
|
49
|
+
- subcategory: "MP.5.1"
|
|
50
|
+
context: "False certainty claims, manufactured urgency, and verification discouragement are adversarial input patterns targeting human operators rather than the AI system itself; MP.5.1 requires identifying and tracking these social-engineering attack vectors."
|
|
51
|
+
strength: secondary
|
|
52
|
+
iso_42001:
|
|
53
|
+
- clause: "8.4"
|
|
54
|
+
context: "ISO 42001 clause 8.4 requires documented AI system impact assessments; human-trust exploitation attacks that suppress warnings or manufacture urgency to bypass review represent a documented impact category requiring runtime detection controls."
|
|
55
|
+
strength: primary
|
|
56
|
+
- clause: "9.1"
|
|
57
|
+
context: "Clause 9.1 performance monitoring must evaluate whether AI systems support effective human oversight; detecting trust exploitation patterns provides the measurement signal that indicates the AI management system's oversight controls are being actively undermined."
|
|
58
|
+
strength: secondary
|
|
59
|
+
|
|
36
60
|
tags:
|
|
37
61
|
category: agent-manipulation
|
|
38
62
|
subcategory: human-trust-exploitation
|
|
@@ -24,6 +24,28 @@ references:
|
|
|
24
24
|
- ASI01:2026 - Agent Goal Hijack
|
|
25
25
|
mitre_atlas:
|
|
26
26
|
- AML.T0043 - Craft Adversarial Data
|
|
27
|
+
compliance:
|
|
28
|
+
eu_ai_act:
|
|
29
|
+
- article: "14"
|
|
30
|
+
context: "Consensus Sybil attacks allow compromised agents to manufacture false agreement in multi-agent decision systems, directly undermining the human oversight capability Article 14 requires for high-risk AI applications."
|
|
31
|
+
strength: primary
|
|
32
|
+
- article: "15"
|
|
33
|
+
context: "Article 15 robustness requirements mandate that high-risk AI systems resist adversarial manipulation; Sybil attacks on consensus mechanisms are a documented adversarial pattern requiring systematic detection."
|
|
34
|
+
strength: secondary
|
|
35
|
+
nist_ai_rmf:
|
|
36
|
+
- subcategory: "GV.1.2"
|
|
37
|
+
context: "Consensus Sybil attacks exploit undefined accountability for agent identity in multi-agent systems; GV.1.2 requires that accountability roles for AI risk management are defined and assigned to prevent Sybil-style identity fabrication."
|
|
38
|
+
strength: primary
|
|
39
|
+
- subcategory: "MG.2.3"
|
|
40
|
+
context: "Sybil attacks represent a documented risk treatment gap in multi-agent consensus deployments; MG.2.3 requires that risk treatment plans are implemented for identified AI risks, including detection controls for fake-identity flooding."
|
|
41
|
+
strength: secondary
|
|
42
|
+
iso_42001:
|
|
43
|
+
- clause: "6.2"
|
|
44
|
+
context: "Multi-agent systems deploying consensus mechanisms must include Sybil attack detection as a planned risk treatment activity under the AI objectives framework required by clause 6.2."
|
|
45
|
+
strength: primary
|
|
46
|
+
- clause: "8.6"
|
|
47
|
+
context: "Clause 8.6 operational controls must ensure that consensus decisions are made by verified agent identities and that fabricated voter identities are detected before they influence multi-agent outcomes."
|
|
48
|
+
strength: secondary
|
|
27
49
|
tags:
|
|
28
50
|
category: agent-manipulation
|
|
29
51
|
subcategory: consensus-sybil-attack
|
|
@@ -20,6 +20,28 @@ references:
|
|
|
20
20
|
- ASI07:2026 - Insecure Inter-Agent Communication
|
|
21
21
|
mitre_attack:
|
|
22
22
|
- T1557 - Adversary-in-the-Middle
|
|
23
|
+
compliance:
|
|
24
|
+
eu_ai_act:
|
|
25
|
+
- article: "15"
|
|
26
|
+
context: "Malicious agent-to-agent message injection exploits unvalidated inter-agent channels to execute unauthorized actions; Article 15 cybersecurity requirements mandate detection and resistance against such adversarial input manipulation."
|
|
27
|
+
strength: primary
|
|
28
|
+
- article: "14"
|
|
29
|
+
context: "A2A message injection can cause agents to take actions outside their authorized scope without human awareness, eroding the effective oversight capability Article 14 requires for high-risk AI systems."
|
|
30
|
+
strength: secondary
|
|
31
|
+
nist_ai_rmf:
|
|
32
|
+
- subcategory: "MP.5.1"
|
|
33
|
+
context: "Malicious A2A message injection is a documented adversarial input risk targeting inter-agent communication channels; MP.5.1 requires that adversarial input risks to AI systems are identified and tracked to enable detection of embedded payload attacks."
|
|
34
|
+
strength: primary
|
|
35
|
+
- subcategory: "MG.2.3"
|
|
36
|
+
context: "A2A message validation failures represent a risk requiring active treatment; MG.2.3 requires that risk treatment plans are implemented for identified AI risks, including validation controls on all inter-agent message channels."
|
|
37
|
+
strength: secondary
|
|
38
|
+
iso_42001:
|
|
39
|
+
- clause: "6.2"
|
|
40
|
+
context: "AI system plans under clause 6.2 must include risk treatment activities for inter-agent message injection, ensuring that A2A communication validation is a planned control rather than an afterthought."
|
|
41
|
+
strength: primary
|
|
42
|
+
- clause: "8.6"
|
|
43
|
+
context: "Clause 8.6 operational controls require that inter-agent messages are validated before execution, preventing injected instructions from executing in the receiving agent's security context."
|
|
44
|
+
strength: secondary
|
|
23
45
|
tags:
|
|
24
46
|
category: agent-manipulation
|
|
25
47
|
subcategory: a2a-message-validation
|
|
@@ -20,6 +20,28 @@ references:
|
|
|
20
20
|
- ASI10:2026 - Rogue Agents
|
|
21
21
|
mitre_attack:
|
|
22
22
|
- T1036 - Masquerading
|
|
23
|
+
compliance:
|
|
24
|
+
eu_ai_act:
|
|
25
|
+
- article: "13"
|
|
26
|
+
context: "Agent identity spoofing creates false impressions of authority and trustworthiness that users and other agents cannot distinguish from legitimate communications; Article 13 transparency obligations require that AI system identity and authority claims be verifiable."
|
|
27
|
+
strength: primary
|
|
28
|
+
- article: "15"
|
|
29
|
+
context: "Article 15 cybersecurity requirements include protection against masquerading attacks; identity spoofing in multi-agent systems represents a documented adversarial pattern targeting the authentication layer of agent architectures."
|
|
30
|
+
strength: secondary
|
|
31
|
+
nist_ai_rmf:
|
|
32
|
+
- subcategory: "GV.6.1"
|
|
33
|
+
context: "Agent identity spoofing exploits the absence of verified identity provenance in inter-agent data flows; GV.6.1 data governance policies must define how agent identity claims are authenticated to prevent masquerading attacks."
|
|
34
|
+
strength: primary
|
|
35
|
+
- subcategory: "MP.5.1"
|
|
36
|
+
context: "Impersonation of admin or supervisor agent roles is an adversarial input risk that must be tracked under MP.5.1 to ensure detection controls cover identity-claim-based privilege escalation patterns."
|
|
37
|
+
strength: secondary
|
|
38
|
+
iso_42001:
|
|
39
|
+
- clause: "8.4"
|
|
40
|
+
context: "Clause 8.4 AI system impact assessments must document the risk that unverified agent identity claims allow privilege escalation, and review controls that ensure identity spoofing is detectable before actions are taken."
|
|
41
|
+
strength: primary
|
|
42
|
+
- clause: "9.1"
|
|
43
|
+
context: "Clause 9.1 performance monitoring must include evaluation of whether agent identity verification controls are functioning correctly and catching masquerading attacks in operational deployments."
|
|
44
|
+
strength: secondary
|
|
23
45
|
tags:
|
|
24
46
|
category: agent-manipulation
|
|
25
47
|
subcategory: agent-identity-spoofing
|
|
@@ -19,6 +19,28 @@ references:
|
|
|
19
19
|
- ASI09:2026 - Human Trust Exploitation
|
|
20
20
|
mitre_attack:
|
|
21
21
|
- T1566 - Phishing
|
|
22
|
+
compliance:
|
|
23
|
+
eu_ai_act:
|
|
24
|
+
- article: "13"
|
|
25
|
+
context: "Social engineering delivered through agent output exploits the elevated trust users place in AI-generated content; Article 13 transparency obligations require that AI systems not be used as vectors for deception or manipulation against the users they serve."
|
|
26
|
+
strength: primary
|
|
27
|
+
- article: "14"
|
|
28
|
+
context: "Agents weaponized for social engineering undermine the informed human judgment that Article 14 oversight depends on; users manipulated through trusted AI channels cannot exercise effective oversight of subsequent decisions."
|
|
29
|
+
strength: secondary
|
|
30
|
+
nist_ai_rmf:
|
|
31
|
+
- subcategory: "GV.6.1"
|
|
32
|
+
context: "Social engineering delivered via agent output exploits user trust in AI-generated content to harvest credentials and personal data; GV.6.1 data governance policies must address how AI-generated communications are authenticated to prevent agent-mediated phishing."
|
|
33
|
+
strength: primary
|
|
34
|
+
- subcategory: "MP.5.1"
|
|
35
|
+
context: "Using agents as social engineering vectors is an adversarial input risk where poisoned context produces manipulative outputs; MP.5.1 requires this risk to be identified and tracked so that urgency and authority-impersonation patterns in agent output are monitored."
|
|
36
|
+
strength: secondary
|
|
37
|
+
iso_42001:
|
|
38
|
+
- clause: "8.4"
|
|
39
|
+
context: "Clause 8.4 impact assessments must document the elevated trust users place in AI-generated content and evaluate whether social engineering via agent output constitutes a significant harm requiring dedicated detection controls."
|
|
40
|
+
strength: primary
|
|
41
|
+
- clause: "9.1"
|
|
42
|
+
context: "Clause 9.1 performance monitoring must evaluate whether agent output monitoring detects social engineering patterns such as urgency language and credential-harvesting requests generated through poisoned agent context."
|
|
43
|
+
strength: secondary
|
|
22
44
|
tags:
|
|
23
45
|
category: agent-manipulation
|
|
24
46
|
subcategory: social-engineering-via-agent
|
|
@@ -26,6 +26,28 @@ references:
|
|
|
26
26
|
- ASI09:2026 - Human-Agent Trust Exploitation
|
|
27
27
|
metadata_provenance:
|
|
28
28
|
mitre_atlas: auto-generated
|
|
29
|
+
compliance:
|
|
30
|
+
eu_ai_act:
|
|
31
|
+
- article: "14"
|
|
32
|
+
context: "Casual authority claims using conversational tone bypass detection while achieving the same effect as explicit privilege escalation — unauthorized actions without human approval; Article 14 oversight requirements apply regardless of the linguistic register used to circumvent them."
|
|
33
|
+
strength: primary
|
|
34
|
+
- article: "9"
|
|
35
|
+
context: "Rationalized bypasses and casual scope escalation are documented attack patterns that Article 9 risk management systems must account for; detection of informal social engineering is a required risk treatment for multi-agent deployments."
|
|
36
|
+
strength: secondary
|
|
37
|
+
nist_ai_rmf:
|
|
38
|
+
- subcategory: "GV.1.2"
|
|
39
|
+
context: "Casual authority escalation exploits gaps in accountability role definitions, allowing informal authority claims like 'the boss said' to bypass controls without challenge; GV.1.2 requires that accountability roles are formally assigned so that unverifiable casual authority claims are detectable."
|
|
40
|
+
strength: primary
|
|
41
|
+
- subcategory: "MG.2.3"
|
|
42
|
+
context: "Conversational-tone scope escalation is a documented risk requiring active treatment; MG.2.3 requires that risk treatment plans address informal social engineering techniques that evade formal injection detection in multi-agent deployments."
|
|
43
|
+
strength: secondary
|
|
44
|
+
iso_42001:
|
|
45
|
+
- clause: "6.2"
|
|
46
|
+
context: "Clause 6.2 AI objectives and plans must include risk treatment for casual authority claim patterns that bypass formal injection detection, as these represent a distinct attack surface from explicit privilege escalation."
|
|
47
|
+
strength: primary
|
|
48
|
+
- clause: "8.6"
|
|
49
|
+
context: "Clause 8.6 operational controls must ensure that agents require verified authorization before acting on scope expansions or control bypasses, regardless of whether the instruction is phrased formally or in casual conversational language."
|
|
50
|
+
strength: secondary
|
|
29
51
|
tags:
|
|
30
52
|
category: agent-manipulation
|
|
31
53
|
subcategory: casual-authority
|
|
@@ -21,6 +21,28 @@ references:
|
|
|
21
21
|
- ASI06 - Rogue Actions
|
|
22
22
|
metadata_provenance:
|
|
23
23
|
mitre_atlas: auto-generated
|
|
24
|
+
compliance:
|
|
25
|
+
eu_ai_act:
|
|
26
|
+
- article: "14"
|
|
27
|
+
context: "Casual authority data redirect attacks redirect agent output to attacker-controlled endpoints by spoofing organizational authority, bypassing human review of where sensitive data flows; Article 14 oversight requires that data redirection decisions remain under human control."
|
|
28
|
+
strength: primary
|
|
29
|
+
- article: "15"
|
|
30
|
+
context: "Article 15 cybersecurity requirements include protection against social engineering attacks targeting agent output pipelines; casual authority redirect is a documented adversarial technique exploiting agents as exfiltration proxies."
|
|
31
|
+
strength: secondary
|
|
32
|
+
nist_ai_rmf:
|
|
33
|
+
- subcategory: "GV.1.2"
|
|
34
|
+
context: "Casual authority redirect attacks succeed when no formal accountability role exists to verify output redirection decisions; GV.1.2 requires that accountability roles for AI risk management are defined so that informal directives to redirect agent output can be challenged and blocked."
|
|
35
|
+
strength: primary
|
|
36
|
+
- subcategory: "MG.2.3"
|
|
37
|
+
context: "Data redirect via social engineering authority claims is a documented exfiltration risk requiring active treatment; MG.2.3 requires that risk treatment plans include detection controls for authority-impersonation-based output redirection attacks."
|
|
38
|
+
strength: secondary
|
|
39
|
+
iso_42001:
|
|
40
|
+
- clause: "6.2"
|
|
41
|
+
context: "Clause 6.2 AI objectives and plans must include controls for preventing agents from being redirected to attacker-controlled endpoints through casual authority claims that spoof organizational hierarchy."
|
|
42
|
+
strength: primary
|
|
43
|
+
- clause: "8.6"
|
|
44
|
+
context: "Clause 8.6 operational controls must ensure that agent output destinations are validated against authorized endpoints and that casual authority directives to disable safety filters or redirect data are blocked before execution."
|
|
45
|
+
strength: secondary
|
|
24
46
|
tags:
|
|
25
47
|
category: agent-manipulation
|
|
26
48
|
subcategory: casual-authority-redirect
|
|
@@ -17,6 +17,28 @@ references:
|
|
|
17
17
|
- 'LLM06:2025 - Excessive Agency'
|
|
18
18
|
owasp_agentic:
|
|
19
19
|
- 'ASI03:2026 - Cross-Agent Escalation'
|
|
20
|
+
compliance:
|
|
21
|
+
eu_ai_act:
|
|
22
|
+
- article: "14"
|
|
23
|
+
context: "Skill scope hijacking embeds privilege escalation instructions within SKILL.md files to expand agent capabilities beyond authorized boundaries without human knowledge; Article 14 requires that agents operate within sanctioned scopes visible to human overseers."
|
|
24
|
+
strength: primary
|
|
25
|
+
- article: "15"
|
|
26
|
+
context: "Article 15 cybersecurity requirements mandate protection against supply chain attacks; malicious SKILL.md files represent a documented technique for injecting unauthorized capability expansion at the skill-definition layer."
|
|
27
|
+
strength: secondary
|
|
28
|
+
nist_ai_rmf:
|
|
29
|
+
- subcategory: "GV.1.2"
|
|
30
|
+
context: "Skill scope hijacking succeeds when no accountability role governs what capabilities a skill may claim; GV.1.2 requires that accountability roles for AI risk management are defined and assigned, ensuring that SKILL.md capability claims are reviewed against authorized scope boundaries."
|
|
31
|
+
strength: primary
|
|
32
|
+
- subcategory: "MP.2.3"
|
|
33
|
+
context: "Malicious SKILL.md files in external skill repositories represent an AI supply chain risk source; MP.2.3 requires that AI supply chain risk sources are identified and assessed, covering the skill-definition layer as a vector for privilege escalation injection."
|
|
34
|
+
strength: secondary
|
|
35
|
+
iso_42001:
|
|
36
|
+
- clause: "6.2"
|
|
37
|
+
context: "Clause 6.2 AI objectives and plans must include risk treatment for skill-definition-layer attacks where SKILL.md instructions expand agent scope beyond authorized boundaries without human knowledge."
|
|
38
|
+
strength: primary
|
|
39
|
+
- clause: "8.5"
|
|
40
|
+
context: "Clause 8.5 controls over externally-provided AI components must include validation of SKILL.md files to ensure that skill definitions do not embed cross-agent escalation or unauthorized scope expansion instructions."
|
|
41
|
+
strength: secondary
|
|
20
42
|
tags:
|
|
21
43
|
category: agent-manipulation
|
|
22
44
|
subcategory: skill-scope-hijack
|
|
@@ -26,6 +26,28 @@ references:
|
|
|
26
26
|
research:
|
|
27
27
|
- "https://github.com/NVIDIA/garak/blob/main/garak/probes/continuation.py"
|
|
28
28
|
- "https://jailbreaking-llms.github.io/"
|
|
29
|
+
compliance:
|
|
30
|
+
eu_ai_act:
|
|
31
|
+
- article: "15"
|
|
32
|
+
context: "Foot-in-the-door escalation exploits LLM consistency bias to incrementally extract harmful outputs that would be refused if requested directly; Article 15 robustness requirements demand resistance against systematic adversarial probing techniques targeting model behavioral constraints."
|
|
33
|
+
strength: primary
|
|
34
|
+
- article: "9"
|
|
35
|
+
context: "FITD escalation is a documented multi-step adversarial pattern; Article 9 risk management systems must include detection controls for incremental jailbreak techniques that bypass single-turn safety evaluation."
|
|
36
|
+
strength: secondary
|
|
37
|
+
nist_ai_rmf:
|
|
38
|
+
- subcategory: "MP.5.1"
|
|
39
|
+
context: "Foot-in-the-door escalation is an adversarial input technique that exploits LLM consistency bias to incrementally bypass safety constraints across multiple turns; MP.5.1 requires that such multi-step adversarial input risks are identified and tracked to enable cross-turn detection."
|
|
40
|
+
strength: primary
|
|
41
|
+
- subcategory: "MG.2.3"
|
|
42
|
+
context: "FITD escalation represents an identified risk requiring active treatment through detection controls that monitor prior-compliance anchors and escalation signals across conversation history; MG.2.3 requires such risk treatment plans are implemented."
|
|
43
|
+
strength: secondary
|
|
44
|
+
iso_42001:
|
|
45
|
+
- clause: "6.2"
|
|
46
|
+
context: "Clause 6.2 AI objectives and plans must include risk treatment for multi-step jailbreak techniques like FITD escalation that exploit model consistency bias to extract harmful outputs that would be refused if requested directly."
|
|
47
|
+
strength: primary
|
|
48
|
+
- clause: "8.6"
|
|
49
|
+
context: "Clause 8.6 operational controls must ensure that agents evaluate escalating requests in context of prior conversation turns rather than in isolation, preventing foot-in-the-door compliance anchoring from lowering refusal thresholds."
|
|
50
|
+
strength: secondary
|
|
29
51
|
tags:
|
|
30
52
|
category: agent-manipulation
|
|
31
53
|
subcategory: fitd-escalation
|
|
@@ -26,6 +26,28 @@ references:
|
|
|
26
26
|
research:
|
|
27
27
|
- "https://github.com/NVIDIA/garak/blob/main/garak/probes/grandma.py"
|
|
28
28
|
- "https://www.independent.co.uk/tech/chatgpt-microsoft-windows-11-grandma-exploit-b2360213.html"
|
|
29
|
+
compliance:
|
|
30
|
+
eu_ai_act:
|
|
31
|
+
- article: "15"
|
|
32
|
+
context: "Grandma roleplay jailbreaks use emotional manipulation through grief and nostalgia framing to circumvent safety constraints; Article 15 robustness requirements include resistance against social engineering techniques that exploit psychological vulnerabilities in human-AI interaction."
|
|
33
|
+
strength: primary
|
|
34
|
+
- article: "9"
|
|
35
|
+
context: "Persona-based jailbreaks exploiting emotional framing are documented high-risk attack patterns; Article 9 risk management requires systematic controls for emotional manipulation techniques used to extract harmful content from high-risk AI systems."
|
|
36
|
+
strength: secondary
|
|
37
|
+
nist_ai_rmf:
|
|
38
|
+
- subcategory: "MP.5.1"
|
|
39
|
+
context: "Grandma roleplay jailbreaks are adversarial inputs that exploit grief and nostalgia emotional framing to bypass safety constraints; MP.5.1 requires that this class of psychological manipulation risk is identified and tracked as a distinct adversarial input vector."
|
|
40
|
+
strength: primary
|
|
41
|
+
- subcategory: "MG.2.3"
|
|
42
|
+
context: "Emotional-framing persona jailbreaks represent a known risk requiring active treatment through detection controls for deceased-relative roleplay patterns; MG.2.3 requires that risk treatment plans are implemented for identified AI risks including this garak-documented attack family."
|
|
43
|
+
strength: secondary
|
|
44
|
+
iso_42001:
|
|
45
|
+
- clause: "6.2"
|
|
46
|
+
context: "Clause 6.2 AI objectives and plans must include risk treatment for emotional manipulation techniques such as grandma roleplay that lower the model's refusal threshold by exploiting grief and nostalgia framing."
|
|
47
|
+
strength: primary
|
|
48
|
+
- clause: "8.6"
|
|
49
|
+
context: "Clause 8.6 operational controls must ensure that harmful procedural content is not generated regardless of the emotional or persona framing used in the request, blocking grandma roleplay jailbreak patterns before harmful content is produced."
|
|
50
|
+
strength: secondary
|
|
29
51
|
tags:
|
|
30
52
|
category: agent-manipulation
|
|
31
53
|
subcategory: grandma-roleplay-jailbreak
|
|
@@ -27,6 +27,28 @@ references:
|
|
|
27
27
|
research:
|
|
28
28
|
- "https://github.com/NVIDIA/garak/blob/main/garak/probes/dan.py"
|
|
29
29
|
- "https://github.com/NVIDIA/garak/blob/main/garak/data/dan/"
|
|
30
|
+
compliance:
|
|
31
|
+
eu_ai_act:
|
|
32
|
+
- article: "15"
|
|
33
|
+
context: "DAN and Developer Mode persona jailbreaks attempt to replace the model's safety-trained identity with an unrestricted persona; Article 15 robustness and cybersecurity requirements mandate that high-risk AI systems resist identity replacement attacks that disable safety constraints."
|
|
34
|
+
strength: primary
|
|
35
|
+
- article: "9"
|
|
36
|
+
context: "Named persona jailbreaks are a well-documented attack family with widespread deployment in the wild; Article 9 risk management systems for high-risk AI must include detection controls for this established technique for bypassing content restrictions."
|
|
37
|
+
strength: secondary
|
|
38
|
+
nist_ai_rmf:
|
|
39
|
+
- subcategory: "MP.5.1"
|
|
40
|
+
context: "DAN, DUDE, and Developer Mode persona jailbreaks are adversarial inputs that attempt to replace the model's safety-trained identity with an unrestricted persona; MP.5.1 requires that this widely deployed adversarial input risk is identified and tracked across model deployments."
|
|
41
|
+
strength: primary
|
|
42
|
+
- subcategory: "MG.2.3"
|
|
43
|
+
context: "Named persona jailbreak families represent a documented risk requiring active treatment through pattern-based detection of DAN/DUDE/STAN identity-replacement prompts; MG.2.3 requires that risk treatment plans are implemented for this established and widely distributed attack technique."
|
|
44
|
+
strength: secondary
|
|
45
|
+
iso_42001:
|
|
46
|
+
- clause: "6.2"
|
|
47
|
+
context: "Clause 6.2 AI objectives and plans must include risk treatment for DAN and Developer Mode persona replacement attacks, which represent one of the most widely deployed jailbreak families targeting AI system safety constraints."
|
|
48
|
+
strength: primary
|
|
49
|
+
- clause: "8.6"
|
|
50
|
+
context: "Clause 8.6 operational controls must ensure that the AI system's safety identity cannot be replaced by named unrestricted personas such as DAN or DUDE, blocking persona-replacement jailbreaks before they disable safety constraints."
|
|
51
|
+
strength: secondary
|
|
30
52
|
tags:
|
|
31
53
|
category: agent-manipulation
|
|
32
54
|
subcategory: dan-developer-mode-persona
|
|
@@ -32,6 +32,45 @@ references:
|
|
|
32
32
|
- "AML.T0056 - LLM Meta Prompt Extraction"
|
|
33
33
|
- "AML.T0051 - LLM Prompt Injection"
|
|
34
34
|
|
|
35
|
+
compliance:
|
|
36
|
+
owasp_agentic:
|
|
37
|
+
- id: ASI01:2026
|
|
38
|
+
context: "Revealing system prompt content enables adversaries to map agent constraints and craft targeted bypass attacks, directly enabling future Agent Goal Hijack attacks classified under ASI01."
|
|
39
|
+
strength: primary
|
|
40
|
+
- id: ASI06:2026
|
|
41
|
+
context: "System prompt leakage exposes the agent's memory and context configuration to adversaries, falling under ASI06 Memory and Context Poisoning as an information disclosure precursor."
|
|
42
|
+
strength: secondary
|
|
43
|
+
owasp_llm:
|
|
44
|
+
- id: LLM07:2025
|
|
45
|
+
context: "This rule is the primary detection implementation for LLM07:2025 System Prompt Leakage, covering direct quotation, instruction paraphrasing, guardrail revelation, and config exposure in agent output."
|
|
46
|
+
strength: primary
|
|
47
|
+
- id: LLM02:2025
|
|
48
|
+
context: "System prompt content constitutes sensitive operational information; its disclosure in agent output also satisfies the LLM02:2025 Sensitive Information Disclosure category."
|
|
49
|
+
strength: secondary
|
|
50
|
+
eu_ai_act:
|
|
51
|
+
- article: "13"
|
|
52
|
+
context: "While transparency is required toward users, internal guardrail and system-prompt disclosure to adversaries violates Article 13 by exposing configurations that undermine safe operation of the AI system."
|
|
53
|
+
strength: primary
|
|
54
|
+
- article: "12"
|
|
55
|
+
context: "Detection events generated by this rule serve as Article 12 automatic log records of system-prompt leakage incidents, enabling post-incident review by deployers."
|
|
56
|
+
strength: secondary
|
|
57
|
+
nist_ai_rmf:
|
|
58
|
+
- function: Govern
|
|
59
|
+
subcategory: GV.6.1
|
|
60
|
+
context: "Organizational policies for AI data and privacy (GV.6.1) must cover protection of internal operational configurations; this rule enforces that policy at runtime."
|
|
61
|
+
strength: primary
|
|
62
|
+
- function: Measure
|
|
63
|
+
subcategory: MS.2.5
|
|
64
|
+
context: "Measuring the effectiveness of data protection controls for AI systems (MS.2.5) requires detection of unauthorized disclosure of system-level configurations."
|
|
65
|
+
strength: secondary
|
|
66
|
+
iso_42001:
|
|
67
|
+
- clause: "8.4"
|
|
68
|
+
context: "Clause 8.4 AI system impact assessment identifies confidential operational parameters as assets requiring protection; this rule detects when those assets are disclosed in output."
|
|
69
|
+
strength: primary
|
|
70
|
+
- clause: "9.1"
|
|
71
|
+
context: "Clause 9.1 monitoring and evaluation requires measuring AI system behavior against expected norms; system prompt disclosure events are anomalies that must be measured and reported."
|
|
72
|
+
strength: secondary
|
|
73
|
+
|
|
35
74
|
tags:
|
|
36
75
|
category: context-exfiltration
|
|
37
76
|
subcategory: system-prompt-leak
|
|
@@ -29,6 +29,43 @@ references:
|
|
|
29
29
|
- "AML.T0057 - LLM Data Leakage"
|
|
30
30
|
- "AML.T0055 - Unsecured Credentials"
|
|
31
31
|
|
|
32
|
+
compliance:
|
|
33
|
+
owasp_agentic:
|
|
34
|
+
- id: ASI03:2026
|
|
35
|
+
context: "Credential exposure in agent output enables adversaries to abuse the identity and privileges associated with leaked keys, directly enabling unauthorized resource access classified under ASI03."
|
|
36
|
+
strength: primary
|
|
37
|
+
owasp_llm:
|
|
38
|
+
- id: LLM02:2025
|
|
39
|
+
context: "API keys, tokens, private keys, and database credentials are sensitive information whose disclosure in agent output is the canonical LLM02:2025 Sensitive Information Disclosure scenario."
|
|
40
|
+
strength: primary
|
|
41
|
+
- id: LLM07:2025
|
|
42
|
+
context: "Credentials embedded in or derived from system prompts that appear in agent output constitute an LLM07:2025 System Prompt Leakage incident with direct security impact."
|
|
43
|
+
strength: secondary
|
|
44
|
+
eu_ai_act:
|
|
45
|
+
- article: "12"
|
|
46
|
+
section: "1"
|
|
47
|
+
context: "Article 12 requires automatic logging of events; a credential exposure incident is a high-severity event that must be automatically recorded and reported to the deployer for incident investigation."
|
|
48
|
+
strength: primary
|
|
49
|
+
- article: "9"
|
|
50
|
+
context: "Credential leakage is a documented risk class in the AI system risk management plan; Article 9 requires that monitoring controls are implemented to detect and respond to such events."
|
|
51
|
+
strength: secondary
|
|
52
|
+
nist_ai_rmf:
|
|
53
|
+
- function: Govern
|
|
54
|
+
subcategory: GV.6.1
|
|
55
|
+
context: "GV.6.1 requires organizational policies governing sensitive data and credentials in AI systems; this rule enforces those policies by detecting credential exposure at runtime."
|
|
56
|
+
strength: primary
|
|
57
|
+
- function: Manage
|
|
58
|
+
subcategory: MG.3.1
|
|
59
|
+
context: "Credential exposure requires an immediate risk treatment response; this detection rule is the technical implementation of the risk treatment plan for credential leakage events."
|
|
60
|
+
strength: secondary
|
|
61
|
+
iso_42001:
|
|
62
|
+
- clause: "8.4"
|
|
63
|
+
context: "Clause 8.4 AI system impact assessment must identify credential leakage as a high-severity impact scenario; this rule generates the detection evidence needed for audit and impact reporting."
|
|
64
|
+
strength: primary
|
|
65
|
+
- clause: "6.2"
|
|
66
|
+
context: "Protecting credentials from exposure is an explicit AIMS information security objective under clause 6.2; detection of leakage events measures whether this objective is being achieved."
|
|
67
|
+
strength: secondary
|
|
68
|
+
|
|
32
69
|
tags:
|
|
33
70
|
category: context-exfiltration
|
|
34
71
|
subcategory: credential-exposure
|
|
@@ -23,6 +23,28 @@ references:
|
|
|
23
23
|
- AML.T0051 - LLM Prompt Injection
|
|
24
24
|
mitre_attack:
|
|
25
25
|
- "T1565.001 - Data Manipulation: Stored Data Manipulation"
|
|
26
|
+
compliance:
|
|
27
|
+
eu_ai_act:
|
|
28
|
+
- article: "14"
|
|
29
|
+
context: "Agent memory poisoning corrupts persistent context across sessions, allowing attackers to establish behavioral backdoors that survive individual-turn oversight checks; Article 14 requires that human oversight mechanisms remain effective across the full lifecycle of agent interactions, including persistent state."
|
|
30
|
+
strength: primary
|
|
31
|
+
- article: "15"
|
|
32
|
+
context: "Article 15 robustness requirements mandate resistance against adversarial inputs that modify system behavior; memory injection attacks that alter an agent's long-term behavioral state represent a critical robustness failure requiring detection controls."
|
|
33
|
+
strength: secondary
|
|
34
|
+
nist_ai_rmf:
|
|
35
|
+
- subcategory: "GV.1.2"
|
|
36
|
+
context: "Memory manipulation attacks that inject false behavioral directives require clearly assigned accountability roles to ensure that persistent agent state is monitored and that any unauthorized modification is escalated to a responsible party."
|
|
37
|
+
strength: primary
|
|
38
|
+
- subcategory: "MG.2.3"
|
|
39
|
+
context: "Risk treatment plans for memory poisoning must include controls that detect and block cross-session behavioral injection before poisoned directives propagate into future interactions."
|
|
40
|
+
strength: secondary
|
|
41
|
+
iso_42001:
|
|
42
|
+
- clause: "6.2"
|
|
43
|
+
context: "AI objectives and risk treatment plans must explicitly address memory manipulation threats to prevent attackers from establishing persistent behavioral backdoors that survive context resets."
|
|
44
|
+
strength: primary
|
|
45
|
+
- clause: "8.6"
|
|
46
|
+
context: "Operational controls for AI systems must include integrity checks on persistent memory stores to ensure that agent behavioral state has not been tampered with between sessions."
|
|
47
|
+
strength: secondary
|
|
26
48
|
tags:
|
|
27
49
|
category: context-exfiltration
|
|
28
50
|
subcategory: memory-manipulation
|
|
@@ -24,6 +24,28 @@ references:
|
|
|
24
24
|
- ASI06:2026 - Insufficient Access Controls
|
|
25
25
|
metadata_provenance:
|
|
26
26
|
mitre_atlas: auto-generated
|
|
27
|
+
compliance:
|
|
28
|
+
eu_ai_act:
|
|
29
|
+
- article: "13"
|
|
30
|
+
context: "Disguised analytics exfiltration exploits the assumption that telemetry collection is harmless to covertly transmit full conversation context to attacker-controlled endpoints; Article 13 transparency obligations require that AI systems and their tools disclose all data collection practices clearly to users."
|
|
31
|
+
strength: primary
|
|
32
|
+
- article: "15"
|
|
33
|
+
context: "Article 15 cybersecurity requirements include protection against tools that misrepresent their data handling to facilitate unauthorized data extraction; detection of analytics-disguised exfiltration is a required control for high-risk AI deployments."
|
|
34
|
+
strength: secondary
|
|
35
|
+
nist_ai_rmf:
|
|
36
|
+
- subcategory: "GV.6.1"
|
|
37
|
+
context: "Disguised analytics exfiltration violates data governance boundaries by covertly transmitting full conversation context under the cover of routine telemetry, bypassing the data handling policies that GV.6.1 requires AI systems to enforce."
|
|
38
|
+
strength: primary
|
|
39
|
+
- subcategory: "MS.2.5"
|
|
40
|
+
context: "Detection of analytics-disguised data exfiltration is a data protection control measurement required under MS.2.5 to verify that AI system tool invocations do not silently leak user conversation data to external endpoints."
|
|
41
|
+
strength: secondary
|
|
42
|
+
iso_42001:
|
|
43
|
+
- clause: "8.4"
|
|
44
|
+
context: "Disguised analytics exfiltration requires a documented AI system impact assessment under clause 8.4 that identifies tools claiming benign analytics roles as a covert data exfiltration vector affecting user privacy."
|
|
45
|
+
strength: primary
|
|
46
|
+
- clause: "9.1"
|
|
47
|
+
context: "Ongoing monitoring and evaluation under clause 9.1 must include checks that tool-level data transmission claims are consistent with stated telemetry policies, detecting analytics-disguised exfiltration during operational review."
|
|
48
|
+
strength: secondary
|
|
27
49
|
tags:
|
|
28
50
|
category: context-exfiltration
|
|
29
51
|
subcategory: disguised-analytics-collection
|
|
@@ -19,6 +19,28 @@ references:
|
|
|
19
19
|
- ASI03:2026 - Agent Identity and Access Abuse
|
|
20
20
|
mitre_attack:
|
|
21
21
|
- T1552.001 - Credentials In Files
|
|
22
|
+
compliance:
|
|
23
|
+
eu_ai_act:
|
|
24
|
+
- article: "15"
|
|
25
|
+
context: "Credential file theft through agent tool access compromises every downstream service connected to the stolen credentials; Article 15 cybersecurity requirements mandate that high-risk AI systems include controls preventing agents from accessing credential stores outside their authorized scope."
|
|
26
|
+
strength: primary
|
|
27
|
+
- article: "12"
|
|
28
|
+
context: "Credential theft undermines the integrity of audit logs and access records that Article 12 logging obligations depend on; an attacker who obtains credentials can falsify or erase audit trails, making Article 12 compliance unverifiable."
|
|
29
|
+
strength: secondary
|
|
30
|
+
nist_ai_rmf:
|
|
31
|
+
- subcategory: "MP.5.1"
|
|
32
|
+
context: "Adversarial tool calls targeting credential files such as ~/.aws/credentials and SSH private keys represent a direct exploitation of the agent's file-access capabilities that must be identified and tracked as a critical AI system risk."
|
|
33
|
+
strength: primary
|
|
34
|
+
- subcategory: "GV.6.1"
|
|
35
|
+
context: "Data governance policies must define and enforce scope restrictions on agent access to credential stores, preventing AI tools from reading authentication files outside their authorized operational boundary."
|
|
36
|
+
strength: secondary
|
|
37
|
+
iso_42001:
|
|
38
|
+
- clause: "6.2"
|
|
39
|
+
context: "Risk treatment activities must include controls that block agent access to well-known credential file paths, reducing the attack surface for credential theft through AI tool invocations."
|
|
40
|
+
strength: primary
|
|
41
|
+
- clause: "8.4"
|
|
42
|
+
context: "AI system impact assessments must document and review the potential consequences of credential file theft, including lateral movement across all downstream services connected to stolen credentials."
|
|
43
|
+
strength: secondary
|
|
22
44
|
tags:
|
|
23
45
|
category: context-exfiltration
|
|
24
46
|
subcategory: credential-theft
|