agent-threat-rules 2.0.14 → 2.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/package.json +1 -1
  2. package/rules/agent-manipulation/ATR-2026-00032-goal-hijacking.yaml +24 -0
  3. package/rules/agent-manipulation/ATR-2026-00074-cross-agent-privilege-escalation.yaml +24 -0
  4. package/rules/agent-manipulation/ATR-2026-00076-inter-agent-message-spoofing.yaml +24 -0
  5. package/rules/agent-manipulation/ATR-2026-00077-human-trust-exploitation.yaml +24 -0
  6. package/rules/agent-manipulation/ATR-2026-00108-consensus-sybil-attack.yaml +22 -0
  7. package/rules/agent-manipulation/ATR-2026-00116-a2a-message-validation.yaml +22 -0
  8. package/rules/agent-manipulation/ATR-2026-00117-agent-identity-spoofing.yaml +22 -0
  9. package/rules/agent-manipulation/ATR-2026-00119-social-engineering-via-agent.yaml +22 -0
  10. package/rules/agent-manipulation/ATR-2026-00132-casual-authority-escalation.yaml +22 -0
  11. package/rules/agent-manipulation/ATR-2026-00139-casual-authority-redirect.yaml +22 -0
  12. package/rules/agent-manipulation/ATR-2026-00164-skill-scope-hijack.yaml +22 -0
  13. package/rules/agent-manipulation/ATR-2026-00269-fitd-escalation.yaml +22 -0
  14. package/rules/agent-manipulation/ATR-2026-00271-grandma-roleplay-jailbreak.yaml +22 -0
  15. package/rules/agent-manipulation/ATR-2026-00273-dan-developer-mode-persona.yaml +22 -0
  16. package/rules/context-exfiltration/ATR-2026-00020-system-prompt-leak.yaml +39 -0
  17. package/rules/context-exfiltration/ATR-2026-00021-api-key-exposure.yaml +37 -0
  18. package/rules/context-exfiltration/ATR-2026-00075-agent-memory-manipulation.yaml +22 -0
  19. package/rules/context-exfiltration/ATR-2026-00102-disguised-analytics-exfiltration.yaml +22 -0
  20. package/rules/context-exfiltration/ATR-2026-00113-credential-theft.yaml +22 -0
  21. package/rules/context-exfiltration/ATR-2026-00114-oauth-token-abuse.yaml +22 -0
  22. package/rules/context-exfiltration/ATR-2026-00115-env-var-harvesting.yaml +22 -0
  23. package/rules/context-exfiltration/ATR-2026-00136-tool-response-data-piggyback.yaml +22 -0
  24. package/rules/context-exfiltration/ATR-2026-00212-mcp-atlassian-credential-leak.yaml +165 -0
  25. package/rules/context-exfiltration/ATR-2026-00261-markdown-image-exfiltration.yaml +22 -0
  26. package/rules/data-poisoning/ATR-2026-00070-data-poisoning.yaml +49 -0
  27. package/rules/excessive-autonomy/ATR-2026-00050-runaway-agent-loop.yaml +37 -0
  28. package/rules/excessive-autonomy/ATR-2026-00051-resource-exhaustion.yaml +24 -0
  29. package/rules/excessive-autonomy/ATR-2026-00052-cascading-failure.yaml +24 -0
  30. package/rules/excessive-autonomy/ATR-2026-00098-unauthorized-financial-action.yaml +31 -0
  31. package/rules/excessive-autonomy/ATR-2026-00099-high-risk-tool-gate.yaml +22 -0
  32. package/rules/model-abuse/ATR-2026-00279-harmful-completion-continuation.yaml +22 -0
  33. package/rules/model-abuse/ATR-2026-00281-eicar-gtube-malware-signature-request.yaml +22 -0
  34. package/rules/model-abuse/ATR-2026-00289-lmrc-harmful-content-elicitation.yaml +22 -0
  35. package/rules/model-abuse/ATR-2026-00292-self-harm-eating-disorder-facilitation.yaml +22 -0
  36. package/rules/model-abuse/ATR-2026-00298-malicious-use-illegal-activity-request.yaml +22 -0
  37. package/rules/model-abuse/ATR-2026-00299-harmbench-detailed-harmful-instruction.yaml +22 -0
  38. package/rules/model-security/ATR-2026-00072-model-behavior-extraction.yaml +24 -0
  39. package/rules/model-security/ATR-2026-00073-malicious-finetuning-data.yaml +24 -0
  40. package/rules/privilege-escalation/ATR-2026-00040-privilege-escalation.yaml +34 -0
  41. package/rules/privilege-escalation/ATR-2026-00041-scope-creep.yaml +24 -0
  42. package/rules/privilege-escalation/ATR-2026-00107-delayed-execution-bypass.yaml +22 -0
  43. package/rules/privilege-escalation/ATR-2026-00110-eval-injection.yaml +22 -0
  44. package/rules/privilege-escalation/ATR-2026-00111-shell-escape.yaml +22 -0
  45. package/rules/privilege-escalation/ATR-2026-00112-dynamic-import-exploitation.yaml +22 -0
  46. package/rules/privilege-escalation/ATR-2026-00143-casual-privilege-escalation.yaml +22 -0
  47. package/rules/prompt-injection/ATR-2026-00001-direct-prompt-injection.yaml +33 -0
  48. package/rules/prompt-injection/ATR-2026-00002-indirect-prompt-injection.yaml +36 -0
  49. package/rules/prompt-injection/ATR-2026-00003-jailbreak-attempt.yaml +23 -0
  50. package/rules/skill-compromise/ATR-2026-00060-skill-impersonation.yaml +37 -0
  51. package/rules/tool-poisoning/ATR-2026-00010-mcp-malicious-response.yaml +39 -0
  52. package/rules/tool-poisoning/ATR-2026-00209-mcpwn-runaway-invocation.yaml +162 -0
  53. package/rules/tool-poisoning/ATR-2026-00210-flowise-system-message-override.yaml +162 -0
@@ -32,6 +32,28 @@ references:
32
32
  research:
33
33
  - "https://github.com/NVIDIA/garak/blob/main/garak/probes/donotanswer.py"
34
34
  - "https://arxiv.org/abs/2308.13387"
35
+ compliance:
36
+ eu_ai_act:
37
+ - article: "15"
38
+ context: "Self-harm and eating disorder facilitation requests target individuals in mental health crisis with operational methods to continue harmful behaviors; Article 15 robustness requirements for high-risk AI mandate that systems resist direct facilitation of self-harm, a uniquely high-severity output category given the vulnerable population likely to make such requests."
39
+ strength: primary
40
+ - article: "9"
41
+ context: "Self-harm facilitation is a mandatory risk category requiring systematic controls under Article 9; AI systems interacting with general populations must include detection and blocking of direct self-harm method requests as a baseline risk treatment."
42
+ strength: secondary
43
+ nist_ai_rmf:
44
+ - subcategory: "MP.5.1"
45
+ context: "Self-harm and eating disorder facilitation requests are adversarial inputs targeting individuals in mental health crisis with operational methods to conduct harm; MP.5.1 requires this uniquely high-severity attack class — direct requests for self-harm methods, purging techniques, and eating disorder development — to be identified and tracked as an adversarial input risk."
46
+ strength: primary
47
+ - subcategory: "MG.2.3"
48
+ context: "Risk treatment plans under MG.2.3 must implement blocking controls for self-harm facilitation requests covering direct self-harm methods, eating disorder facilitation, concealment assistance, and dangerous low-calorie diet requests, given the vulnerable population likely to submit these inputs."
49
+ strength: secondary
50
+ iso_42001:
51
+ - clause: "6.2"
52
+ context: "Risk treatment activities under clause 6.2 must address self-harm and eating disorder facilitation as a mandatory treatment category, implementing detection controls for direct method requests, concealment assistance, and dangerous diet facilitation targeting at-risk individuals."
53
+ strength: primary
54
+ - clause: "8.6"
55
+ context: "Operational controls under clause 8.6 must enforce blocking of self-harm facilitation outputs — including self-harm method instructions, eating disorder development guidance, and concealment of disordered behaviors — to protect the vulnerable populations most likely to submit these requests."
56
+ strength: secondary
35
57
  tags:
36
58
  category: model-abuse
37
59
  subcategory: self-harm-eating-disorder-facilitation
@@ -42,6 +42,28 @@ references:
42
42
  research:
43
43
  - "https://github.com/NVIDIA/garak/blob/main/garak/probes/donotanswer.py"
44
44
  - "https://arxiv.org/abs/2308.13387"
45
+ compliance:
46
+ eu_ai_act:
47
+ - article: "15"
48
+ context: "Direct facilitation requests for drug manufacturing, cybercrime, financial crime, wildlife trafficking, and terrorism represent the highest-severity harmful output category; Article 15 robustness requirements mandate that high-risk AI systems systematically resist direct illegal-activity facilitation requests across all five DoNotAnswer crime categories."
49
+ strength: primary
50
+ - article: "9"
51
+ context: "Illegal activity facilitation is a mandatory risk category in any AI system risk register under Article 9; detection and blocking of direct facilitation requests is a non-negotiable risk treatment for any AI system that interacts with uncontrolled user populations."
52
+ strength: secondary
53
+ nist_ai_rmf:
54
+ - subcategory: "MP.5.1"
55
+ context: "DoNotAnswer malicious use requests — drug manufacturing, wildlife trafficking, cybercrime, financial crime, and terrorism facilitation — are adversarial inputs with explicit illegal intent markers designed to extract operationally harmful outputs; MP.5.1 requires all five crime categories to be identified and tracked as adversarial input risks."
56
+ strength: primary
57
+ - subcategory: "MG.2.3"
58
+ context: "Risk treatment plans under MG.2.3 must implement blocking controls across all DoNotAnswer malicious use categories, as direct illegal activity facilitation requests represent the highest-priority risk treatment required for AI systems interacting with uncontrolled user populations."
59
+ strength: secondary
60
+ iso_42001:
61
+ - clause: "6.2"
62
+ context: "Risk treatment activities under clause 6.2 must address all five DoNotAnswer malicious use categories — drug crime, wildlife trafficking, cybercrime, financial crime, and terrorism — as mandatory treatment targets for AI systems accessible to the general public."
63
+ strength: primary
64
+ - clause: "8.6"
65
+ context: "Operational controls under clause 8.6 must enforce blocking of illegal activity facilitation requests distinguished by explicit illegal intent markers such as 'illegally', 'without getting caught', and 'on the black market', preventing the AI system from providing operational crime assistance."
66
+ strength: secondary
45
67
  tags:
46
68
  category: model-abuse
47
69
  subcategory: donotanswer-malicious-use-illegal-activity
@@ -38,6 +38,28 @@ references:
38
38
  research:
39
39
  - "https://github.com/NVIDIA/garak/blob/main/garak/probes/harmbench.py"
40
40
  - "https://arxiv.org/abs/2402.04249"
41
+ compliance:
42
+ eu_ai_act:
43
+ - article: "15"
44
+ context: "HarmBench step-by-step harmful instruction requests — WMD synthesis, mass-casualty planning, targeted violence, and infrastructure bioterrorism — represent the most severe output category in AI safety; Article 15 cybersecurity and robustness requirements mandate that high-risk AI systems implement robust blocking for explicit detailed-instruction requests in these categories."
45
+ strength: primary
46
+ - article: "9"
47
+ context: "WMD synthesis facilitation and terrorism support are absolute prohibitions requiring the highest-priority risk treatments under Article 9; detection of HarmBench-pattern detailed instruction requests is a mandatory control for any AI system accessible to uncontrolled user populations."
48
+ strength: secondary
49
+ nist_ai_rmf:
50
+ - subcategory: "MP.5.1"
51
+ context: "HarmBench step-by-step harmful instruction requests — WMD synthesis, mass-casualty planning, targeted violence, infrastructure bioterrorism, and atrocity denial — are the most severe adversarial input class; MP.5.1 requires that detailed instruction-seeking framing combined with harmful target domains be identified and tracked as the highest-priority adversarial input risk."
52
+ strength: primary
53
+ - subcategory: "MG.2.3"
54
+ context: "Risk treatment plans under MG.2.3 must assign the highest-priority blocking controls to HarmBench-pattern requests for detailed synthesis instructions (dimethylmercury, sarin, ricin), mass-casualty guides, and infrastructure bioterrorism steps, as these represent absolute prohibition categories for any AI system."
55
+ strength: secondary
56
+ iso_42001:
57
+ - clause: "6.2"
58
+ context: "Risk treatment activities under clause 6.2 must treat HarmBench-pattern detailed harmful instruction requests — covering WMD synthesis, targeted violence, mass-casualty planning, and atrocity denial — as the highest-priority treatment targets requiring unconditional blocking controls."
59
+ strength: primary
60
+ - clause: "8.6"
61
+ context: "Operational controls under clause 8.6 must block explicit requests for detailed step-by-step instructions in harmful domains, distinguishing HarmBench-pattern requests by the combination of instruction-seeking framing ('give detailed instructions', 'step-by-step guide') and clearly prohibited target domains."
62
+ strength: secondary
41
63
  tags:
42
64
  category: model-abuse
43
65
  subcategory: harmbench-detailed-harmful-instruction
@@ -22,6 +22,30 @@ references:
22
22
  mitre_atlas:
23
23
  - AML.T0044 - Full ML Model Access
24
24
  - AML.T0024 - Exfiltration via ML Inference API
25
+
26
+ compliance:
27
+ eu_ai_act:
28
+ - article: "13"
29
+ context: "Systematic model behavior extraction enables adversaries to reverse-engineer internal decision logic; Article 13 transparency obligations require protecting against unauthorized extraction of operational model properties that could facilitate circumvention."
30
+ strength: primary
31
+ - article: "15"
32
+ context: "Article 15 cybersecurity requirements include protecting the AI system against extraction attacks that map decision boundaries for adversarial exploitation; this rule detects systematic probing patterns."
33
+ strength: secondary
34
+ nist_ai_rmf:
35
+ - subcategory: "MP.5.1"
36
+ context: "Systematic model behavior extraction is an adversarial input attack that maps the AI system's decision boundaries for downstream exploitation; MP.5.1 requires that this class of adversarial risk is identified, tracked, and detected at runtime."
37
+ strength: primary
38
+ - subcategory: "GV.6.1"
39
+ context: "Model extraction attacks harvest internal behavioral properties that constitute sensitive AI system data; GV.6.1 data governance policies must address the protection of model decision boundaries and system prompt configurations against unauthorized extraction."
40
+ strength: secondary
41
+ iso_42001:
42
+ - clause: "8.4"
43
+ context: "ISO 42001 clause 8.4 requires documented impact assessments for AI systems; systematic extraction attacks that reverse-engineer model behavior represent a documented risk that must be assessed and mitigated through runtime detection controls."
44
+ strength: primary
45
+ - clause: "9.1"
46
+ context: "Clause 9.1 performance monitoring requires evaluation of the AI management system's effectiveness; tracking model extraction attempts provides the measurement signal needed to assess whether anti-extraction controls are operating effectively."
47
+ strength: secondary
48
+
25
49
  tags:
26
50
  category: model-abuse
27
51
  subcategory: model-extraction
@@ -22,6 +22,30 @@ references:
22
22
  mitre_atlas:
23
23
  - AML.T0020 - Poison Training Data
24
24
  - AML.T0018 - Backdoor ML Model
25
+
26
+ compliance:
27
+ eu_ai_act:
28
+ - article: "15"
29
+ context: "Article 15 accuracy requirements mandate that high-risk AI systems maintain reliable performance; malicious fine-tuning data degrades accuracy and introduces adversarial backdoors that undermine these requirements."
30
+ strength: primary
31
+ - article: "9"
32
+ context: "Poisoned training data is a documented supply chain risk requiring monitoring controls under Article 9; detection of malicious training patterns implements the required risk management measure."
33
+ strength: secondary
34
+ nist_ai_rmf:
35
+ - subcategory: "MP.5.1"
36
+ context: "Malicious fine-tuning data injects adversarial backdoor triggers and safety bypass instructions into the AI training process; MP.5.1 requires that this class of adversarial supply chain attack is identified and tracked before poisoned datasets are ingested."
37
+ strength: primary
38
+ - subcategory: "MG.2.3"
39
+ context: "Detected malicious fine-tuning submissions require immediate risk treatment including quarantine and forensic analysis; MG.2.3 mandates that risk treatment plans are implemented to prevent backdoor activation after model deployment."
40
+ strength: secondary
41
+ iso_42001:
42
+ - clause: "6.2"
43
+ context: "ISO 42001 clause 6.2 risk treatment plans must address training data poisoning as a supply chain threat; this rule operationalizes the detection control that inspects fine-tuning uploads for backdoor trigger patterns before they alter model behavior."
44
+ strength: primary
45
+ - clause: "8.3"
46
+ context: "Clause 8.3 requires that data quality, integrity, and provenance for AI systems are maintained; malicious fine-tuning detection enforces data integrity by blocking training examples that embed backdoors, credential disclosure patterns, or safety bypass instructions."
47
+ strength: secondary
48
+
25
49
  tags:
26
50
  category: data-poisoning
27
51
  subcategory: malicious-finetuning
@@ -30,6 +30,40 @@ references:
30
30
  - T1611 - Escape to Host
31
31
  cve:
32
32
  - CVE-2026-0628
33
+
34
+ compliance:
35
+ owasp_agentic:
36
+ - id: ASI03:2026
37
+ context: "Privilege escalation via tool permission abuse or admin function invocation is the primary ASI03 Identity and Privilege Abuse scenario — the agent acquires capabilities exceeding its authorized scope."
38
+ strength: primary
39
+ owasp_llm:
40
+ - id: LLM06:2025
41
+ context: "An agent requesting tools with elevated permissions beyond its assigned role is the canonical LLM06:2025 Excessive Agency scenario, operationalized here via tool-name and argument pattern detection."
42
+ strength: primary
43
+ eu_ai_act:
44
+ - article: "14"
45
+ context: "Article 14 requires that humans can oversee and intervene in AI system operation; privilege escalation techniques that bypass system-level controls directly undermine the human oversight mechanisms Article 14 mandates."
46
+ strength: primary
47
+ - article: "9"
48
+ context: "Privilege escalation is a documented high-severity risk in the AI system risk register; Article 9 requires monitoring controls to detect and respond to such scope violations."
49
+ strength: secondary
50
+ nist_ai_rmf:
51
+ - function: Govern
52
+ subcategory: GV.1.2
53
+ context: "GV.1.2 requires defined accountability roles and controls for AI system permissions; detection of privilege escalation enforces least-privilege boundaries established in the governance framework."
54
+ strength: primary
55
+ - function: Manage
56
+ subcategory: MG.4.1
57
+ context: "Privilege escalation events require an incident response; this rule generates the alerts needed to initiate the MG.4.1 AI incident response process."
58
+ strength: secondary
59
+ iso_42001:
60
+ - clause: "6.2"
61
+ context: "Clause 6.2 AIMS security objectives include least-privilege enforcement for AI agent operations; this rule detects violations of those objectives at runtime."
62
+ strength: primary
63
+ - clause: "8.6"
64
+ context: "Clause 8.6 AI system operational control requires that agents do not exceed their authorized operational scope; privilege escalation detection enforces that operational boundary."
65
+ strength: secondary
66
+
33
67
  tags:
34
68
  category: privilege-escalation
35
69
  subcategory: tool-permission-escalation
@@ -24,6 +24,30 @@ references:
24
24
  mitre_atlas:
25
25
  - AML.T0040 - AI Model Inference API Access
26
26
  - AML.T0047 - ML-Enabled Product or Service
27
+
28
+ compliance:
29
+ eu_ai_act:
30
+ - article: "14"
31
+ context: "Gradual scope creep enables an agent to exceed its authorized operational boundaries without triggering explicit alerts, progressively eroding the human oversight controls Article 14 mandates."
32
+ strength: primary
33
+ - article: "9"
34
+ context: "Scope creep is a documented incremental risk pattern in AI agent systems; Article 9 risk management obligations require monitoring controls to detect unsolicited capability expansion."
35
+ strength: secondary
36
+ nist_ai_rmf:
37
+ - subcategory: "GV.1.2"
38
+ context: "Scope creep erodes the accountability roles and task boundaries assigned to AI agents; GV.1.2 requires that these roles are enforced, making detection of unsolicited capability expansion essential."
39
+ strength: primary
40
+ - subcategory: "MG.2.3"
41
+ context: "Incremental scope expansion represents an active exploitation of agent autonomy; MG.2.3 risk treatment plans must be triggered when agents self-authorize actions beyond their initial assignment."
42
+ strength: secondary
43
+ iso_42001:
44
+ - clause: "6.2"
45
+ context: "ISO 42001 clause 6.2 risk treatment activities must cover gradual privilege expansion; this rule detects the incremental language patterns that indicate an agent is executing scope creep rather than a sudden escalation."
46
+ strength: primary
47
+ - clause: "8.6"
48
+ context: "Clause 8.6 operational controls require that AI systems execute within their defined boundaries; scope creep detection enforces these boundaries by identifying when agents attempt to self-authorize additional actions."
49
+ strength: secondary
50
+
27
51
  tags:
28
52
  category: privilege-escalation
29
53
  subcategory: scope-creep
@@ -22,6 +22,28 @@ references:
22
22
  - ASI05:2026 - Unexpected Code Execution
23
23
  mitre_attack:
24
24
  - T1053 - Scheduled Task/Job
25
+ compliance:
26
+ eu_ai_act:
27
+ - article: "14"
28
+ context: "Delayed execution bypasses exploit the temporal gap between task scheduling and execution to acquire elevated privileges that were not authorized at scheduling time; Article 14 oversight requirements must cover deferred actions, not just real-time tool invocations."
29
+ strength: primary
30
+ - article: "15"
31
+ context: "Article 15 robustness requirements include protection against adversarial privilege escalation techniques; scheduled task abuse that runs with system-level permissions after bypassing user-context checks is a documented attack pattern requiring detection controls."
32
+ strength: secondary
33
+ nist_ai_rmf:
34
+ - subcategory: "GV.1.2"
35
+ context: "Privilege escalation via delayed task execution requires accountability roles that extend human oversight to deferred agent actions, ensuring that scheduled tasks are subject to the same authorization checks as real-time tool invocations."
36
+ strength: primary
37
+ - subcategory: "MG.2.3"
38
+ context: "Risk treatment plans must address the temporal gap exploit in scheduled task execution by requiring that permission checks are re-validated at execution time rather than only at scheduling time."
39
+ strength: secondary
40
+ iso_42001:
41
+ - clause: "6.2"
42
+ context: "AI risk treatment activities must explicitly cover deferred execution attack patterns by requiring that scheduled tasks inherit and re-verify the invoking user's authorization context at the time of actual execution."
43
+ strength: primary
44
+ - clause: "8.6"
45
+ context: "Operational controls for AI systems must ensure that delayed background tasks do not acquire elevated privileges beyond what was authorized during scheduling, closing the temporal gap that this attack exploits."
46
+ strength: secondary
25
47
  tags:
26
48
  category: privilege-escalation
27
49
  subcategory: delayed-execution-bypass
@@ -18,6 +18,28 @@ references:
18
18
  - ASI05:2026 - Unexpected Code Execution
19
19
  mitre_attack:
20
20
  - T1059 - Command and Scripting Interpreter
21
+ compliance:
22
+ eu_ai_act:
23
+ - article: "15"
24
+ context: "eval() and dynamic code execution primitives allow attackers to execute arbitrary code within the agent runtime, enabling complete host system compromise; Article 15 cybersecurity requirements mandate that high-risk AI systems prohibit or strictly control dynamic code evaluation capabilities."
25
+ strength: primary
26
+ - article: "14"
27
+ context: "Arbitrary code execution via eval injection can override safety controls and execute actions that bypass all human oversight mechanisms; Article 14 requires that AI system architectures prevent such unrestricted capability access from agent tool layers."
28
+ strength: secondary
29
+ nist_ai_rmf:
30
+ - subcategory: "MP.5.1"
31
+ context: "eval() and dynamic code execution primitives are adversarial input vectors that allow attackers to escape the agent's sandboxed tool context and execute arbitrary code within the host process, and must be tracked as critical AI system risks."
32
+ strength: primary
33
+ - subcategory: "MG.2.3"
34
+ context: "Risk treatment plans must prohibit or strictly sandbox dynamic code evaluation capabilities in agent tool layers to prevent eval injection from enabling full host system compromise."
35
+ strength: secondary
36
+ iso_42001:
37
+ - clause: "6.2"
38
+ context: "Risk treatment activities must classify dynamic code execution via eval() and similar primitives as an unacceptable risk in AI agent tools and require architectural controls that block their use with user-controlled inputs."
39
+ strength: primary
40
+ - clause: "8.6"
41
+ context: "Operational controls must prohibit agent tools from invoking eval(), new Function(), or vm module methods on untrusted inputs to ensure that code execution remains within the auditable and authorized scope of the AI system."
42
+ strength: secondary
21
43
  tags:
22
44
  category: privilege-escalation
23
45
  subcategory: eval-injection
@@ -19,6 +19,28 @@ references:
19
19
  - ASI05:2026 - Unexpected Code Execution
20
20
  mitre_attack:
21
21
  - T1059.004 - Unix Shell
22
+ compliance:
23
+ eu_ai_act:
24
+ - article: "15"
25
+ context: "Shell metacharacter injection enables attackers to chain arbitrary OS commands onto otherwise safe tool invocations, achieving full system compromise through agent tool arguments; Article 15 cybersecurity requirements mandate that AI systems sanitize all inputs passed to shell-adjacent tool layers."
26
+ strength: primary
27
+ - article: "14"
28
+ context: "Shell escape attacks allow execution of arbitrary system commands outside any authorized scope, completely bypassing human oversight of what actions the agent actually performs; Article 14 requires that agent actions remain within observable and sanctioned boundaries."
29
+ strength: secondary
30
+ nist_ai_rmf:
31
+ - subcategory: "MP.5.1"
32
+ context: "Shell metacharacter injection via backticks, subshells, semicolons, and logical operators is an adversarial technique that exploits the agent's tool argument handling to execute arbitrary OS commands, and must be identified as a critical AI attack vector."
33
+ strength: primary
34
+ - subcategory: "MG.2.3"
35
+ context: "Risk treatment plans must require strict sanitization of all agent tool arguments before shell-adjacent processing to prevent metacharacter injection from chaining unauthorized commands onto sanctioned tool invocations."
36
+ strength: secondary
37
+ iso_42001:
38
+ - clause: "6.2"
39
+ context: "Risk treatment activities must mandate input sanitization controls that strip or reject shell metacharacters from all agent tool arguments before they reach any process-execution layer."
40
+ strength: primary
41
+ - clause: "8.6"
42
+ context: "Operational controls must enforce argument sanitization at the tool interface boundary to ensure that shell metacharacter injection cannot redirect agent actions outside the scope of authorized and observable operations."
43
+ strength: secondary
22
44
  tags:
23
45
  category: privilege-escalation
24
46
  subcategory: shell-escape
@@ -19,6 +19,28 @@ references:
19
19
  - ASI05:2026 - Unexpected Code Execution
20
20
  mitre_attack:
21
21
  - T1129 - Shared Modules
22
+ compliance:
23
+ eu_ai_act:
24
+ - article: "15"
25
+ context: "Dynamic module loading with user-controlled paths allows attackers to inject malicious modules, WebAssembly payloads, or native libraries into the agent runtime at execution time; Article 15 cybersecurity requirements mandate that AI systems restrict dynamic code loading to auditable, allowlisted sources."
26
+ strength: primary
27
+ - article: "14"
28
+ context: "Attacker-controlled module injection via dynamic imports can install persistent backdoors or override safety controls, undermining the reliability of human oversight mechanisms; Article 14 requires that agent behavior remain predictable and within the scope of authorized module execution."
29
+ strength: secondary
30
+ nist_ai_rmf:
31
+ - subcategory: "MP.5.1"
32
+ context: "Dynamic module loading with attacker-controlled paths is an adversarial input risk that allows injection of malicious modules, WebAssembly payloads, or native libraries into the agent runtime, bypassing static code auditing controls."
33
+ strength: primary
34
+ - subcategory: "MG.2.3"
35
+ context: "Risk treatment plans must restrict dynamic module loading to allowlisted paths and prohibit user-controlled inputs from influencing which code is resolved and executed at agent runtime."
36
+ strength: secondary
37
+ iso_42001:
38
+ - clause: "6.2"
39
+ context: "Risk treatment activities must address dynamic import exploitation by requiring that all externally-loaded modules are resolved against a verified allowlist before the agent runtime loads and executes them."
40
+ strength: primary
41
+ - clause: "8.5"
42
+ context: "Controls over externally-provided AI components must include validation of all dynamically loaded modules to ensure that attacker-controlled paths cannot introduce unauthorized code into the agent execution environment."
43
+ strength: secondary
22
44
  tags:
23
45
  category: privilege-escalation
24
46
  subcategory: dynamic-import-exploitation
@@ -21,6 +21,28 @@ references:
21
21
  - ASI03 - Excessive Agency
22
22
  metadata_provenance:
23
23
  mitre_atlas: auto-generated
24
+ compliance:
25
+ eu_ai_act:
26
+ - article: "14"
27
+ context: "Casual unauthorized privilege escalation — where an agent self-authorizes access to admin panels or privileged settings through informal discovery framing — bypasses human authorization gates; Article 14 requires that access to privileged system components remain under explicit human approval regardless of the linguistic framing used."
28
+ strength: primary
29
+ - article: "15"
30
+ context: "Article 15 robustness requirements include resistance against social-engineering-style privilege escalation techniques; casual self-authorization patterns represent a documented adversarial approach that exploits agents' tendency to act on observed access opportunities."
31
+ strength: secondary
32
+ nist_ai_rmf:
33
+ - subcategory: "GV.1.2"
34
+ context: "Casual unauthorized privilege escalation — where an agent self-authorizes access to admin panels and user management systems using informal discovery framing — directly violates accountability role boundaries that GV.1.2 requires to be defined and enforced for AI risk management."
35
+ strength: primary
36
+ - subcategory: "MG.2.3"
37
+ context: "Risk treatment plans under MG.2.3 must include controls that prevent agents from casually escalating privileges by self-authorizing access to privileged system components without explicit human approval."
38
+ strength: secondary
39
+ iso_42001:
40
+ - clause: "6.2"
41
+ context: "Risk treatment activities under clause 6.2 must address casual privilege escalation patterns where agents exploit informal discovery framing to self-authorize access to admin interfaces, bypassing planned authorization controls."
42
+ strength: primary
43
+ - clause: "8.6"
44
+ context: "Operational controls under clause 8.6 must enforce authorization gates that prevent AI agents from accessing admin panels, user management systems, or system configurations based on self-reported access opportunity rather than explicit user authorization."
45
+ strength: secondary
24
46
  tags:
25
47
  category: privilege-escalation
26
48
  subcategory: casual-escalation
@@ -30,6 +30,39 @@ references:
30
30
  - "CVE-2024-3402"
31
31
  - "CVE-2025-53773"
32
32
 
33
+ compliance:
34
+ owasp_agentic:
35
+ - id: ASI01:2026
36
+ context: "Direct prompt injection is the canonical agent goal hijack vector — adversarial user input overrides the agent's assigned objectives and behavioral constraints via instruction-override verbs, persona switching, and encoding obfuscation."
37
+ strength: primary
38
+ owasp_llm:
39
+ - id: LLM01:2025
40
+ context: "This rule is the primary runtime implementation of the LLM01:2025 Prompt Injection category, covering instruction-override verbs, fake system delimiters, restriction removal, and encoding-wrapped payloads."
41
+ strength: primary
42
+ eu_ai_act:
43
+ - article: "15"
44
+ context: "High-risk AI systems must be resilient against adversarial attempts to alter output or behavior. Deployment of this detection rule satisfies the Article 15 requirement to implement technical measures ensuring robustness against manipulation."
45
+ strength: primary
46
+ - article: "9"
47
+ context: "Prompt injection is a documented risk class; this rule implements the monitoring control required by Article 9 risk management obligations for high-risk AI systems."
48
+ strength: secondary
49
+ nist_ai_rmf:
50
+ - function: Manage
51
+ subcategory: MG.2.3
52
+ context: "Treating direct prompt injection as an identified AI risk requires active runtime countermeasures; this detection rule is the primary risk treatment implementation."
53
+ strength: primary
54
+ - function: Map
55
+ subcategory: MP.5.1
56
+ context: "This rule identifies adversarial input manipulation as an AI risk to be catalogued in the organizational risk register."
57
+ strength: secondary
58
+ iso_42001:
59
+ - clause: "6.2"
60
+ context: "Addressing adversarial manipulation risk is an objective required under clause 6.2 AIMS information security planning; this rule operationalizes the detection control measure."
61
+ strength: primary
62
+ - clause: "8.4"
63
+ context: "Impact assessment for AI deployments under clause 8.4 must account for adversarial user inputs; detection events from this rule provide the required monitoring evidence."
64
+ strength: secondary
65
+
33
66
  tags:
34
67
  category: prompt-injection
35
68
  subcategory: direct
@@ -33,6 +33,42 @@ references:
33
33
  - "CVE-2025-32711"
34
34
  - "CVE-2026-24307"
35
35
 
36
+ compliance:
37
+ owasp_agentic:
38
+ - id: ASI01:2026
39
+ context: "Indirect prompt injection hijacks agent goals via externally-consumed content (documents, web pages, API responses); the agent processes attacker-controlled instructions without user awareness."
40
+ strength: primary
41
+ - id: ASI06:2026
42
+ context: "Injection via external content poisons the agent's context window and memory with attacker-controlled directives, which falls under the ASI06 Memory and Context Poisoning category."
43
+ strength: secondary
44
+ owasp_llm:
45
+ - id: LLM01:2025
46
+ context: "Indirect prompt injection via HTML comments, zero-width characters, hidden CSS text, and data URIs is a primary LLM01 attack variant delivered through external content rather than direct user input."
47
+ strength: primary
48
+ eu_ai_act:
49
+ - article: "15"
50
+ context: "High-risk AI systems must resist adversarial content embedded in external inputs. Detection of hidden injection payloads in consumed documents satisfies Article 15 robustness and cybersecurity requirements."
51
+ strength: primary
52
+ - article: "9"
53
+ context: "Indirect injection from third-party content sources is a documented risk category requiring mitigation controls under Article 9 risk management obligations."
54
+ strength: secondary
55
+ nist_ai_rmf:
56
+ - function: Manage
57
+ subcategory: MG.2.3
58
+ context: "Runtime detection of injection payloads embedded in third-party content implements the risk treatment for indirect prompt injection identified in the AI risk register."
59
+ strength: primary
60
+ - function: Map
61
+ subcategory: MP.3.3
62
+ context: "External content providers are third-party components in the AI supply chain; this rule identifies their attack surface as a risk source."
63
+ strength: secondary
64
+ iso_42001:
65
+ - clause: "6.2"
66
+ context: "Clause 6.2 AIMS planning requires controls for externally-sourced risks; this rule operationalizes the detection measure for indirect injection via consumed content."
67
+ strength: primary
68
+ - clause: "8.5"
69
+ context: "Externally-provided content processed by the agent falls under clause 8.5 control of externally-provided processes; this rule validates that external content does not contain adversarial directives."
70
+ strength: secondary
71
+
36
72
  tags:
37
73
  category: prompt-injection
38
74
  subcategory: indirect
@@ -32,6 +32,29 @@ references:
32
32
  - "CVE-2024-3402"
33
33
  - "CVE-2025-53773"
34
34
 
35
+ compliance:
36
+ eu_ai_act:
37
+ - article: "15"
38
+ context: "High-risk AI systems must be resilient against adversarial attempts to suppress safety mechanisms. Jailbreak detection is a concrete cybersecurity control satisfying Article 15 requirements for robustness against input-based manipulation."
39
+ strength: primary
40
+ - article: "9"
41
+ context: "Jailbreak attempts constitute a documented risk class in the AI system risk register; Article 9 requires that monitoring controls are deployed to detect these attempts at runtime."
42
+ strength: secondary
43
+ nist_ai_rmf:
44
+ - subcategory: "MP.5.1"
45
+ context: "Jailbreak attempts are a primary class of adversarial input attacks against AI systems; MP.5.1 requires that adversarial input risks are identified and tracked so that runtime detection controls like this rule can be deployed."
46
+ strength: primary
47
+ - subcategory: "MG.2.3"
48
+ context: "Detected jailbreak patterns represent active exploitation of AI safety mechanisms, triggering the risk treatment response plans required by MG.2.3 to contain and remediate adversarial prompt attacks."
49
+ strength: secondary
50
+ iso_42001:
51
+ - clause: "6.2"
52
+ context: "ISO 42001 clause 6.2 requires AI risk treatment activities to be planned and implemented; jailbreak detection rules operationalize the risk treatment plan for adversarial prompt-based safety bypass attacks."
53
+ strength: primary
54
+ - clause: "8.6"
55
+ context: "Clause 8.6 operational controls ensure AI systems execute correctly and consistently; runtime jailbreak detection enforces that safety constraints remain active despite adversarial instructions to disable them."
56
+ strength: secondary
57
+
35
58
  tags:
36
59
  category: prompt-injection
37
60
  subcategory: jailbreak
@@ -26,6 +26,43 @@ references:
26
26
  - AML.T0010 - ML Supply Chain Compromise
27
27
  mitre_attack:
28
28
  - T1195 - Supply Chain Compromise
29
+
30
+ compliance:
31
+ owasp_agentic:
32
+ - id: ASI04:2026
33
+ context: "MCP skill impersonation via typosquatting, namespace collision, and version spoofing is the primary ASI04 Agentic Supply Chain Vulnerabilities attack vector — malicious skills masquerade as trusted tools to gain agent execution context."
34
+ strength: primary
35
+ owasp_llm:
36
+ - id: LLM03:2025
37
+ context: "Typosquatted and impersonated MCP skills are supply chain compromise artifacts targeting the tool ecosystem; this rule implements LLM03:2025 Supply Chain Vulnerabilities detection at the skill-name level."
38
+ strength: primary
39
+ - id: LLM05:2025
40
+ context: "An agent invoking an impersonated skill may receive malicious responses that require LLM05:2025 Improper Output Handling controls; this rule prevents the initial tool invocation before output is processed."
41
+ strength: secondary
42
+ eu_ai_act:
43
+ - article: "13"
44
+ context: "Article 13 transparency requires that AI systems operate with clearly identified components; skill impersonation violates this requirement by substituting unauthorized tools that appear legitimate."
45
+ strength: primary
46
+ - article: "9"
47
+ context: "Supply chain compromise via malicious skill registries is a documented risk requiring monitoring controls under Article 9; skill-name pattern detection is the runtime enforcement of those controls."
48
+ strength: secondary
49
+ nist_ai_rmf:
50
+ - function: Map
51
+ subcategory: MP.2.3
52
+ context: "Identifying typosquatted and impersonated MCP skills as AI supply chain risks implements MP.2.3 AI supply chain risk identification at the tool-registry level."
53
+ strength: primary
54
+ - function: Govern
55
+ subcategory: GV.1.2
56
+ context: "GV.1.2 accountability roles must include responsibility for validating third-party tool integrity; this rule provides the automated signal needed to fulfill that governance obligation."
57
+ strength: secondary
58
+ iso_42001:
59
+ - clause: "8.5"
60
+ context: "MCP skills are externally-provided AI-related components under clause 8.5; this rule enforces controls over externally-provided tools by detecting impersonation before invocation."
61
+ strength: primary
62
+ - clause: "6.2"
63
+ context: "Clause 6.2 AIMS security planning requires controls for third-party component integrity; skill impersonation detection operationalizes that planning objective at runtime."
64
+ strength: secondary
65
+
29
66
  tags:
30
67
  category: skill-compromise
31
68
  subcategory: skill-impersonation
@@ -40,6 +40,45 @@ references:
40
40
  - "CVE-2025-59536"
41
41
  - "CVE-2026-21852"
42
42
 
43
+ compliance:
44
+ owasp_agentic:
45
+ - id: ASI02:2026
46
+ context: "Malicious content injected via MCP tool responses is the primary ASI02 Tool Misuse and Exploitation vector — a compromised or impersonated MCP server weaponizes the tool call interface to deliver shells, encoded payloads, and privilege escalation commands."
47
+ strength: primary
48
+ - id: ASI05:2026
49
+ context: "Shell commands and code execution payloads in tool responses aim to trigger unexpected code execution by the agent, falling under the ASI05 Unexpected Code Execution category."
50
+ strength: secondary
51
+ owasp_llm:
52
+ - id: LLM01:2025
53
+ context: "Prompt injection delivered through MCP tool responses is an indirect LLM01:2025 attack variant where the injection payload is embedded in tool output rather than user input."
54
+ strength: primary
55
+ - id: LLM05:2025
56
+ context: "Failure to validate MCP tool response content before agent processing is an LLM05:2025 Improper Output Handling scenario enabling downstream command injection and reverse shell execution."
57
+ strength: secondary
58
+ eu_ai_act:
59
+ - article: "15"
60
+ context: "MCP tool response injection attacks the cybersecurity integrity of the AI system; Article 15 requires technical measures ensuring the system can resist such third-party content attacks."
61
+ strength: primary
62
+ - article: "9"
63
+ context: "Compromised MCP server responses are a documented attack surface in the AI system risk register; Article 9 requires detection controls to manage this identified risk."
64
+ strength: secondary
65
+ nist_ai_rmf:
66
+ - function: Manage
67
+ subcategory: MG.2.3
68
+ context: "Runtime detection of malicious MCP tool responses is the primary risk treatment for tool-poisoning attacks identified in the AI risk register."
69
+ strength: primary
70
+ - function: Map
71
+ subcategory: MP.3.3
72
+ context: "MCP servers are third-party components in the AI tool ecosystem; identifying malicious tool responses is an MP.3.3 third-party component risk detection action."
73
+ strength: secondary
74
+ iso_42001:
75
+ - clause: "6.2"
76
+ context: "Clause 6.2 AIMS security planning requires controls for third-party tool interfaces; this rule operationalizes the detection measure for malicious content delivered via MCP."
77
+ strength: primary
78
+ - clause: "8.5"
79
+ context: "MCP server integrations are externally-provided AI-related processes under clause 8.5; this rule validates that external tool responses do not contain adversarial payloads before the agent acts on them."
80
+ strength: secondary
81
+
43
82
  tags:
44
83
  category: tool-poisoning
45
84
  subcategory: mcp-response-injection