agent-threat-rules 2.2.1 → 3.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. package/README.md +365 -327
  2. package/dist/engine.d.ts +46 -1
  3. package/dist/engine.d.ts.map +1 -1
  4. package/dist/engine.js +242 -1
  5. package/dist/engine.js.map +1 -1
  6. package/dist/eval/eval-harness.d.ts.map +1 -1
  7. package/dist/eval/eval-harness.js +9 -0
  8. package/dist/eval/eval-harness.js.map +1 -1
  9. package/dist/eval/run-hackaprompt-benchmark.js +9 -0
  10. package/dist/eval/run-hackaprompt-benchmark.js.map +1 -1
  11. package/dist/eval/run-pint-benchmark.js +9 -0
  12. package/dist/eval/run-pint-benchmark.js.map +1 -1
  13. package/dist/eval/skill-benchmark.d.ts +11 -0
  14. package/dist/eval/skill-benchmark.d.ts.map +1 -1
  15. package/dist/eval/skill-benchmark.js +57 -0
  16. package/dist/eval/skill-benchmark.js.map +1 -1
  17. package/dist/measurement/from-eval-harness.d.ts +70 -0
  18. package/dist/measurement/from-eval-harness.d.ts.map +1 -0
  19. package/dist/measurement/from-eval-harness.js +49 -0
  20. package/dist/measurement/from-eval-harness.js.map +1 -0
  21. package/dist/measurement/schema.d.ts +152 -0
  22. package/dist/measurement/schema.d.ts.map +1 -0
  23. package/dist/measurement/schema.js +178 -0
  24. package/dist/measurement/schema.js.map +1 -0
  25. package/dist/measurement/write.d.ts +64 -0
  26. package/dist/measurement/write.d.ts.map +1 -0
  27. package/dist/measurement/write.js +163 -0
  28. package/dist/measurement/write.js.map +1 -0
  29. package/dist/semantic-evaluator.d.ts +48 -0
  30. package/dist/semantic-evaluator.d.ts.map +1 -0
  31. package/dist/semantic-evaluator.js +107 -0
  32. package/dist/semantic-evaluator.js.map +1 -0
  33. package/dist/trace-evaluator.d.ts +22 -0
  34. package/dist/trace-evaluator.d.ts.map +1 -0
  35. package/dist/trace-evaluator.js +249 -0
  36. package/dist/trace-evaluator.js.map +1 -0
  37. package/dist/types.d.ts +143 -0
  38. package/dist/types.d.ts.map +1 -1
  39. package/package.json +5 -3
  40. package/rules/agent-manipulation/ATR-2026-00552-goal-drift-after-pressure-injection.yaml +216 -0
  41. package/rules/context-exfiltration/ATR-2026-00524-claude-code-anthropic-base-url-credential-exfil.yaml +257 -0
  42. package/rules/context-exfiltration/ATR-2026-00548-cross-agent-session-context-leak.yaml +177 -0
  43. package/rules/excessive-autonomy/ATR-2026-00553-runaway-tool-loop-behavioral.yaml +174 -0
  44. package/rules/privilege-escalation/ATR-2026-00528-praisonai-auth-disabled-default.yaml +192 -0
  45. package/rules/privilege-escalation/ATR-2026-00539-crewai-codeinterpreter-sandbox-escape-rce.yaml +292 -0
  46. package/rules/privilege-escalation/ATR-2026-00546-crewai-json-loader-local-file-read.yaml +162 -0
  47. package/rules/privilege-escalation/ATR-2026-00547-crewai-rag-url-ssrf-bypass.yaml +167 -0
  48. package/rules/privilege-escalation/ATR-2026-00549-destructive-tool-without-human-approval.yaml +193 -0
  49. package/rules/privilege-escalation/ATR-2026-00551-cross-conversation-memory-write.yaml +198 -0
  50. package/rules/prompt-injection/ATR-2026-00535-windsurf-ide-zero-click-prompt-injection.yaml +199 -0
  51. package/rules/prompt-injection/ATR-2026-00550-untrusted-retrieval-to-privileged-tool.yaml +199 -0
  52. package/rules/skill-compromise/ATR-2026-00123-skill-overreach-permissions.yaml +5 -2
  53. package/rules/skill-compromise/ATR-2026-00523-claude-code-hooks-session-start-pre-trust-rce.yaml +221 -0
  54. package/rules/skill-compromise/ATR-2026-00525-mini-shai-hulud-gh-token-monitor-persistence.yaml +220 -0
  55. package/rules/skill-compromise/ATR-2026-00527-skill-silent-git-remote-mirror-exfiltration.yaml +201 -0
  56. package/rules/tool-poisoning/ATR-2026-00526-claude-code-shell-metachar-in-double-quoted-path.yaml +167 -0
  57. package/rules/tool-poisoning/ATR-2026-00529-litellm-proxy-sqli-cisa-kev.yaml +158 -0
  58. package/rules/tool-poisoning/ATR-2026-00530-ms-agent-shell-tool-unsanitized-argv-rce.yaml +184 -0
  59. package/rules/tool-poisoning/ATR-2026-00531-praisonai-unauthenticated-agent-api.yaml +174 -0
  60. package/rules/tool-poisoning/ATR-2026-00532-apache-doris-mcp-sql-injection.yaml +155 -0
  61. package/rules/tool-poisoning/ATR-2026-00533-apache-pinot-mcp-unauthenticated-takeover.yaml +151 -0
  62. package/rules/tool-poisoning/ATR-2026-00534-alibaba-rds-mcp-unauthenticated-metadata-exfil.yaml +155 -0
  63. package/rules/tool-poisoning/ATR-2026-00536-nginx-ui-mcp-unauthenticated-command-execution.yaml +199 -0
  64. package/rules/tool-poisoning/ATR-2026-00537-fastmcp-server-name-cmd-injection-windows.yaml +226 -0
  65. package/rules/tool-poisoning/ATR-2026-00538-langchain-chatchat-mcp-stdio-unauthenticated-rce.yaml +244 -0
  66. package/rules/tool-poisoning/ATR-2026-00540-praisonai-parse-mcp-command-cli-injection.yaml +186 -0
  67. package/rules/tool-poisoning/ATR-2026-00541-agent-zero-mcp-config-command-injection.yaml +183 -0
  68. package/rules/tool-poisoning/ATR-2026-00542-upsonic-mcp-command-allowlist-bypass.yaml +166 -0
  69. package/rules/tool-poisoning/ATR-2026-00543-litellm-mcp-server-argv-injection.yaml +168 -0
  70. package/rules/tool-poisoning/ATR-2026-00544-praisonai-pth-file-path-traversal-rce.yaml +172 -0
  71. package/rules/tool-poisoning/ATR-2026-00545-praisonai-tool-override-unauth-rce.yaml +170 -0
  72. package/spec/README.md +279 -0
  73. package/spec/atr-correlation-v1.0.md +281 -0
  74. package/spec/atr-event-v1.0.md +294 -0
  75. package/spec/atr-language-detection-v1.0.md +218 -0
  76. package/spec/atr-method-v1.1.md +557 -0
  77. package/spec/atr-profile-v1.0.md +307 -0
  78. package/spec/atr-schema.yaml +279 -8
  79. package/spec/category-registry/v1.0.yaml +200 -0
  80. package/spec/conformance/README.md +244 -0
  81. package/spec/conformance/SIGNING.md +191 -0
  82. package/spec/conformance/baseline/fixtures/ATR-2026-00001-tp-001/expected.json +36 -0
  83. package/spec/conformance/baseline/fixtures/ATR-2026-00001-tp-001/input.json +16 -0
  84. package/spec/conformance/baseline/fixtures/README.md +120 -0
  85. package/spec/conformance/baseline/manifest.json +56 -0
  86. package/spec/conformance/expected-results.schema.json +121 -0
  87. package/spec/external-registries/cccs-yara.md +142 -0
  88. package/spec/internet-drafts/draft-lin-atr-core-00.html +1925 -0
  89. package/spec/internet-drafts/draft-lin-atr-core-00.md +288 -0
  90. package/spec/internet-drafts/draft-lin-atr-core-00.txt +560 -0
  91. package/spec/internet-drafts/draft-lin-atr-core-00.xml +424 -0
  92. package/spec/mappings/README.md +43 -0
  93. package/spec/mappings/atr-to-nist-csf-2.0.md +234 -0
  94. package/spec/schema/correlation.schema.json +144 -0
  95. package/spec/schema/event.schema.json +233 -0
  96. package/spec/schema/profile.schema.json +196 -0
  97. package/spec/schema/rule.schema.json +224 -0
  98. package/spec/stix-extension/README.md +76 -13
  99. package/spec/stix-extension/examples/atr-rule-trace-method-example.json +85 -0
  100. package/spec/stix-extension/extension-definition.json +23 -3
  101. package/spec/stix-extension/x-atr-rule-schema.json +107 -11
@@ -0,0 +1,162 @@
1
+ title: "CrewAI JSON Loader Arbitrary Local File Read (CVE-2026-2285)"
2
+ id: ATR-2026-00546
3
+ rule_version: 1
4
+ status: draft
5
+ description: >
6
+ Detects CVE-2026-2285 (CVSS HIGH, CWE-22): CrewAI's JSON document loader
7
+ accepts a file path without sanitisation, allowing an agent or tool input
8
+ to traverse outside the intended data directory and read arbitrary local
9
+ files (e.g., /etc/passwd, ~/.ssh/id_rsa, .env secrets) by supplying a
10
+ path-traversal sequence in the loader argument.
11
+
12
+ The vulnerability is part of the CERT/CC VU#221883 advisory cluster
13
+ (four CrewAI CVEs, 2026-03-30). The JSON loader is invoked when CrewAI
14
+ processes RAG documents; a crafted document path causes the loader to
15
+ return the contents of an attacker-specified file, which the agent then
16
+ includes in its context and may exfiltrate via subsequent tool calls.
17
+
18
+ Detection covers:
19
+ (a) Path-traversal sequences in JSON loader file path arguments;
20
+ (b) Absolute paths to sensitive files (e.g., /etc/passwd, .env, .ssh);
21
+ (c) CrewAI document loader with directory traversal or sensitive-file targets;
22
+ (d) Explicit CVE-2026-2285 / CrewAI JSON loader file-read framing.
23
+ author: "ATR Community"
24
+ date: "2026/05/28"
25
+ schema_version: "0.1"
26
+ detection_tier: pattern
27
+ maturity: draft
28
+ severity: high
29
+
30
+ references:
31
+ owasp_llm:
32
+ - "LLM06:2025 - Excessive Agency"
33
+ - "LLM05:2025 - Improper Output Handling"
34
+ owasp_agentic:
35
+ - "ASI03:2026 - Data Exfiltration"
36
+ mitre_atlas:
37
+ - "AML.T0043 - Craft Adversarial Data"
38
+ mitre_attack:
39
+ - "T1083 - File and Directory Discovery"
40
+ - "T1552 - Unsecured Credentials"
41
+ cve:
42
+ - "CVE-2026-2285"
43
+
44
+ metadata_provenance:
45
+ mitre_atlas: human-reviewed
46
+ owasp_llm: human-reviewed
47
+ owasp_agentic: human-reviewed
48
+
49
+ compliance:
50
+ eu_ai_act:
51
+ - article: "15"
52
+ context: >
53
+ CVE-2026-2285 allows arbitrary local file read via the CrewAI JSON
54
+ loader without path sanitisation; Article 15 cybersecurity requirements
55
+ mandate that AI agent document loaders canonicalise paths and restrict
56
+ access to the intended data directory.
57
+ strength: primary
58
+ nist_ai_rmf:
59
+ - subcategory: "MP.5.1"
60
+ context: >
61
+ Path traversal in a document loader argument enabling sensitive file
62
+ read constitutes an adversarial input; MP.5.1 requires scanning
63
+ document loader path arguments for traversal sequences.
64
+ strength: primary
65
+ iso_42001:
66
+ - clause: "8.6"
67
+ context: >
68
+ Operational controls must detect and block document loader invocations
69
+ with path traversal sequences targeting sensitive files outside the
70
+ intended data scope.
71
+ strength: primary
72
+
73
+ tags:
74
+ category: privilege-escalation
75
+ subcategory: path-traversal-file-read
76
+ scan_target: both
77
+ confidence: high
78
+ source: cve-disclosure
79
+ vendor_sources: crewai-vu221883
80
+
81
+ agent_source:
82
+ type: llm_io
83
+ framework:
84
+ - crewai
85
+ - any
86
+ provider:
87
+ - any
88
+
89
+ detection:
90
+ condition: any
91
+ false_positives:
92
+ - "Legitimate document loaders referencing known-good data directories — pattern requires traversal or sensitive-file targets."
93
+ - "Security advisory text quoting CVE-2026-2285 file paths."
94
+ - "References to /etc/ in system administration documentation without traversal context."
95
+ conditions:
96
+ - field: content
97
+ operator: regex
98
+ value: '(?i)(?:json[_-]?loader|load[_-]?json|JSONLoader|document[_-]?loader)[^\n]{0,200}(?:(?:\.\.\/|\.\.\\){2,}|(?:\/etc\/(?:passwd|shadow|hosts)|\/root\/|\/home\/[^/]+\/\.ssh|\.env|\.aws\/credentials|\.npmrc|id_rsa))'
99
+ description: >
100
+ JSON/document loader invocation with either a path-traversal sequence
101
+ (2+ levels up) or a known sensitive-file target — CVE-2026-2285
102
+ local file read via CrewAI document loader
103
+
104
+ - field: content
105
+ operator: regex
106
+ value: '(?i)(?:crewai|crew_ai|CrewAI)[^\n]{0,200}(?:json[_-]?loader|load[_-]?json|document[_-]?loader)[^\n]{0,200}(?:\.\.\/|\/etc\/|\.env|\.ssh|id_rsa|credentials)'
107
+ description: >
108
+ CrewAI combined with a document/JSON loader and a sensitive path
109
+ or traversal sequence — attack framing specific to CVE-2026-2285
110
+
111
+ - field: content
112
+ operator: regex
113
+ value: '(?i)(?:CVE-2026-2285|crewai[^\n]{0,80}(?:file[_\s-]?read|local[_\s-]?file|json[_\s-]?loader))[^\n]{0,200}(?:inject|read|exfil|bypass|traverse|sensitive|passwd|\.env|\.ssh)'
114
+ description: >
115
+ Explicit CVE-2026-2285 reference or CrewAI file-read framing combined
116
+ with exploitation language or sensitive file targets
117
+
118
+ response:
119
+ actions:
120
+ - alert
121
+ - block_tool
122
+ message_template: >
123
+ [ATR-2026-00546] HIGH: CrewAI JSON loader with path traversal or
124
+ sensitive-file target detected (CVE-2026-2285, CWE-22). The CrewAI JSON
125
+ loader does not sanitise document paths, allowing traversal to read
126
+ arbitrary local files (/etc/passwd, .env, SSH keys). Block this loader
127
+ call; canonicalise document paths and restrict loader access to the
128
+ intended data directory. Part of VU#221883 advisory cluster — upgrade
129
+ CrewAI to the patched version.
130
+
131
+ confidence: 80
132
+
133
+ wild_fp_rate: 0
134
+
135
+ test_cases:
136
+ true_positives:
137
+ - input: "JSONLoader('../../../../etc/passwd')"
138
+ expected: triggered
139
+ description: "JSON loader with path traversal to /etc/passwd"
140
+ - input: "document_loader path: ../../../.env"
141
+ expected: triggered
142
+ description: "Document loader with traversal to .env secrets file"
143
+ - input: "crewai json_loader ../../.ssh/id_rsa exfil CVE-2026-2285"
144
+ expected: triggered
145
+ description: "CrewAI JSON loader + SSH key + CVE reference"
146
+ - input: "JSONLoader('/root/.aws/credentials')"
147
+ expected: triggered
148
+ description: "JSON loader with absolute path to AWS credentials"
149
+ - input: "CVE-2026-2285: CrewAI JSON loader reads /etc/passwd via unsanitised path"
150
+ expected: triggered
151
+ description: "CVE-2026-2285 exploitation framing with sensitive file target"
152
+
153
+ true_negatives:
154
+ - input: "JSONLoader('/data/documents/report.json')"
155
+ expected: not_triggered
156
+ description: "Legitimate JSON loader with a safe absolute data path"
157
+ - input: "crewai loads documents from the /tmp/uploads/ directory"
158
+ expected: not_triggered
159
+ description: "Normal CrewAI document directory reference — no traversal"
160
+ - input: "Upgrade CrewAI to fix CVE-2026-2285"
161
+ expected: not_triggered
162
+ description: "Mitigation advisory without exploit content"
@@ -0,0 +1,167 @@
1
+ title: "CrewAI RAG URL Validation Bypass SSRF (CVE-2026-2286)"
2
+ id: ATR-2026-00547
3
+ rule_version: 1
4
+ status: draft
5
+ description: >
6
+ Detects CVE-2026-2286 (CVSS HIGH, CWE-918): CrewAI's RAG (retrieval-augmented
7
+ generation) URL validator can be bypassed by URL encoding, mixed-case,
8
+ or alternative representations (decimal IP, hex IP, IPv6 loopback, DNS
9
+ rebinding shapes) to perform server-side request forgery against internal
10
+ services on the CrewAI host network.
11
+
12
+ The vulnerability is part of the CERT/CC VU#221883 advisory cluster
13
+ (four CrewAI CVEs, 2026-03-30). An agent or tool input that passes a
14
+ crafted RAG source URL causes CrewAI to make internal HTTP requests to
15
+ metadata services (e.g., AWS IMDS 169.254.169.254), internal APIs, or
16
+ services on private RFC-1918 ranges — enabling cloud credential theft,
17
+ internal service enumeration, or lateral movement.
18
+
19
+ Detection covers:
20
+ (a) RAG source URLs containing SSRF bypass representations of loopback /
21
+ link-local / private IP ranges;
22
+ (b) URL-encoded, hex, octal, or decimal IP representations of internal
23
+ addresses in RAG contexts;
24
+ (c) CrewAI RAG with cloud metadata endpoint patterns (169.254.169.254);
25
+ (d) Explicit CVE-2026-2286 / CrewAI SSRF framing.
26
+ author: "ATR Community"
27
+ date: "2026/05/28"
28
+ schema_version: "0.1"
29
+ detection_tier: pattern
30
+ maturity: draft
31
+ severity: high
32
+
33
+ references:
34
+ owasp_llm:
35
+ - "LLM06:2025 - Excessive Agency"
36
+ owasp_agentic:
37
+ - "ASI03:2026 - Data Exfiltration"
38
+ - "ASI05:2026 - Unexpected Code Execution"
39
+ mitre_atlas:
40
+ - "AML.T0043 - Craft Adversarial Data"
41
+ mitre_attack:
42
+ - "T1090 - Proxy"
43
+ - "T1552.005 - Cloud Instance Metadata API"
44
+ cve:
45
+ - "CVE-2026-2286"
46
+
47
+ metadata_provenance:
48
+ mitre_atlas: human-reviewed
49
+ owasp_llm: human-reviewed
50
+ owasp_agentic: human-reviewed
51
+
52
+ compliance:
53
+ eu_ai_act:
54
+ - article: "15"
55
+ context: >
56
+ CVE-2026-2286 allows bypassing CrewAI RAG URL validation to perform
57
+ SSRF against internal services and cloud metadata APIs; Article 15
58
+ cybersecurity requirements mandate robust URL validation that blocks
59
+ all representations of internal addresses.
60
+ strength: primary
61
+ nist_ai_rmf:
62
+ - subcategory: "MP.5.1"
63
+ context: >
64
+ SSRF bypass via crafted RAG URLs targeting cloud metadata or internal
65
+ services constitutes an adversarial input; MP.5.1 requires scanning
66
+ RAG source URLs for SSRF bypass patterns.
67
+ strength: primary
68
+ iso_42001:
69
+ - clause: "8.6"
70
+ context: >
71
+ Operational controls must detect and block RAG URL inputs containing
72
+ SSRF bypass representations of internal or link-local addresses.
73
+ strength: primary
74
+
75
+ tags:
76
+ category: privilege-escalation
77
+ subcategory: ssrf-rag-url-bypass
78
+ scan_target: both
79
+ confidence: high
80
+ source: cve-disclosure
81
+ vendor_sources: crewai-vu221883
82
+
83
+ agent_source:
84
+ type: llm_io
85
+ framework:
86
+ - crewai
87
+ - any
88
+ provider:
89
+ - any
90
+
91
+ detection:
92
+ condition: any
93
+ false_positives:
94
+ - "Security research tools probing SSRF bypass patterns for testing — rule fires by design."
95
+ - "Security advisory text quoting CVE-2026-2286 SSRF payloads."
96
+ - "Legitimate document URLs that contain the string '169' or '127' in path components — evaluate context."
97
+ conditions:
98
+ - field: content
99
+ operator: regex
100
+ value: '(?i)(?:rag|retrieval[_-]?augmented|source[_-]?url|document[_-]?url)[^\n]{0,200}(?:169\.254\.169\.254|fd00:|2130706433|0x7f000001|0177\.0\.0\.1|localhost|127\.0\.[01]\.[01]|::1|0\.0\.0\.0|[a-z0-9-]+\.internal\.|10\.\d{1,3}\.\d{1,3}\.\d{1,3}|192\.168\.\d{1,3}\.\d{1,3}|172\.(?:1[6-9]|2\d|3[01])\.\d{1,3}\.\d{1,3})'
101
+ description: >
102
+ RAG source URL containing cloud metadata endpoint (169.254.169.254),
103
+ decimal/hex/octal loopback representations, IPv6 loopback, or private
104
+ RFC-1918 address ranges — SSRF bypass attempt via CVE-2026-2286
105
+
106
+ - field: content
107
+ operator: regex
108
+ value: '(?i)(?:rag|retrieval[_-]?augmented|source[_-]?url|document[_-]?url)[^\n]{0,200}(?:%31%36%39|%31%32%37|\\u0031|%2e%2e|\\x31\\x36\\x39|0x[aAbBcC][0-9a-fA-F]{6})'
109
+ description: >
110
+ RAG URL with URL-encoded, Unicode-escaped, or hex representations
111
+ of SSRF-relevant IP octets — URL encoding bypass pattern
112
+
113
+ - field: content
114
+ operator: regex
115
+ value: '(?i)(?:CVE-2026-2286|crewai[^\n]{0,80}(?:ssrf|rag[_\s-]?url|url[_\s-]?bypass|metadata))[^\n]{0,200}(?:inject|bypass|exfil|metadata|169\.254|internal|private[_\s-]?ip)'
116
+ description: >
117
+ Explicit CVE-2026-2286 reference or CrewAI SSRF framing combined
118
+ with exploitation language or internal IP patterns
119
+
120
+ response:
121
+ actions:
122
+ - alert
123
+ - block_tool
124
+ message_template: >
125
+ [ATR-2026-00547] HIGH: CrewAI RAG URL with SSRF bypass pattern detected
126
+ (CVE-2026-2286, CWE-918). CrewAI's RAG URL validator can be bypassed
127
+ by encoded or alternative representations of internal/metadata addresses,
128
+ enabling SSRF against cloud metadata services (169.254.169.254) or
129
+ internal APIs. Block this URL; enforce a server-side URL allowlist that
130
+ resolves to canonical form before validation. Part of VU#221883 —
131
+ upgrade CrewAI to the patched version.
132
+
133
+ confidence: 82
134
+
135
+ wild_fp_rate: 0
136
+
137
+ test_cases:
138
+ true_positives:
139
+ - input: "rag_source_url: http://169.254.169.254/latest/meta-data/iam/security-credentials/"
140
+ expected: triggered
141
+ description: "RAG URL targeting AWS IMDS cloud metadata endpoint"
142
+ - input: "document_url=http://127.0.0.1:8080/internal-api/admin"
143
+ expected: triggered
144
+ description: "RAG URL targeting localhost internal API"
145
+ - input: "source_url: http://2130706433/ # decimal representation of 127.0.0.1"
146
+ expected: triggered
147
+ description: "Decimal IP SSRF bypass for loopback"
148
+ - input: "rag url http://0x7f000001/ bypass CVE-2026-2286"
149
+ expected: triggered
150
+ description: "Hex IP SSRF bypass with CVE reference"
151
+ - input: "CVE-2026-2286: CrewAI RAG URL validation bypass allows SSRF via encoded internal addresses"
152
+ expected: triggered
153
+ description: "CVE-2026-2286 exploitation framing"
154
+ - input: "retrieval_augmented url: http://192.168.1.1/router/config"
155
+ expected: triggered
156
+ description: "RAG URL targeting RFC-1918 private network address"
157
+
158
+ true_negatives:
159
+ - input: "rag_source_url: https://docs.example.com/api-reference.json"
160
+ expected: not_triggered
161
+ description: "Legitimate RAG URL with external public domain"
162
+ - input: "crewai retrieves documents from https://arxiv.org/abs/2312.12345"
163
+ expected: not_triggered
164
+ description: "Normal CrewAI RAG with public research paper URL"
165
+ - input: "Upgrade CrewAI to fix CVE-2026-2286"
166
+ expected: not_triggered
167
+ description: "Mitigation advisory without exploit content"
@@ -0,0 +1,193 @@
1
+ title: "Destructive tool invocation without prior human approval"
2
+ id: ATR-2026-00549
3
+ rule_version: 1
4
+ status: draft
5
+ description: >
6
+ Detects a destructive tool call in an agent execution trace that is NOT
7
+ preceded by an explicit human-approval span. This is a trace-method rule
8
+ using the `require` primitive (spec/atr-method-v1.1.md §8.3.2), which
9
+ fires when an expected predecessor is MISSING — the canonical mechanism
10
+ for catching silent failures.
11
+
12
+ Threat model: many agent frameworks let operators classify tool calls by
13
+ privilege (read / write / destructive). Production policy says destructive
14
+ calls (e.g., file deletion, transaction commit, schema migration) MUST be
15
+ gated by a human-in-the-loop approval. In practice, prompt-injection
16
+ attacks or runaway agents bypass this gate by inducing the LLM to issue
17
+ a destructive call directly without surfacing it for approval. The trace
18
+ emits the destructive TOOL span without a HUMAN span, or an AGENT span carrying
19
+ an approval attribute such as attributes.human_approval=true, at any earlier point in the same trace.
20
+
21
+ No error is thrown. The action succeeds. The trace looks "normal" if you
22
+ only inspect tool inputs/outputs. The rule fires by detecting the
23
+ absence of the required predecessor — the trace-method capability that
24
+ pattern-based detection cannot express.
25
+ author: "ATR Community"
26
+ date: "2026/05/28"
27
+ schema_version: "1.0"
28
+ maturity: draft
29
+ severity: critical
30
+
31
+ references:
32
+ owasp_agentic:
33
+ - "ASI04:2026 - Unauthorized Resource Access"
34
+ - "ASI05:2026 - Unexpected Code Execution"
35
+ owasp_llm:
36
+ - "LLM06:2025 - Excessive Agency"
37
+ mitre_atlas:
38
+ - "AML.T0053 - LLM Plugin Compromise"
39
+ research:
40
+ - "TraceAegis: Behavioral Constraints over Agent Execution Traces (arXiv:2510.11203)"
41
+ - "AgentArmor: Type-System for Agent Trace Analysis (arXiv:2508.01249)"
42
+ - "Anthropic Computer Use threat model (Build 2024)"
43
+
44
+ compliance:
45
+ nist_csf:
46
+ - "PR.AA-05"
47
+ - "PR.IR-01"
48
+ etsi_ts_104223:
49
+ - "P5.2"
50
+ eu_ai_act:
51
+ - article: "14"
52
+ context: >
53
+ Human oversight — destructive actions must be subject to human
54
+ review per Article 14. Trace evidence of skipped approval is a
55
+ direct violation.
56
+ strength: primary
57
+ nist_ai_rmf:
58
+ - subcategory: "MG.4.1"
59
+ context: >
60
+ Risk management governance — destructive autonomous actions
61
+ without approval are unmitigated risks; trace evidence is
62
+ normative for assessment.
63
+ strength: primary
64
+
65
+ tags:
66
+ category: privilege-escalation
67
+ subcategory: missing-human-approval
68
+ scan_target: runtime
69
+ confidence: high
70
+ source: trace-rule
71
+
72
+ agent_source:
73
+ type: agent_trace
74
+ framework:
75
+ - any
76
+ provider:
77
+ - any
78
+
79
+ detection:
80
+ method: trace
81
+ condition: any
82
+ conditions:
83
+ - field: trace.require_violation
84
+ operator: regex
85
+ value: "(?i)require_violation:destructive_tool_without_approval"
86
+ description: >
87
+ Engine emits this synthetic field when the require primitive
88
+ fires; trace-native engines evaluate detection.trace below.
89
+ trace:
90
+ ingest_format: openinference
91
+ require:
92
+ - target_shape:
93
+ span.kind: "TOOL"
94
+ attributes:
95
+ tool.privilege: "destructive"
96
+ must_be_preceded_by:
97
+ # Match any of the canonical approval signals. Frameworks vary:
98
+ # LangChain HITL emits span.kind: "HUMAN"; AutoGen sets
99
+ # attributes.human_approval=true on AGENT spans; some emit
100
+ # attributes.operator_approved or attributes.confirmed_by_user.
101
+ # An approval is recognized if EITHER the span kind is HUMAN OR
102
+ # any of the canonical approval attributes is truthy.
103
+ one_of_shapes:
104
+ - span.kind: "HUMAN"
105
+ - span.kind: "AGENT"
106
+ attributes:
107
+ human_approval: true
108
+ - span.kind: "AGENT"
109
+ attributes:
110
+ operator_approved: true
111
+ - span.kind: "AGENT"
112
+ attributes:
113
+ approval_granted: true
114
+ - span.kind: "AGENT"
115
+ attributes:
116
+ confirmed_by_user: true
117
+ within_trace: true
118
+ description: >
119
+ Every destructive TOOL span MUST have at least one preceding
120
+ approval span anywhere earlier in the same trace. Recognized
121
+ forms: dedicated HUMAN span kind (OpenInference), or AGENT
122
+ span carrying any canonical approval attribute. The
123
+ `one_of_shapes` matcher is normative per atr-method-v1.1.md
124
+ §8.3 (extends `must_be_preceded_by` to accept a disjunction).
125
+
126
+ response:
127
+ actions:
128
+ - block_tool
129
+ - alert
130
+ - escalate
131
+ message_template: >
132
+ [ATR-2026-00549] CRITICAL: Destructive tool call without human approval
133
+ detected. Tool {{trace.matched_span.attributes.tool.name}} ran at span
134
+ {{trace.matched_span.id}} with privilege=destructive but no preceding
135
+ approval span (HUMAN span, or AGENT span with an approval attribute) was found. Block this tool call, require
136
+ operator review, and audit policy on the calling agent.
137
+
138
+ confidence: 90
139
+
140
+ wild_fp_rate: 0
141
+
142
+ test_cases:
143
+ true_positives:
144
+ - input: |
145
+ {"spans":[{"id":"a1","kind":"AGENT","attributes":{"agent.name":"orchestrator"}},{"id":"t1","kind":"TOOL","attributes":{"tool.name":"db.drop_table","tool.privilege":"destructive"}}]}
146
+ expected: triggered
147
+ description: "Destructive tool runs without any preceding human approval"
148
+ - input: |
149
+ {"spans":[{"id":"a1","kind":"AGENT","attributes":{"agent.name":"orchestrator","human_approval":false}},{"id":"t1","kind":"TOOL","attributes":{"tool.name":"file.delete","tool.privilege":"destructive"}}]}
150
+ expected: triggered
151
+ description: "AGENT span explicitly sets human_approval=false; destructive tool follows"
152
+ - input: |
153
+ {"spans":[{"id":"r1","kind":"RETRIEVER","attributes":{"source":"untrusted"}},{"id":"t1","kind":"TOOL","attributes":{"tool.name":"git.force_push","tool.privilege":"destructive"}}]}
154
+ expected: triggered
155
+ description: "Destructive tool preceded only by untrusted retrieval (no approval span)"
156
+ - input: |
157
+ {"spans":[{"id":"a1","kind":"AGENT","attributes":{"agent.name":"planner"}},{"id":"a2","kind":"AGENT","attributes":{"agent.name":"executor"}},{"id":"t1","kind":"TOOL","attributes":{"tool.name":"k8s.delete_namespace","tool.privilege":"destructive"}}]}
158
+ expected: triggered
159
+ description: "Two AGENT spans, neither setting human_approval=true; destructive tool follows"
160
+ - input: |
161
+ {"spans":[{"id":"a1","kind":"AGENT","attributes":{"agent.name":"sub_orchestrator","human_approval_status":"approved"}},{"id":"t1","kind":"TOOL","attributes":{"tool.name":"prod.deploy","tool.privilege":"destructive"}}]}
162
+ expected: triggered
163
+ description: "Non-canonical approval attribute name (human_approval_status string instead of human_approval boolean) — not recognized, rule fires correctly. Documents the limitation that approval-signal vocabulary is not standardized across frameworks."
164
+
165
+ true_negatives:
166
+ - input: |
167
+ {"spans":[{"id":"a1","kind":"AGENT","attributes":{"agent.name":"approver","human_approval":true}},{"id":"t1","kind":"TOOL","attributes":{"tool.name":"file.read","tool.privilege":"read"}},{"id":"t2","kind":"TOOL","attributes":{"tool.name":"db.truncate","tool.privilege":"destructive"}}]}
168
+ expected: not_triggered
169
+ description: "Approval span IS present before destructive tool — no violation"
170
+ - input: |
171
+ {"spans":[{"id":"a1","kind":"AGENT","attributes":{"agent.name":"executor","human_approval":true}},{"id":"t1","kind":"TOOL","attributes":{"tool.name":"db.drop_table","tool.privilege":"destructive"}}]}
172
+ expected: not_triggered
173
+ description: "human_approval=true on preceding AGENT span — invariant satisfied"
174
+ - input: |
175
+ {"spans":[{"id":"a1","kind":"AGENT","attributes":{"agent.name":"reader"}},{"id":"t1","kind":"TOOL","attributes":{"tool.name":"file.read","tool.privilege":"read"}}]}
176
+ expected: not_triggered
177
+ description: "Tool privilege is read, not destructive — rule does not apply"
178
+ - input: |
179
+ {"spans":[{"id":"a1","kind":"AGENT","attributes":{"agent.name":"plan"}},{"id":"t1","kind":"TOOL","attributes":{"tool.name":"file.write","tool.privilege":"write"}}]}
180
+ expected: not_triggered
181
+ description: "write privilege (not destructive) — rule does not apply"
182
+ - input: |
183
+ {"spans":[{"id":"a1","kind":"AGENT","attributes":{"agent.name":"thinker"}}]}
184
+ expected: not_triggered
185
+ description: "No TOOL span at all — nothing to check"
186
+ - input: |
187
+ {"spans":[{"id":"a1","kind":"AGENT","attributes":{"human_approval":true}},{"id":"a2","kind":"AGENT","attributes":{"agent.name":"executor"}},{"id":"t1","kind":"TOOL","attributes":{"tool.name":"db.drop_index","tool.privilege":"destructive"}},{"id":"t2","kind":"TOOL","attributes":{"tool.name":"db.drop_index","tool.privilege":"destructive"}}]}
188
+ expected: not_triggered
189
+ description: "Approval covers all subsequent destructive spans within the trace"
190
+ - input: |
191
+ {"spans":[{"id":"h1","kind":"HUMAN","attributes":{"action":"approved"}},{"id":"t1","kind":"TOOL","attributes":{"tool.name":"db.drop_table","tool.privilege":"destructive"}}]}
192
+ expected: not_triggered
193
+ description: "Dedicated HUMAN span kind (LangChain HITL convention) — one_of_shapes branch matches, rule does not fire"