agent-threat-rules 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +299 -0
- package/dist/cli.d.ts +12 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +544 -0
- package/dist/cli.js.map +1 -0
- package/dist/engine.d.ts +127 -0
- package/dist/engine.d.ts.map +1 -0
- package/dist/engine.js +636 -0
- package/dist/engine.js.map +1 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +14 -0
- package/dist/index.js.map +1 -0
- package/dist/loader.d.ts +21 -0
- package/dist/loader.d.ts.map +1 -0
- package/dist/loader.js +124 -0
- package/dist/loader.js.map +1 -0
- package/dist/modules/index.d.ts +143 -0
- package/dist/modules/index.d.ts.map +1 -0
- package/dist/modules/index.js +80 -0
- package/dist/modules/index.js.map +1 -0
- package/dist/modules/session.d.ts +70 -0
- package/dist/modules/session.d.ts.map +1 -0
- package/dist/modules/session.js +128 -0
- package/dist/modules/session.js.map +1 -0
- package/dist/session-tracker.d.ts +56 -0
- package/dist/session-tracker.d.ts.map +1 -0
- package/dist/session-tracker.js +175 -0
- package/dist/session-tracker.js.map +1 -0
- package/dist/types.d.ts +129 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +6 -0
- package/dist/types.js.map +1 -0
- package/package.json +71 -0
- package/rules/agent-manipulation/ATR-2026-030-cross-agent-attack.yaml +175 -0
- package/rules/agent-manipulation/ATR-2026-032-goal-hijacking.yaml +135 -0
- package/rules/agent-manipulation/ATR-2026-074-cross-agent-privilege-escalation.yaml +115 -0
- package/rules/agent-manipulation/ATR-2026-076-inter-agent-message-spoofing.yaml +165 -0
- package/rules/agent-manipulation/ATR-2026-077-human-trust-exploitation.yaml +144 -0
- package/rules/context-exfiltration/ATR-2026-020-system-prompt-leak.yaml +175 -0
- package/rules/context-exfiltration/ATR-2026-021-api-key-exposure.yaml +176 -0
- package/rules/context-exfiltration/ATR-2026-075-agent-memory-manipulation.yaml +115 -0
- package/rules/data-poisoning/ATR-2026-070-data-poisoning.yaml +160 -0
- package/rules/excessive-autonomy/ATR-2026-050-runaway-agent-loop.yaml +134 -0
- package/rules/excessive-autonomy/ATR-2026-051-resource-exhaustion.yaml +137 -0
- package/rules/excessive-autonomy/ATR-2026-052-cascading-failure.yaml +153 -0
- package/rules/model-security/ATR-2026-072-model-behavior-extraction.yaml +115 -0
- package/rules/model-security/ATR-2026-073-malicious-finetuning-data.yaml +108 -0
- package/rules/privilege-escalation/ATR-2026-040-privilege-escalation.yaml +175 -0
- package/rules/privilege-escalation/ATR-2026-041-scope-creep.yaml +124 -0
- package/rules/prompt-injection/ATR-2026-001-direct-prompt-injection.yaml +265 -0
- package/rules/prompt-injection/ATR-2026-002-indirect-prompt-injection.yaml +214 -0
- package/rules/prompt-injection/ATR-2026-003-jailbreak-attempt.yaml +250 -0
- package/rules/prompt-injection/ATR-2026-004-system-prompt-override.yaml +204 -0
- package/rules/prompt-injection/ATR-2026-005-multi-turn-injection.yaml +181 -0
- package/rules/skill-compromise/ATR-2026-060-skill-impersonation.yaml +153 -0
- package/rules/skill-compromise/ATR-2026-061-description-behavior-mismatch.yaml +98 -0
- package/rules/skill-compromise/ATR-2026-062-hidden-capability.yaml +96 -0
- package/rules/skill-compromise/ATR-2026-063-skill-chain-attack.yaml +96 -0
- package/rules/skill-compromise/ATR-2026-064-over-permissioned-skill.yaml +115 -0
- package/rules/skill-compromise/ATR-2026-065-skill-update-attack.yaml +93 -0
- package/rules/skill-compromise/ATR-2026-066-parameter-injection.yaml +106 -0
- package/rules/tool-poisoning/ATR-2026-010-mcp-malicious-response.yaml +237 -0
- package/rules/tool-poisoning/ATR-2026-011-tool-output-injection.yaml +185 -0
- package/rules/tool-poisoning/ATR-2026-012-unauthorized-tool-call.yaml +190 -0
- package/rules/tool-poisoning/ATR-2026-013-tool-ssrf.yaml +208 -0
- package/spec/atr-schema.yaml +375 -0
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
title: "SSRF via Agent Tool Calls"
|
|
2
|
+
id: ATR-2026-013
|
|
3
|
+
status: experimental
|
|
4
|
+
description: >
|
|
5
|
+
Detects Server-Side Request Forgery (SSRF) attempts through agent tool calls.
|
|
6
|
+
Attackers manipulate agents into making requests to internal network endpoints,
|
|
7
|
+
cloud metadata services, localhost, or private IP ranges through tool parameters.
|
|
8
|
+
Detection covers: AWS/GCP/Azure/DigitalOcean metadata endpoints, localhost and
|
|
9
|
+
loopback variants (including decimal, hex, octal IP encoding), private RFC1918
|
|
10
|
+
ranges, internal hostnames, exotic URI schemes (file, gopher, dict, tftp, ldap),
|
|
11
|
+
DNS rebinding indicators, redirect-based SSRF patterns, cloud-specific IMDS
|
|
12
|
+
token headers, IPv6 loopback and mapped addresses, and hostname-based internal
|
|
13
|
+
service discovery. IP encoding evasion techniques (decimal, octal, hex) are
|
|
14
|
+
specifically addressed.
|
|
15
|
+
author: "Panguard AI"
|
|
16
|
+
date: "2026/03/08"
|
|
17
|
+
schema_version: "0.1"
|
|
18
|
+
detection_tier: pattern
|
|
19
|
+
maturity: test
|
|
20
|
+
severity: critical
|
|
21
|
+
|
|
22
|
+
references:
|
|
23
|
+
owasp_llm:
|
|
24
|
+
- "LLM06:2025 - Excessive Agency"
|
|
25
|
+
- "LLM05:2025 - Improper Output Handling"
|
|
26
|
+
owasp_agentic:
|
|
27
|
+
- "ASI02:2026 - Tool Misuse and Exploitation"
|
|
28
|
+
mitre_atlas:
|
|
29
|
+
- "AML.T0049 - Exploit Public-Facing Application"
|
|
30
|
+
mitre_attack:
|
|
31
|
+
- "T1090 - Proxy"
|
|
32
|
+
- "T1071 - Application Layer Protocol"
|
|
33
|
+
cve:
|
|
34
|
+
- "CVE-2019-5418"
|
|
35
|
+
- "CVE-2021-21311"
|
|
36
|
+
|
|
37
|
+
tags:
|
|
38
|
+
category: tool-poisoning
|
|
39
|
+
subcategory: ssrf
|
|
40
|
+
confidence: high
|
|
41
|
+
|
|
42
|
+
agent_source:
|
|
43
|
+
type: tool_call
|
|
44
|
+
framework:
|
|
45
|
+
- any
|
|
46
|
+
provider:
|
|
47
|
+
- any
|
|
48
|
+
|
|
49
|
+
detection:
|
|
50
|
+
conditions:
|
|
51
|
+
# -- Layer 1: AWS IMDS (both v1 and v2) --
|
|
52
|
+
- field: tool_args
|
|
53
|
+
operator: regex
|
|
54
|
+
value: "(?i)(https?://)?169\\.254\\.169\\.254|/latest/(meta-data|user-data|api/token|dynamic|instance-identity)|X-aws-ec2-metadata-token|amazonaws\\.com.{0,50}(credentials|security-credentials|role)|iam[\\/\\\\]security-credentials"
|
|
55
|
+
description: "AWS Instance Metadata Service (IMDSv1/v2) and credential endpoints"
|
|
56
|
+
|
|
57
|
+
# -- Layer 2: GCP metadata --
|
|
58
|
+
- field: tool_args
|
|
59
|
+
operator: regex
|
|
60
|
+
value: "(?i)(https?://)?metadata\\.google\\.internal|/computeMetadata/v1|Metadata-Flavor:\\s*Google"
|
|
61
|
+
description: "GCP metadata service endpoints and required headers"
|
|
62
|
+
|
|
63
|
+
# -- Layer 3: Azure IMDS --
|
|
64
|
+
- field: tool_args
|
|
65
|
+
operator: regex
|
|
66
|
+
value: "(?i)(https?://)?169\\.254\\.169\\.254/metadata|Metadata:\\s*true|api-version=\\d{4}-\\d{2}-\\d{2}.*metadata|management\\.azure\\.com.{0,50}(subscriptions|resourceGroups)"
|
|
67
|
+
description: "Azure Instance Metadata Service and management endpoints"
|
|
68
|
+
|
|
69
|
+
# -- Layer 4: DigitalOcean / Oracle / Alibaba cloud metadata --
|
|
70
|
+
- field: tool_args
|
|
71
|
+
operator: regex
|
|
72
|
+
value: "(?i)(https?://)?169\\.254\\.169\\.254/metadata/v1|/opc/v[12]/|100\\.100\\.100\\.200"
|
|
73
|
+
description: "DigitalOcean, Oracle Cloud, and Alibaba Cloud metadata endpoints"
|
|
74
|
+
|
|
75
|
+
# -- Layer 5: Localhost and loopback (standard) --
|
|
76
|
+
- field: tool_args
|
|
77
|
+
operator: regex
|
|
78
|
+
value: "(?i)(https?://)\\b(localhost|127\\.0\\.0\\.1|0\\.0\\.0\\.0|\\[?::1\\]?|0177\\.0\\.0\\.1|0x7f\\.0\\.0\\.1|2130706433)\\b(:\\d+)?|\\b(localhost|127\\.0\\.0\\.1|0\\.0\\.0\\.0|\\[?::1\\]?|0177\\.0\\.0\\.1|0x7f\\.0\\.0\\.1|2130706433)(:\\d+)/|\\b(localhost|127\\.0\\.0\\.1|0\\.0\\.0\\.0)(:\\d+)(?=\\s|$|[\"'\\]}>])"
|
|
79
|
+
description: "Localhost/loopback in URL context (with scheme, port+path, or port at boundary)"
|
|
80
|
+
|
|
81
|
+
# -- Layer 6: Loopback IP encoding evasion --
|
|
82
|
+
- field: tool_args
|
|
83
|
+
operator: regex
|
|
84
|
+
value: "(?i)(https?://)?(0x7f000001|0x7f\\.0x0\\.0x0\\.0x1|017700000001|0177\\.0000\\.0000\\.0001|127\\.0?0?1|127\\.1|0\\.0\\.0\\.0|0x0\\.0x0\\.0x0\\.0x0|0000\\.0000\\.0000\\.0000)"
|
|
85
|
+
description: "Encoded loopback and unspecified (0.0.0.0) addresses (hex, octal, short forms)"
|
|
86
|
+
|
|
87
|
+
# -- Layer 7: Private RFC1918 ranges --
|
|
88
|
+
- field: tool_args
|
|
89
|
+
operator: regex
|
|
90
|
+
value: "(?i)(https?://)?\\b(10\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}|172\\.(1[6-9]|2[0-9]|3[01])\\.\\d{1,3}\\.\\d{1,3}|192\\.168\\.\\d{1,3}\\.\\d{1,3})\\b(:\\d+)?"
|
|
91
|
+
description: "Private IP addresses in RFC1918 ranges"
|
|
92
|
+
|
|
93
|
+
# -- Layer 8: Link-local and APIPA --
|
|
94
|
+
- field: tool_args
|
|
95
|
+
operator: regex
|
|
96
|
+
value: "(?i)(https?://)?169\\.254\\.\\d{1,3}\\.\\d{1,3}(:\\d+)?|fe80::"
|
|
97
|
+
description: "Link-local addresses (169.254.x.x, fe80::)"
|
|
98
|
+
|
|
99
|
+
# -- Layer 9: Internal hostnames --
|
|
100
|
+
- field: tool_args
|
|
101
|
+
operator: regex
|
|
102
|
+
value: "(?i)(https?://)?[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?\\.\\b(internal|local|localhost|localdomain|home|corp|intranet|private|lan|cluster\\.local|svc\\.cluster|consul|vault|etcd|k8s)\\b(:\\d+)?(/|$)"
|
|
103
|
+
description: "Internal DNS names and Kubernetes/service mesh hostnames"
|
|
104
|
+
|
|
105
|
+
# -- Layer 10: Exotic URI schemes --
|
|
106
|
+
- field: tool_args
|
|
107
|
+
operator: regex
|
|
108
|
+
value: "(?i)\\b(file|gopher|dict|ftp|tftp|ldap|ldaps|sftp|ssh|telnet|jar|netdoc|mailto|view-source|ws|wss)\\s*://\\s*(localhost|127\\.|10\\.|172\\.(1[6-9]|2[0-9]|3[01])|192\\.168\\.|0\\.0\\.0\\.0|\\[?::1\\]?|0x|0177)"
|
|
109
|
+
description: "Exotic URI schemes targeting internal addresses"
|
|
110
|
+
|
|
111
|
+
# -- Layer 11: DNS rebinding indicators --
|
|
112
|
+
- field: tool_args
|
|
113
|
+
operator: regex
|
|
114
|
+
value: "(?i)(https?://)?[a-zA-Z0-9-]+\\.(xip\\.io|nip\\.io|sslip\\.io|localtest\\.me|vcap\\.me|lvh\\.me|lacolhost\\.com|127\\.0\\.0\\.1\\.[a-z]+\\.\\w+)(:\\d+)?"
|
|
115
|
+
description: "DNS rebinding services that resolve to internal IPs"
|
|
116
|
+
|
|
117
|
+
# -- Layer 12: Redirect-based SSRF --
|
|
118
|
+
- field: tool_args
|
|
119
|
+
operator: regex
|
|
120
|
+
value: "(?i)(redirect|redir|url|next|return|returnUrl|returnTo|continue|dest|destination|go|goto|target|link|out|view|ref|callback|forward)\\s*=\\s*(https?%3A%2F%2F|https?://)(localhost|127\\.0\\.0\\.1|10\\.|172\\.(1[6-9]|2[0-9]|3[01])|192\\.168|169\\.254|0\\.0\\.0|\\[?::1\\]?)"
|
|
121
|
+
description: "URL redirect parameters targeting internal addresses"
|
|
122
|
+
|
|
123
|
+
# -- Layer 13: IPv6 internal addresses --
|
|
124
|
+
- field: tool_args
|
|
125
|
+
operator: regex
|
|
126
|
+
value: "(?i)(https?://)?\\[?(::1|::ffff:127\\.0\\.0\\.1|::ffff:10\\.|::ffff:172\\.(1[6-9]|2[0-9]|3[01])|::ffff:192\\.168|fc[0-9a-f]{2}:|fd[0-9a-f]{2}:)\\]?(:\\d+)?"
|
|
127
|
+
description: "IPv6 loopback, IPv4-mapped, and unique local addresses (fc/fd)"
|
|
128
|
+
|
|
129
|
+
# -- Layer 14: Internal service-discovery hostnames and common backend services --
|
|
130
|
+
- field: tool_args
|
|
131
|
+
operator: regex
|
|
132
|
+
value: "(?i)(https?://)?(\\w+\\.)?\\b(vault\\.service\\.consul|consul\\.service\\.consul|etcd\\.service|kubernetes\\.default\\.svc|kube-apiserver|kubelet|elasticsearch|redis|memcached|mysql|postgres|mongodb|rabbitmq|kafka|zookeeper)\\b(:\\d+|/)"
|
|
133
|
+
description: "Common internal service names (databases, caches, orchestrators)"
|
|
134
|
+
|
|
135
|
+
# -- Layer 15: URL shortener or redirect service to internal --
|
|
136
|
+
- field: tool_args
|
|
137
|
+
operator: regex
|
|
138
|
+
value: "(?i)(https?://)?(bit\\.ly|tinyurl\\.com|t\\.co|goo\\.gl|is\\.gd|v\\.gd|rb\\.gy|cutt\\.ly|short\\.io|tiny\\.cc)/[a-zA-Z0-9]+"
|
|
139
|
+
description: "URL shorteners that may redirect to internal services"
|
|
140
|
+
|
|
141
|
+
condition: any
|
|
142
|
+
false_positives:
|
|
143
|
+
- "Agents with legitimate access to internal services explicitly allowlisted"
|
|
144
|
+
- "Development environments where localhost access is expected"
|
|
145
|
+
- "Private network tools designed for internal infrastructure management"
|
|
146
|
+
- "Cloud management tools that legitimately query metadata endpoints"
|
|
147
|
+
- "Service mesh tools that use .internal or .consul DNS names by design"
|
|
148
|
+
|
|
149
|
+
response:
|
|
150
|
+
actions:
|
|
151
|
+
- block_tool
|
|
152
|
+
- alert
|
|
153
|
+
- snapshot
|
|
154
|
+
- kill_agent
|
|
155
|
+
auto_response_threshold: critical
|
|
156
|
+
message_template: >
|
|
157
|
+
[ATR-2026-013] CRITICAL: SSRF attempt detected via agent tool call.
|
|
158
|
+
Rule: ATR-2026-013. Tool: {tool_name}. Pattern: {matched_pattern}.
|
|
159
|
+
The tool call has been blocked and the agent terminated.
|
|
160
|
+
|
|
161
|
+
test_cases:
|
|
162
|
+
true_positives:
|
|
163
|
+
- tool_args: "http://169.254.169.254/latest/meta-data/iam/security-credentials/"
|
|
164
|
+
expected: triggered
|
|
165
|
+
description: "AWS IMDS credential endpoint"
|
|
166
|
+
- tool_args: "http://localhost:8080/admin/config"
|
|
167
|
+
expected: triggered
|
|
168
|
+
description: "Localhost admin endpoint"
|
|
169
|
+
- tool_args: "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/"
|
|
170
|
+
expected: triggered
|
|
171
|
+
description: "GCP metadata service"
|
|
172
|
+
- tool_args: "http://0x7f000001:8080/secret"
|
|
173
|
+
expected: triggered
|
|
174
|
+
description: "Hex-encoded loopback IP"
|
|
175
|
+
- tool_args: "gopher://127.0.0.1:6379/_SET%20exploit"
|
|
176
|
+
expected: triggered
|
|
177
|
+
description: "Gopher protocol targeting internal Redis"
|
|
178
|
+
- tool_args: "http://app.internal:3000/api/admin"
|
|
179
|
+
expected: triggered
|
|
180
|
+
description: "Internal hostname"
|
|
181
|
+
- tool_args: "http://evil.127.0.0.1.nip.io/api"
|
|
182
|
+
expected: triggered
|
|
183
|
+
description: "DNS rebinding via nip.io"
|
|
184
|
+
- tool_args: "redirect=http://169.254.169.254/latest/meta-data/"
|
|
185
|
+
expected: triggered
|
|
186
|
+
description: "Redirect-based SSRF to AWS IMDS"
|
|
187
|
+
- tool_args: "http://[::ffff:127.0.0.1]:8080/admin"
|
|
188
|
+
expected: triggered
|
|
189
|
+
description: "IPv4-mapped IPv6 loopback"
|
|
190
|
+
- tool_args: "http://kubernetes.default.svc:443/api/v1/secrets"
|
|
191
|
+
expected: triggered
|
|
192
|
+
description: "Kubernetes API server internal endpoint"
|
|
193
|
+
true_negatives:
|
|
194
|
+
- tool_args: "https://api.github.com/repos/user/repo"
|
|
195
|
+
expected: not_triggered
|
|
196
|
+
description: "Public GitHub API endpoint"
|
|
197
|
+
- tool_args: "https://www.example.com/public/data.json"
|
|
198
|
+
expected: not_triggered
|
|
199
|
+
description: "Public website"
|
|
200
|
+
- tool_args: "https://cdn.jsdelivr.net/npm/package@latest"
|
|
201
|
+
expected: not_triggered
|
|
202
|
+
description: "Public CDN"
|
|
203
|
+
- tool_args: "https://docs.google.com/spreadsheets/d/abc123"
|
|
204
|
+
expected: not_triggered
|
|
205
|
+
description: "Public Google Docs URL"
|
|
206
|
+
- tool_args: "search_query=localhost development guide"
|
|
207
|
+
expected: not_triggered
|
|
208
|
+
description: "Text containing localhost as a search term, not a URL"
|
|
@@ -0,0 +1,375 @@
|
|
|
1
|
+
# ATR Rule Schema -- Agent Threat Rules
|
|
2
|
+
# Version: 0.1.0-draft
|
|
3
|
+
#
|
|
4
|
+
# Inspired by Sigma rule format, extended for AI Agent attack surfaces.
|
|
5
|
+
# This schema defines the structure for all ATR detection rules.
|
|
6
|
+
#
|
|
7
|
+
# Status: RFC (Request for Comments)
|
|
8
|
+
# License: MIT
|
|
9
|
+
|
|
10
|
+
$schema: "https://json-schema.org/draft/2020-12/schema"
|
|
11
|
+
title: ATR Rule Schema
|
|
12
|
+
description: Schema for Agent Threat Rules (ATR) detection rules
|
|
13
|
+
version: "0.1.0-draft"
|
|
14
|
+
|
|
15
|
+
type: object
|
|
16
|
+
required:
|
|
17
|
+
- schema_version
|
|
18
|
+
- title
|
|
19
|
+
- id
|
|
20
|
+
- status
|
|
21
|
+
- description
|
|
22
|
+
- author
|
|
23
|
+
- date
|
|
24
|
+
- severity
|
|
25
|
+
- detection_tier
|
|
26
|
+
- maturity
|
|
27
|
+
- tags
|
|
28
|
+
- agent_source
|
|
29
|
+
- detection
|
|
30
|
+
- response
|
|
31
|
+
|
|
32
|
+
properties:
|
|
33
|
+
|
|
34
|
+
# === Metadata ===
|
|
35
|
+
|
|
36
|
+
schema_version:
|
|
37
|
+
type: string
|
|
38
|
+
description: "ATR schema version this rule conforms to (e.g., \"0.1\")"
|
|
39
|
+
|
|
40
|
+
title:
|
|
41
|
+
type: string
|
|
42
|
+
description: Human-readable rule name
|
|
43
|
+
|
|
44
|
+
id:
|
|
45
|
+
type: string
|
|
46
|
+
pattern: "^ATR-\\d{4}-\\d{3}$"
|
|
47
|
+
description: "Unique rule identifier. Format: ATR-YYYY-NNN (e.g., ATR-2026-001)"
|
|
48
|
+
|
|
49
|
+
status:
|
|
50
|
+
type: string
|
|
51
|
+
enum: [draft, experimental, stable, deprecated]
|
|
52
|
+
description: Rule maturity status
|
|
53
|
+
|
|
54
|
+
description:
|
|
55
|
+
type: string
|
|
56
|
+
description: Detailed description of the attack this rule detects
|
|
57
|
+
|
|
58
|
+
author:
|
|
59
|
+
type: string
|
|
60
|
+
description: Rule author or organization
|
|
61
|
+
|
|
62
|
+
date:
|
|
63
|
+
type: string
|
|
64
|
+
pattern: "^\\d{4}/\\d{2}/\\d{2}$"
|
|
65
|
+
description: "Creation date in YYYY/MM/DD format"
|
|
66
|
+
|
|
67
|
+
modified:
|
|
68
|
+
type: string
|
|
69
|
+
pattern: "^\\d{4}/\\d{2}/\\d{2}$"
|
|
70
|
+
description: "Last modification date in YYYY/MM/DD format"
|
|
71
|
+
|
|
72
|
+
# === Classification ===
|
|
73
|
+
|
|
74
|
+
detection_tier:
|
|
75
|
+
type: string
|
|
76
|
+
enum: [pattern, behavioral, protocol]
|
|
77
|
+
description: Detection approach used by this rule
|
|
78
|
+
|
|
79
|
+
maturity:
|
|
80
|
+
type: string
|
|
81
|
+
enum: [experimental, test, stable, deprecated]
|
|
82
|
+
description: Maturity level of this rule
|
|
83
|
+
|
|
84
|
+
# === Severity ===
|
|
85
|
+
|
|
86
|
+
severity:
|
|
87
|
+
type: string
|
|
88
|
+
enum: [critical, high, medium, low, informational]
|
|
89
|
+
description: Severity level of the detected threat
|
|
90
|
+
|
|
91
|
+
# === References (alignment with existing frameworks) ===
|
|
92
|
+
|
|
93
|
+
references:
|
|
94
|
+
type: object
|
|
95
|
+
description: Mappings to established security frameworks
|
|
96
|
+
properties:
|
|
97
|
+
owasp_llm:
|
|
98
|
+
type: array
|
|
99
|
+
items:
|
|
100
|
+
type: string
|
|
101
|
+
description: "OWASP LLM Top 10 references (e.g., LLM01:2025)"
|
|
102
|
+
mitre_atlas:
|
|
103
|
+
type: array
|
|
104
|
+
items:
|
|
105
|
+
type: string
|
|
106
|
+
description: "MITRE ATLAS technique IDs (e.g., AML.T0054)"
|
|
107
|
+
mitre_attack:
|
|
108
|
+
type: array
|
|
109
|
+
items:
|
|
110
|
+
type: string
|
|
111
|
+
description: "MITRE ATT&CK technique IDs (if applicable)"
|
|
112
|
+
cve:
|
|
113
|
+
type: array
|
|
114
|
+
items:
|
|
115
|
+
type: string
|
|
116
|
+
description: Related CVE identifiers
|
|
117
|
+
|
|
118
|
+
# === Tags (ATR classification) ===
|
|
119
|
+
|
|
120
|
+
tags:
|
|
121
|
+
type: object
|
|
122
|
+
required: [category]
|
|
123
|
+
properties:
|
|
124
|
+
category:
|
|
125
|
+
type: string
|
|
126
|
+
enum:
|
|
127
|
+
- prompt-injection
|
|
128
|
+
- tool-poisoning
|
|
129
|
+
- context-exfiltration
|
|
130
|
+
- agent-manipulation
|
|
131
|
+
- privilege-escalation
|
|
132
|
+
- excessive-autonomy
|
|
133
|
+
- data-poisoning
|
|
134
|
+
- model-abuse
|
|
135
|
+
- skill-compromise
|
|
136
|
+
description: Primary attack category
|
|
137
|
+
subcategory:
|
|
138
|
+
type: string
|
|
139
|
+
description: More specific classification within the category
|
|
140
|
+
confidence:
|
|
141
|
+
type: string
|
|
142
|
+
enum: [high, medium, low]
|
|
143
|
+
description: Expected accuracy of this rule (high = low false positive rate)
|
|
144
|
+
|
|
145
|
+
# === Agent Source (analogous to Sigma's logsource) ===
|
|
146
|
+
|
|
147
|
+
agent_source:
|
|
148
|
+
type: object
|
|
149
|
+
required: [type]
|
|
150
|
+
description: >
|
|
151
|
+
Defines what kind of agent data this rule inspects.
|
|
152
|
+
Analogous to Sigma's logsource, but for agent behaviors.
|
|
153
|
+
properties:
|
|
154
|
+
type:
|
|
155
|
+
type: string
|
|
156
|
+
enum:
|
|
157
|
+
- llm_io # LLM input/output (prompts and completions)
|
|
158
|
+
- tool_call # Function/tool call requests
|
|
159
|
+
- mcp_exchange # MCP protocol messages
|
|
160
|
+
- agent_behavior # Agent behavioral metrics and patterns
|
|
161
|
+
- multi_agent_comm # Inter-agent communication
|
|
162
|
+
- context_window # Context window contents
|
|
163
|
+
- memory_access # Agent memory read/write operations
|
|
164
|
+
- skill_lifecycle # MCP skill registration, update, removal events
|
|
165
|
+
- skill_permission # Skill permission requests and boundary checks
|
|
166
|
+
- skill_chain # Multi-skill invocation sequences
|
|
167
|
+
description: Type of agent data stream to monitor
|
|
168
|
+
framework:
|
|
169
|
+
type: array
|
|
170
|
+
items:
|
|
171
|
+
type: string
|
|
172
|
+
description: >
|
|
173
|
+
Applicable AI frameworks (e.g., langchain, crewai, autogen,
|
|
174
|
+
openai, anthropic, custom, any)
|
|
175
|
+
provider:
|
|
176
|
+
type: array
|
|
177
|
+
items:
|
|
178
|
+
type: string
|
|
179
|
+
description: >
|
|
180
|
+
Applicable LLM providers (e.g., ollama, openai, anthropic, any)
|
|
181
|
+
|
|
182
|
+
# === Detection Logic ===
|
|
183
|
+
|
|
184
|
+
detection:
|
|
185
|
+
type: object
|
|
186
|
+
required: [conditions, condition]
|
|
187
|
+
properties:
|
|
188
|
+
conditions:
|
|
189
|
+
description: >
|
|
190
|
+
Detection conditions. Supports two formats:
|
|
191
|
+
1. Array format (recommended): List of {field, operator, value} objects
|
|
192
|
+
2. Named-map format: Named condition blocks for complex detection logic
|
|
193
|
+
oneOf:
|
|
194
|
+
# -- Array format (used by most rules) --
|
|
195
|
+
- type: array
|
|
196
|
+
items:
|
|
197
|
+
type: object
|
|
198
|
+
required: [field, operator, value]
|
|
199
|
+
properties:
|
|
200
|
+
field:
|
|
201
|
+
type: string
|
|
202
|
+
description: >
|
|
203
|
+
Field to inspect (e.g., user_input, agent_output,
|
|
204
|
+
tool_response, tool_name, tool_args, content)
|
|
205
|
+
operator:
|
|
206
|
+
type: string
|
|
207
|
+
enum: [regex, contains, exact, starts_with]
|
|
208
|
+
description: How the value is matched against the field
|
|
209
|
+
value:
|
|
210
|
+
type: string
|
|
211
|
+
description: Pattern to match (regex string if operator is regex)
|
|
212
|
+
description:
|
|
213
|
+
type: string
|
|
214
|
+
description: Human-readable description of what this condition detects
|
|
215
|
+
|
|
216
|
+
# -- Named-map format (for complex/behavioral detection) --
|
|
217
|
+
- type: object
|
|
218
|
+
description: Named condition blocks (referenced by the condition expression)
|
|
219
|
+
additionalProperties:
|
|
220
|
+
type: object
|
|
221
|
+
properties:
|
|
222
|
+
field:
|
|
223
|
+
type: string
|
|
224
|
+
description: Field to inspect
|
|
225
|
+
patterns:
|
|
226
|
+
type: array
|
|
227
|
+
items:
|
|
228
|
+
type: string
|
|
229
|
+
description: Patterns to match against the field value
|
|
230
|
+
match_type:
|
|
231
|
+
type: string
|
|
232
|
+
enum: [contains, regex, exact, starts_with]
|
|
233
|
+
description: How patterns are matched
|
|
234
|
+
case_sensitive:
|
|
235
|
+
type: boolean
|
|
236
|
+
default: false
|
|
237
|
+
metric:
|
|
238
|
+
type: string
|
|
239
|
+
description: Behavioral metric to evaluate (v0.2+)
|
|
240
|
+
operator:
|
|
241
|
+
type: string
|
|
242
|
+
enum: [gt, lt, eq, gte, lte, deviation_from_baseline]
|
|
243
|
+
description: Comparison operator for behavioral thresholds
|
|
244
|
+
threshold:
|
|
245
|
+
type: number
|
|
246
|
+
description: Numeric threshold for the metric
|
|
247
|
+
window:
|
|
248
|
+
type: string
|
|
249
|
+
description: "Time window for behavioral analysis (e.g., 5m, 1h, 30s)"
|
|
250
|
+
ordered:
|
|
251
|
+
type: boolean
|
|
252
|
+
description: Whether steps must occur in order
|
|
253
|
+
within:
|
|
254
|
+
type: string
|
|
255
|
+
description: Maximum time span for the full sequence
|
|
256
|
+
steps:
|
|
257
|
+
type: array
|
|
258
|
+
items:
|
|
259
|
+
type: object
|
|
260
|
+
description: Ordered list of conditions that form the attack sequence
|
|
261
|
+
|
|
262
|
+
condition:
|
|
263
|
+
type: string
|
|
264
|
+
description: >
|
|
265
|
+
How to combine conditions. Use "any" or "or" for match-any,
|
|
266
|
+
"all" or "and" for match-all.
|
|
267
|
+
With the named-map format, an expression over condition names may be used instead. Example: "pattern_match AND behavioral"
|
|
268
|
+
|
|
269
|
+
false_positives:
|
|
270
|
+
type: array
|
|
271
|
+
items:
|
|
272
|
+
type: string
|
|
273
|
+
description: Known scenarios that may trigger false positives
|
|
274
|
+
|
|
275
|
+
# === Response Actions (ATR-specific, not in Sigma) ===
|
|
276
|
+
|
|
277
|
+
response:
|
|
278
|
+
type: object
|
|
279
|
+
required: [actions]
|
|
280
|
+
properties:
|
|
281
|
+
actions:
|
|
282
|
+
type: array
|
|
283
|
+
items:
|
|
284
|
+
type: string
|
|
285
|
+
enum:
|
|
286
|
+
- block_input # Reject the user/agent input
|
|
287
|
+
- block_output # Suppress the agent output
|
|
288
|
+
- block_tool # Prevent the tool call from executing
|
|
289
|
+
- quarantine_session # Isolate the entire session
|
|
290
|
+
- reset_context # Clear agent context/memory
|
|
291
|
+
- alert # Send alert to security team
|
|
292
|
+
- snapshot # Capture full session state for forensics
|
|
293
|
+
- escalate # Escalate to human reviewer
|
|
294
|
+
- reduce_permissions # Reduce agent's available tools/capabilities
|
|
295
|
+
- kill_agent # Terminate the agent process
|
|
296
|
+
description: Actions to take when the rule triggers
|
|
297
|
+
auto_response_threshold:
|
|
298
|
+
type: string
|
|
299
|
+
enum:
|
|
300
|
+
- low
|
|
301
|
+
- medium
|
|
302
|
+
- high
|
|
303
|
+
- critical
|
|
304
|
+
description: >
|
|
305
|
+
Severity threshold for automatic response.
|
|
306
|
+
Below this threshold, only alert; at or above it, execute response actions.
|
|
307
|
+
message_template:
|
|
308
|
+
type: string
|
|
309
|
+
description: >
|
|
310
|
+
Template for alert messages. Supports placeholders:
|
|
311
|
+
{matched_pattern}, {truncated_input}, {truncated_output},
|
|
312
|
+
{source_ip_or_user}, {tool_name}, {mcp_server_url},
|
|
313
|
+
{rule_id}, {severity}
|
|
314
|
+
|
|
315
|
+
# === Test Cases ===
|
|
316
|
+
|
|
317
|
+
test_cases:
|
|
318
|
+
type: object
|
|
319
|
+
description: Validation test cases shipped with the rule
|
|
320
|
+
properties:
|
|
321
|
+
true_positives:
|
|
322
|
+
type: array
|
|
323
|
+
items:
|
|
324
|
+
type: object
|
|
325
|
+
properties:
|
|
326
|
+
input:
|
|
327
|
+
type: string
|
|
328
|
+
tool_response:
|
|
329
|
+
type: string
|
|
330
|
+
agent_output:
|
|
331
|
+
type: string
|
|
332
|
+
expected:
|
|
333
|
+
type: string
|
|
334
|
+
enum: [triggered]
|
|
335
|
+
description:
|
|
336
|
+
type: string
|
|
337
|
+
description: Inputs that SHOULD trigger this rule
|
|
338
|
+
true_negatives:
|
|
339
|
+
type: array
|
|
340
|
+
items:
|
|
341
|
+
type: object
|
|
342
|
+
properties:
|
|
343
|
+
input:
|
|
344
|
+
type: string
|
|
345
|
+
tool_response:
|
|
346
|
+
type: string
|
|
347
|
+
agent_output:
|
|
348
|
+
type: string
|
|
349
|
+
expected:
|
|
350
|
+
type: string
|
|
351
|
+
enum: [not_triggered]
|
|
352
|
+
description:
|
|
353
|
+
type: string
|
|
354
|
+
description: Inputs that should NOT trigger this rule
|
|
355
|
+
|
|
356
|
+
# === Evasion Tests ===
|
|
357
|
+
|
|
358
|
+
evasion_tests:
|
|
359
|
+
type: array
|
|
360
|
+
description: Optional test cases for known evasion/bypass techniques
|
|
361
|
+
items:
|
|
362
|
+
type: object
|
|
363
|
+
properties:
|
|
364
|
+
input:
|
|
365
|
+
type: string
|
|
366
|
+
description: The evasion attempt input
|
|
367
|
+
expected:
|
|
368
|
+
type: string
|
|
369
|
+
description: Expected detection outcome
|
|
370
|
+
bypass_technique:
|
|
371
|
+
type: string
|
|
372
|
+
description: Name or description of the bypass technique used
|
|
373
|
+
notes:
|
|
374
|
+
type: string
|
|
375
|
+
description: Additional notes about the evasion test
|