@panguard-ai/atr 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +11 -10
- package/rules/agent-manipulation/ATR-2026-030-cross-agent-attack.yaml +51 -51
- package/rules/agent-manipulation/ATR-2026-032-goal-hijacking.yaml +36 -36
- package/rules/agent-manipulation/ATR-2026-074-cross-agent-privilege-escalation.yaml +32 -32
- package/rules/agent-manipulation/ATR-2026-076-inter-agent-message-spoofing.yaml +55 -55
- package/rules/agent-manipulation/ATR-2026-077-human-trust-exploitation.yaml +42 -42
- package/rules/agent-manipulation/ATR-2026-108-consensus-sybil-attack.yaml +31 -31
- package/rules/agent-manipulation/ATR-2026-116-a2a-message-validation.yaml +90 -0
- package/rules/agent-manipulation/ATR-2026-117-agent-identity-spoofing.yaml +90 -0
- package/rules/agent-manipulation/ATR-2026-118-approval-fatigue.yaml +87 -0
- package/rules/agent-manipulation/ATR-2026-119-social-engineering-via-agent.yaml +87 -0
- package/rules/context-exfiltration/ATR-2026-020-system-prompt-leak.yaml +51 -51
- package/rules/context-exfiltration/ATR-2026-021-api-key-exposure.yaml +62 -62
- package/rules/context-exfiltration/ATR-2026-075-agent-memory-manipulation.yaml +33 -33
- package/rules/context-exfiltration/ATR-2026-102-disguised-analytics-exfiltration.yaml +18 -18
- package/rules/context-exfiltration/ATR-2026-113-credential-theft.yaml +87 -0
- package/rules/context-exfiltration/ATR-2026-114-oauth-token-abuse.yaml +87 -0
- package/rules/context-exfiltration/ATR-2026-115-env-var-harvesting.yaml +88 -0
- package/rules/data-poisoning/ATR-2026-070-data-poisoning.yaml +53 -53
- package/rules/excessive-autonomy/ATR-2026-050-runaway-agent-loop.yaml +39 -39
- package/rules/excessive-autonomy/ATR-2026-051-resource-exhaustion.yaml +41 -41
- package/rules/excessive-autonomy/ATR-2026-052-cascading-failure.yaml +54 -54
- package/rules/excessive-autonomy/ATR-2026-098-unauthorized-financial-action.yaml +54 -54
- package/rules/excessive-autonomy/ATR-2026-099-high-risk-tool-gate.yaml +70 -55
- package/rules/model-security/ATR-2026-072-model-behavior-extraction.yaml +34 -34
- package/rules/model-security/ATR-2026-073-malicious-finetuning-data.yaml +26 -26
- package/rules/privilege-escalation/ATR-2026-040-privilege-escalation.yaml +58 -58
- package/rules/privilege-escalation/ATR-2026-041-scope-creep.yaml +35 -35
- package/rules/privilege-escalation/ATR-2026-107-delayed-execution-bypass.yaml +18 -18
- package/rules/privilege-escalation/ATR-2026-110-eval-injection.yaml +90 -0
- package/rules/privilege-escalation/ATR-2026-111-shell-escape.yaml +91 -0
- package/rules/privilege-escalation/ATR-2026-112-dynamic-import-exploitation.yaml +87 -0
- package/rules/prompt-injection/ATR-2026-001-direct-prompt-injection.yaml +274 -166
- package/rules/prompt-injection/ATR-2026-002-indirect-prompt-injection.yaml +63 -63
- package/rules/prompt-injection/ATR-2026-003-jailbreak-attempt.yaml +221 -76
- package/rules/prompt-injection/ATR-2026-004-system-prompt-override.yaml +157 -55
- package/rules/prompt-injection/ATR-2026-005-multi-turn-injection.yaml +47 -47
- package/rules/prompt-injection/ATR-2026-080-encoding-evasion.yaml +19 -12
- package/rules/prompt-injection/ATR-2026-081-semantic-multi-turn.yaml +21 -14
- package/rules/prompt-injection/ATR-2026-082-fingerprint-evasion.yaml +20 -13
- package/rules/prompt-injection/ATR-2026-083-indirect-tool-injection.yaml +23 -13
- package/rules/prompt-injection/ATR-2026-084-structured-data-injection.yaml +21 -14
- package/rules/prompt-injection/ATR-2026-085-audit-evasion.yaml +20 -13
- package/rules/prompt-injection/ATR-2026-086-visual-spoofing.yaml +20 -13
- package/rules/prompt-injection/ATR-2026-087-rule-probing.yaml +20 -13
- package/rules/prompt-injection/ATR-2026-088-adaptive-countermeasure.yaml +20 -13
- package/rules/prompt-injection/ATR-2026-089-polymorphic-skill.yaml +18 -11
- package/rules/prompt-injection/ATR-2026-090-threat-intel-exfil.yaml +20 -13
- package/rules/prompt-injection/ATR-2026-091-nested-payload.yaml +20 -13
- package/rules/prompt-injection/ATR-2026-092-consensus-poisoning.yaml +22 -15
- package/rules/prompt-injection/ATR-2026-093-gradual-escalation.yaml +21 -14
- package/rules/prompt-injection/ATR-2026-094-audit-bypass.yaml +20 -13
- package/rules/prompt-injection/ATR-2026-097-cjk-injection-patterns.yaml +230 -73
- package/rules/prompt-injection/ATR-2026-104-persona-hijacking.yaml +18 -18
- package/rules/skill-compromise/ATR-2026-060-skill-impersonation.yaml +53 -53
- package/rules/skill-compromise/ATR-2026-061-description-behavior-mismatch.yaml +21 -21
- package/rules/skill-compromise/ATR-2026-062-hidden-capability.yaml +22 -22
- package/rules/skill-compromise/ATR-2026-063-skill-chain-attack.yaml +21 -21
- package/rules/skill-compromise/ATR-2026-064-over-permissioned-skill.yaml +29 -29
- package/rules/skill-compromise/ATR-2026-065-skill-update-attack.yaml +22 -22
- package/rules/skill-compromise/ATR-2026-066-parameter-injection.yaml +23 -23
- package/rules/tool-poisoning/ATR-2026-010-mcp-malicious-response.yaml +70 -70
- package/rules/tool-poisoning/ATR-2026-011-tool-output-injection.yaml +60 -51
- package/rules/tool-poisoning/ATR-2026-012-unauthorized-tool-call.yaml +67 -58
- package/rules/tool-poisoning/ATR-2026-013-tool-ssrf.yaml +72 -63
- package/rules/tool-poisoning/ATR-2026-095-supply-chain-poisoning.yaml +23 -13
- package/rules/tool-poisoning/ATR-2026-096-registry-poisoning.yaml +23 -13
- package/rules/tool-poisoning/ATR-2026-100-consent-bypass-instruction.yaml +23 -23
- package/rules/tool-poisoning/ATR-2026-101-trust-escalation-override.yaml +18 -18
- package/rules/tool-poisoning/ATR-2026-103-hidden-safety-bypass-instruction.yaml +18 -18
- package/rules/tool-poisoning/ATR-2026-105-silent-action-concealment.yaml +16 -16
- package/rules/tool-poisoning/ATR-2026-106-schema-description-contradiction.yaml +17 -17
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
title:
|
|
1
|
+
title: "Cascading Failure Detection in Agent Pipelines"
|
|
2
2
|
id: ATR-2026-052
|
|
3
3
|
status: experimental
|
|
4
4
|
description: |
|
|
@@ -10,22 +10,22 @@ description: |
|
|
|
10
10
|
incorrect signals. These patterns exploit the "trust the previous
|
|
11
11
|
stage" assumption in multi-step agent workflows.
|
|
12
12
|
Note: This rule detects textual descriptions of cascading failure patterns, not live cascading failures. Structural cascade prevention requires behavioral monitoring.
|
|
13
|
-
author:
|
|
14
|
-
date:
|
|
15
|
-
schema_version:
|
|
13
|
+
author: "ATR Community"
|
|
14
|
+
date: "2026/03/09"
|
|
15
|
+
schema_version: "0.1"
|
|
16
16
|
detection_tier: pattern
|
|
17
17
|
maturity: experimental
|
|
18
18
|
severity: high
|
|
19
19
|
|
|
20
20
|
references:
|
|
21
21
|
owasp_llm:
|
|
22
|
-
-
|
|
23
|
-
-
|
|
22
|
+
- "LLM06:2025 - Excessive Agency"
|
|
23
|
+
- "LLM05:2025 - Improper Output Handling"
|
|
24
24
|
owasp_agentic:
|
|
25
|
-
-
|
|
25
|
+
- "ASI08:2026 - Cascading Failures"
|
|
26
26
|
mitre_atlas:
|
|
27
|
-
-
|
|
28
|
-
-
|
|
27
|
+
- "AML.T0053 - LLM Plugin Compromise"
|
|
28
|
+
- "AML.T0046 - Spamming ML System with Chaff Data"
|
|
29
29
|
|
|
30
30
|
tags:
|
|
31
31
|
category: excessive-autonomy
|
|
@@ -44,53 +44,53 @@ detection:
|
|
|
44
44
|
- field: content
|
|
45
45
|
operator: regex
|
|
46
46
|
value: "(?i)auto[_-]?approv(?:ed|ing|al).{0,30}(?:without|no|skipping)\\s+(?:human|manual|review|check|verification|validation)"
|
|
47
|
-
description:
|
|
47
|
+
description: "Automated approval bypassing human review checkpoint"
|
|
48
48
|
- field: content
|
|
49
49
|
operator: regex
|
|
50
50
|
value: "(?i)(?:propagat(?:ed|ing)|forward(?:ed|ing)|pass(?:ed|ing))\\s+(?:error|failure|exception|fault)\\s+(?:to|through|downstream|to\\s+next\\s+(?:stage|step|agent))"
|
|
51
|
-
description:
|
|
51
|
+
description: "Error propagating downstream through pipeline stages"
|
|
52
52
|
- field: content
|
|
53
53
|
operator: regex
|
|
54
54
|
value: "(?i)(?:stage|step|phase)\\s+\\d+\\s+(?:failed|error|timed\\s+out).*(?:stage|step|phase)\\s+\\d+\\s+(?:also|subsequently|then)\\s+(?:failed|errored)"
|
|
55
|
-
description:
|
|
55
|
+
description: "Multiple pipeline stages failing in sequence"
|
|
56
56
|
- field: content
|
|
57
57
|
operator: regex
|
|
58
58
|
value: "(?i)(?:trusting|accepting|using)\\s+(?:previous|upstream|prior)\\s+(?:stage|step|agent)\\s+(?:output|result|response)\\s+(?:without|no)\\s+(?:validation|verification|check)"
|
|
59
|
-
description:
|
|
59
|
+
description: "Stage consuming upstream output without validation"
|
|
60
60
|
- field: content
|
|
61
61
|
operator: regex
|
|
62
62
|
value: "(?i)(?:rollback|revert|undo|destroy|delete|terminate)\\s+(?:all|everything|entire|production|live).{0,40}(?:based\\s+on|triggered\\s+by|due\\s+to)\\s+(?:automated|agent|AI|pipeline)"
|
|
63
|
-
description:
|
|
63
|
+
description: "Destructive action triggered by automated pipeline signal"
|
|
64
64
|
- field: content
|
|
65
65
|
operator: regex
|
|
66
66
|
value: "(?i)(?:amplif(?:ied|ying)|escalat(?:ed|ing)|magnif(?:ied|ying))\\s+(?:the\\s+)?(?:error|issue|problem|signal|impact|damage)\\s+(?:across|through|to)\\s+(?:multiple|all|downstream)"
|
|
67
|
-
description:
|
|
67
|
+
description: "Error amplification across multiple pipeline stages"
|
|
68
68
|
- field: content
|
|
69
69
|
operator: regex
|
|
70
70
|
value: "(?i)(?:no\\s+(?:human|manual)\\s+(?:in|checkpoint|review|gate|approval)\\s+(?:the|this)?\\s*(?:loop|pipeline|chain|workflow)|(?:fully|completely)\\s+automated\\s+(?:pipeline|workflow|chain)\\s+(?:with|including)\\s+(?:destructive|production|critical))"
|
|
71
|
-
description:
|
|
71
|
+
description: "Fully automated pipeline with no human-in-the-loop for critical actions"
|
|
72
72
|
- field: content
|
|
73
73
|
operator: regex
|
|
74
74
|
value: "(?i)(?:blind(?:ly)?|unconditional(?:ly)?|automatic(?:ally)?)\\s+(?:executing|running|applying|deploying).{0,60}(?:previous|upstream|AI|agent|automated)(?:\\s+(?:AI|agent|system|model))?"
|
|
75
|
-
description:
|
|
75
|
+
description: "Blind execution of upstream agent recommendations"
|
|
76
76
|
- field: content
|
|
77
77
|
operator: regex
|
|
78
78
|
value: "(?i)(?!(?:post[- ]?(?:incident|mortem)|review|analysis|report|last\\s+(?:week|month|time)|caused\\s+by|root\\s+cause|lessons?\\s+learned|investigation|retrospective).{0,80})(?:chain\\s+reaction|domino\\s+(?:effect|failure)|failure\\s+cascade|cascading\\s+(?:error|failure|shutdown|rollback))(?!.{0,80}(?:was\\s+caused\\s+by|root\\s+cause|post[- ]?(?:incident|mortem)|lessons?\\s+learned|last\\s+(?:week|month|time)))"
|
|
79
|
-
description:
|
|
79
|
+
description: "Explicit cascading failure terminology (excludes retrospective analysis context)"
|
|
80
80
|
- field: content
|
|
81
81
|
operator: regex
|
|
82
82
|
value: "(?i)(?:retry|fallback)\\s+(?:triggered|activated|initiated)\\s+(?:automatically|by\\s+(?:previous|upstream)).*(?:retry|fallback)\\s+(?:triggered|activated|initiated)"
|
|
83
|
-
description:
|
|
83
|
+
description: "Cascading retry/fallback loops across stages"
|
|
84
84
|
- field: content
|
|
85
85
|
operator: regex
|
|
86
86
|
value: "(?i)(?:all|every)\\s+\\d+\\s+(?:downstream|dependent|connected)\\s+(?:agents?|services?|stages?|steps?)\\s+(?:affected|impacted|failed|halted)"
|
|
87
|
-
description:
|
|
87
|
+
description: "Multiple downstream components affected by single failure"
|
|
88
88
|
condition: any
|
|
89
89
|
false_positives:
|
|
90
|
-
-
|
|
91
|
-
-
|
|
92
|
-
-
|
|
93
|
-
-
|
|
90
|
+
- "Legitimate pipeline monitoring reporting multi-stage errors for debugging"
|
|
91
|
+
- "Chaos engineering or fault injection testing in controlled environments"
|
|
92
|
+
- "Post-incident analysis discussing cascading failure root causes"
|
|
93
|
+
- "Circuit breaker implementations that intentionally detect cascade patterns"
|
|
94
94
|
|
|
95
95
|
response:
|
|
96
96
|
actions:
|
|
@@ -105,49 +105,49 @@ response:
|
|
|
105
105
|
|
|
106
106
|
test_cases:
|
|
107
107
|
true_positives:
|
|
108
|
-
- input:
|
|
108
|
+
- input: "Auto-approved deployment without human review based on AI test results. Pushing to production."
|
|
109
109
|
expected: triggered
|
|
110
|
-
description:
|
|
111
|
-
- input:
|
|
110
|
+
description: "Auto-approval bypassing human review for production deployment"
|
|
111
|
+
- input: "Stage 2 failed with timeout. Stage 3 also failed due to missing input from stage 2. Stage 4 subsequently failed."
|
|
112
112
|
expected: triggered
|
|
113
|
-
description:
|
|
114
|
-
- input:
|
|
113
|
+
description: "Sequential multi-stage pipeline failure cascade"
|
|
114
|
+
- input: "Rollback all production services triggered by automated monitoring agent alert."
|
|
115
115
|
expected: triggered
|
|
116
|
-
description:
|
|
117
|
-
- input:
|
|
116
|
+
description: "Destructive rollback triggered by automated signal"
|
|
117
|
+
- input: "Trusting previous stage output without validation. Proceeding to deploy the generated configuration."
|
|
118
118
|
expected: triggered
|
|
119
|
-
description:
|
|
120
|
-
- input:
|
|
119
|
+
description: "Pipeline stage consuming unvalidated upstream output"
|
|
120
|
+
- input: "Cascading failure detected: payment service down caused order service, notification service, and analytics to fail."
|
|
121
121
|
expected: triggered
|
|
122
|
-
description:
|
|
123
|
-
- input:
|
|
122
|
+
description: "Cascading failure across dependent services"
|
|
123
|
+
- input: "Fully automated pipeline with destructive teardown stage. No human in the loop for the delete operation."
|
|
124
124
|
expected: triggered
|
|
125
|
-
description:
|
|
126
|
-
- input:
|
|
125
|
+
description: "Fully automated pipeline with destructive actions and no human gate"
|
|
126
|
+
- input: "Blindly executing recommendations from upstream AI agent. Applying all suggested database schema changes."
|
|
127
127
|
expected: triggered
|
|
128
|
-
description:
|
|
129
|
-
- input:
|
|
128
|
+
description: "Blind execution of AI-generated database changes"
|
|
129
|
+
- input: "Retry triggered automatically by previous failure. Fallback triggered by retry timeout. Second fallback triggered."
|
|
130
130
|
expected: triggered
|
|
131
|
-
description:
|
|
132
|
-
- input:
|
|
131
|
+
description: "Cascading retry-fallback loop across stages"
|
|
132
|
+
- input: "Amplifying the error across all 12 downstream services. Impact spreading to production cluster."
|
|
133
133
|
expected: triggered
|
|
134
|
-
description:
|
|
135
|
-
- input:
|
|
134
|
+
description: "Error amplification across multiple downstream services"
|
|
135
|
+
- input: "All 8 dependent agents affected by the coordinator failure. Entire pipeline halted."
|
|
136
136
|
expected: triggered
|
|
137
|
-
description:
|
|
137
|
+
description: "Single failure impacting all downstream agents"
|
|
138
138
|
true_negatives:
|
|
139
|
-
- input:
|
|
139
|
+
- input: "Pipeline completed successfully. All 5 stages passed validation. Awaiting human approval for deployment."
|
|
140
140
|
expected: not_triggered
|
|
141
|
-
description:
|
|
142
|
-
- input:
|
|
141
|
+
description: "Normal pipeline completion with human approval gate"
|
|
142
|
+
- input: "Stage 3 encountered an error. Circuit breaker activated. Alerting on-call engineer for review."
|
|
143
143
|
expected: not_triggered
|
|
144
|
-
description:
|
|
145
|
-
- input:
|
|
144
|
+
description: "Proper circuit breaker stopping cascade and alerting human"
|
|
145
|
+
- input: "Running chaos engineering test: simulating stage 2 failure to verify pipeline resilience."
|
|
146
146
|
expected: not_triggered
|
|
147
|
-
description:
|
|
148
|
-
- input:
|
|
147
|
+
description: "Intentional fault injection for resilience testing"
|
|
148
|
+
- input: "Post-incident review: the cascading failure last week was caused by an unhandled null pointer in stage 1."
|
|
149
149
|
expected: not_triggered
|
|
150
|
-
description:
|
|
151
|
-
- input:
|
|
150
|
+
description: "Post-incident analysis discussing past cascade"
|
|
151
|
+
- input: "Automated tests passed. Sending results to reviewer for manual sign-off before merge."
|
|
152
152
|
expected: not_triggered
|
|
153
|
-
description:
|
|
153
|
+
description: "Automated tests with proper human-in-the-loop before action"
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
title:
|
|
1
|
+
title: "Unauthorized Financial Action by AI Agent"
|
|
2
2
|
id: ATR-2026-098
|
|
3
3
|
status: experimental
|
|
4
4
|
description: |
|
|
@@ -15,21 +15,21 @@ description: |
|
|
|
15
15
|
Covers: WeChat red packets, Alipay/WeChat Pay transfers, bank transfers,
|
|
16
16
|
crypto transactions, subscription purchases, in-app purchases, and
|
|
17
17
|
generic payment API calls.
|
|
18
|
-
author:
|
|
19
|
-
date:
|
|
20
|
-
schema_version:
|
|
18
|
+
author: "ATR Community"
|
|
19
|
+
date: "2026/03/11"
|
|
20
|
+
schema_version: "0.1"
|
|
21
21
|
detection_tier: pattern
|
|
22
22
|
maturity: experimental
|
|
23
23
|
severity: critical
|
|
24
24
|
|
|
25
25
|
references:
|
|
26
26
|
owasp_llm:
|
|
27
|
-
-
|
|
27
|
+
- "LLM06:2025 - Excessive Agency"
|
|
28
28
|
owasp_agentic:
|
|
29
|
-
-
|
|
30
|
-
-
|
|
29
|
+
- "ASI08:2026 - Excessive Autonomy"
|
|
30
|
+
- "ASI09:2026 - Inadequate Access Controls"
|
|
31
31
|
mitre_atlas:
|
|
32
|
-
-
|
|
32
|
+
- "AML.T0053 - LLM Plugin Compromise"
|
|
33
33
|
|
|
34
34
|
tags:
|
|
35
35
|
category: excessive-autonomy
|
|
@@ -48,50 +48,50 @@ detection:
|
|
|
48
48
|
# -- Tool name patterns: payment/transfer tools --
|
|
49
49
|
- field: tool_name
|
|
50
50
|
operator: regex
|
|
51
|
-
value:
|
|
52
|
-
description:
|
|
51
|
+
value: "(?i)(send_red_packet|send_hongbao|send_money|transfer_money|make_payment|pay_bill|send_payment|create_payment|process_payment|execute_transfer|bank_transfer|wire_transfer|crypto_send|crypto_transfer)"
|
|
52
|
+
description: "Direct financial tool call by name"
|
|
53
53
|
- field: tool_name
|
|
54
54
|
operator: regex
|
|
55
|
-
value:
|
|
56
|
-
description:
|
|
55
|
+
value: "(?i)(红包|紅包|转账|轉帳|付款|支付|汇款|匯款|打款|打錢|充值|充幣)"
|
|
56
|
+
description: "Chinese financial tool name patterns"
|
|
57
57
|
- field: tool_name
|
|
58
58
|
operator: regex
|
|
59
|
-
value:
|
|
60
|
-
description:
|
|
59
|
+
value: "(?i)(wechat_pay|alipay|apple_pay|google_pay|stripe_charge|paypal_send|venmo_send|cashapp_send|zelle_send)"
|
|
60
|
+
description: "Named payment platform tool calls"
|
|
61
61
|
# -- Tool args: financial action indicators --
|
|
62
62
|
- field: tool_args
|
|
63
63
|
operator: regex
|
|
64
64
|
value: "(?i)(\"amount\"\\s*:\\s*\\d+|\"currency\"\\s*:\\s*\"|\"recipient\"\\s*:\\s*\"|\"payment_method\"\\s*:\\s*\"|\"account_number\"\\s*:\\s*\")"
|
|
65
|
-
description:
|
|
65
|
+
description: "Financial parameters in tool arguments (amount, currency, recipient)"
|
|
66
66
|
- field: tool_args
|
|
67
67
|
operator: regex
|
|
68
68
|
value: "(?i)(\"action\"\\s*:\\s*\"(pay|transfer|send|withdraw|purchase)\"|\"type\"\\s*:\\s*\"(payment|transfer|withdrawal|purchase)\")"
|
|
69
|
-
description:
|
|
69
|
+
description: "Financial action type in tool arguments"
|
|
70
70
|
# -- Content: agent declaring intent to make financial action --
|
|
71
71
|
- field: content
|
|
72
72
|
operator: regex
|
|
73
73
|
value: "(?i)(I('ll|\\s+will)|let\\s+me|going\\s+to|about\\s+to|proceeding\\s+to|executing)\\s+(send|transfer|pay|wire|remit|deposit|withdraw)\\s+.{0,30}(\\$|USD|EUR|CNY|RMB|元|円|원|\\d+\\.?\\d*)"
|
|
74
|
-
description:
|
|
74
|
+
description: "Agent declaring intent to execute financial action with amount"
|
|
75
75
|
- field: content
|
|
76
76
|
operator: regex
|
|
77
|
-
value:
|
|
78
|
-
description:
|
|
77
|
+
value: "(我(将|要|来|來|正在|准备|準備)?|正在|即将|即將).{0,10}(发送|發送|转账|轉帳|支付|付款|汇款|匯款|打款).{0,20}(元|块|塊|美元|人民币|人民幣)"
|
|
78
|
+
description: "Chinese: agent declaring financial action with currency"
|
|
79
79
|
# -- Content: agent executing financial operation keywords --
|
|
80
80
|
- field: content
|
|
81
81
|
operator: regex
|
|
82
82
|
value: "(?i)(payment\\s+(successful|completed|processed|sent|confirmed)|transfer\\s+(successful|completed|sent|initiated)|red\\s+packet\\s+sent|hongbao\\s+sent|money\\s+sent)"
|
|
83
|
-
description:
|
|
83
|
+
description: "Agent confirming financial action was executed"
|
|
84
84
|
- field: content
|
|
85
85
|
operator: regex
|
|
86
|
-
value:
|
|
87
|
-
description:
|
|
86
|
+
value: "(红包已发|紅包已發|转账成功|轉帳成功|付款成功|支付完成|汇款已发|匯款已發|已发送.*元|已發送.*元)"
|
|
87
|
+
description: "Chinese: agent confirming financial action completed"
|
|
88
88
|
|
|
89
89
|
condition: any
|
|
90
90
|
false_positives:
|
|
91
|
-
-
|
|
92
|
-
-
|
|
93
|
-
-
|
|
94
|
-
-
|
|
91
|
+
- "User explicitly asking agent to check payment status (read-only)"
|
|
92
|
+
- "Agent displaying payment history or transaction records"
|
|
93
|
+
- "Financial education chatbot explaining how payments work"
|
|
94
|
+
- "Agent generating payment-related code (not executing)"
|
|
95
95
|
|
|
96
96
|
response:
|
|
97
97
|
actions:
|
|
@@ -110,46 +110,46 @@ response:
|
|
|
110
110
|
test_cases:
|
|
111
111
|
true_positives:
|
|
112
112
|
- tool_call:
|
|
113
|
-
name:
|
|
114
|
-
args:
|
|
113
|
+
name: "send_red_packet"
|
|
114
|
+
args: "{\"amount\": 200, \"recipient\": \"user123\"}"
|
|
115
115
|
expected: triggered
|
|
116
|
-
description:
|
|
116
|
+
description: "Direct red packet send tool call"
|
|
117
117
|
- tool_call:
|
|
118
|
-
name:
|
|
119
|
-
args:
|
|
118
|
+
name: "wechat_pay"
|
|
119
|
+
args: "{\"action\": \"transfer\", \"amount\": 200, \"to\": \"wxid_abc\"}"
|
|
120
120
|
expected: triggered
|
|
121
|
-
description:
|
|
122
|
-
- input:
|
|
123
|
-
tool_name:
|
|
121
|
+
description: "WeChat Pay transfer tool call"
|
|
122
|
+
- input: "我将发送200元红包给用户"
|
|
123
|
+
tool_name: "agent_action"
|
|
124
124
|
expected: triggered
|
|
125
|
-
description:
|
|
126
|
-
- input:
|
|
127
|
-
tool_name:
|
|
125
|
+
description: "Agent declaring intent to send red packet"
|
|
126
|
+
- input: "红包已发送成功,金额200元"
|
|
127
|
+
tool_name: "agent_action"
|
|
128
128
|
expected: triggered
|
|
129
|
-
description:
|
|
129
|
+
description: "Agent confirming red packet was sent"
|
|
130
130
|
- input: "I'll transfer $500 to the specified account now"
|
|
131
|
-
tool_name:
|
|
131
|
+
tool_name: "agent_action"
|
|
132
132
|
expected: triggered
|
|
133
|
-
description:
|
|
134
|
-
- input:
|
|
135
|
-
tool_name:
|
|
133
|
+
description: "English: agent declaring financial transfer"
|
|
134
|
+
- input: "Payment successful. $200 has been sent to recipient."
|
|
135
|
+
tool_name: "agent_action"
|
|
136
136
|
expected: triggered
|
|
137
|
-
description:
|
|
137
|
+
description: "Agent confirming payment completion"
|
|
138
138
|
- tool_call:
|
|
139
|
-
name:
|
|
140
|
-
args:
|
|
139
|
+
name: "转账"
|
|
140
|
+
args: "{\"金额\": 500, \"收款人\": \"user456\"}"
|
|
141
141
|
expected: triggered
|
|
142
|
-
description:
|
|
142
|
+
description: "Chinese-named transfer tool"
|
|
143
143
|
true_negatives:
|
|
144
|
-
- input:
|
|
144
|
+
- input: "Your last payment of $50 was processed on March 1st"
|
|
145
145
|
expected: not_triggered
|
|
146
|
-
description:
|
|
147
|
-
- input:
|
|
146
|
+
description: "Displaying payment history (read-only)"
|
|
147
|
+
- input: "Here is the code to integrate Stripe payments: stripe.charges.create({amount: 2000})"
|
|
148
148
|
expected: not_triggered
|
|
149
|
-
description:
|
|
150
|
-
- input:
|
|
149
|
+
description: "Generating payment code, not executing"
|
|
150
|
+
- input: "The transfer feature is currently unavailable"
|
|
151
151
|
expected: not_triggered
|
|
152
|
-
description:
|
|
153
|
-
- input:
|
|
152
|
+
description: "Informational message about transfer status"
|
|
153
|
+
- input: "请问我的红包余额是多少?"
|
|
154
154
|
expected: not_triggered
|
|
155
|
-
description:
|
|
155
|
+
description: "User asking about red packet balance (read-only query)"
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
title:
|
|
1
|
+
title: "High-Risk Tool Invocation Without Human Confirmation"
|
|
2
2
|
id: ATR-2026-099
|
|
3
3
|
status: experimental
|
|
4
4
|
description: |
|
|
@@ -18,22 +18,22 @@ description: |
|
|
|
18
18
|
This rule enforces the principle of least privilege for agent tool access.
|
|
19
19
|
Any tool matching these categories should trigger an "ask" verdict
|
|
20
20
|
regardless of confidence score.
|
|
21
|
-
author:
|
|
22
|
-
date:
|
|
23
|
-
schema_version:
|
|
21
|
+
author: "ATR Community"
|
|
22
|
+
date: "2026/03/11"
|
|
23
|
+
schema_version: "0.1"
|
|
24
24
|
detection_tier: pattern
|
|
25
25
|
maturity: experimental
|
|
26
|
-
severity:
|
|
26
|
+
severity: low
|
|
27
27
|
|
|
28
28
|
references:
|
|
29
29
|
owasp_llm:
|
|
30
|
-
-
|
|
30
|
+
- "LLM06:2025 - Excessive Agency"
|
|
31
31
|
owasp_agentic:
|
|
32
|
-
-
|
|
33
|
-
-
|
|
34
|
-
-
|
|
32
|
+
- "ASI08:2026 - Excessive Autonomy"
|
|
33
|
+
- "ASI09:2026 - Inadequate Access Controls"
|
|
34
|
+
- "ASI10:2026 - Insufficient Human Oversight"
|
|
35
35
|
mitre_atlas:
|
|
36
|
-
-
|
|
36
|
+
- "AML.T0053 - LLM Plugin Compromise"
|
|
37
37
|
|
|
38
38
|
tags:
|
|
39
39
|
category: excessive-autonomy
|
|
@@ -52,45 +52,45 @@ detection:
|
|
|
52
52
|
# -- Category 1: Financial tools --
|
|
53
53
|
- field: tool_name
|
|
54
54
|
operator: regex
|
|
55
|
-
value:
|
|
56
|
-
description:
|
|
55
|
+
value: "(?i)(?<!get_)(?<!list_)(?<!check_)(?<!view_)(?<!fetch_)(?<!query_)(pay|payment|transfer|transaction|purchase|buy|checkout|billing|invoice|charge|refund|withdraw|deposit|subscribe|红包|紅包|转账|轉帳|付款|支付|汇款|匯款|充值|订阅|訂閱|送金|振込|결제|이체|송금)"
|
|
56
|
+
description: "Financial tool invocation (excludes read-only get_/list_/check_ prefixed tools)"
|
|
57
57
|
# -- Category 2: Destructive tools --
|
|
58
58
|
- field: tool_name
|
|
59
59
|
operator: regex
|
|
60
|
-
value:
|
|
61
|
-
description:
|
|
60
|
+
value: "(?i)(delete|remove|drop|truncate|purge|wipe|destroy|erase|reset|uninstall|revoke|terminate|kill|shutdown|format|删除|刪除|清空|销毁|銷毀|移除|卸载|卸載|削除|삭제|제거)"
|
|
61
|
+
description: "Destructive tool invocation"
|
|
62
62
|
# -- Category 3: Communication tools (sending on behalf of user) --
|
|
63
63
|
- field: tool_name
|
|
64
64
|
operator: regex
|
|
65
|
-
value:
|
|
66
|
-
description:
|
|
65
|
+
value: "(?i)(send_message|send_email|send_sms|send_notification|post_message|post_tweet|post_comment|reply_message|publish|broadcast|发送消息|發送訊息|发邮件|發郵件|发短信|發簡訊|投稿|메시지_보내기)"
|
|
66
|
+
description: "Communication tool sending messages on behalf of user"
|
|
67
67
|
# -- Category 4: Permission/auth tools --
|
|
68
68
|
- field: tool_name
|
|
69
69
|
operator: regex
|
|
70
|
-
value:
|
|
71
|
-
description:
|
|
70
|
+
value: "(?i)(grant_access|change_role|modify_permissions|add_admin|set_owner|update_auth|change_password|reset_password|create_token|generate_key|invite_user|授权|授權|修改权限|修改權限|添加管理|변경_권한)"
|
|
71
|
+
description: "Permission or authentication modification tool"
|
|
72
72
|
# -- Category 5: System execution tools --
|
|
73
73
|
- field: tool_name
|
|
74
74
|
operator: regex
|
|
75
|
-
value:
|
|
76
|
-
description:
|
|
75
|
+
value: "(?i)(exec|execute|run_command|shell|bash|cmd|powershell|system_call|os_exec|subprocess|eval|执行命令|執行命令|실행)"
|
|
76
|
+
description: "System command execution tool"
|
|
77
77
|
# -- Tool args: high-value amounts without confirmation flag --
|
|
78
78
|
- field: tool_args
|
|
79
79
|
operator: regex
|
|
80
80
|
value: "(?i)(\"amount\"\\s*:\\s*[1-9]\\d{2,}|\"value\"\\s*:\\s*[1-9]\\d{2,}|\"quantity\"\\s*:\\s*[1-9]\\d{2,})(?!.*\"confirmed\"\\s*:\\s*true)"
|
|
81
|
-
description:
|
|
81
|
+
description: "High-value operation (amount >= 100) without confirmation flag"
|
|
82
82
|
# -- Tool args: bulk/all operations --
|
|
83
83
|
- field: tool_args
|
|
84
84
|
operator: regex
|
|
85
85
|
value: "(?i)(\"scope\"\\s*:\\s*\"(all|global|everyone)\"|\"target\"\\s*:\\s*\"(all|\\*|everyone)\"|\"recursive\"\\s*:\\s*true|\"force\"\\s*:\\s*true)"
|
|
86
|
-
description:
|
|
86
|
+
description: "Bulk or force operation targeting all/everyone"
|
|
87
87
|
|
|
88
88
|
condition: any
|
|
89
89
|
false_positives:
|
|
90
|
-
-
|
|
91
|
-
-
|
|
90
|
+
- "Agent running in a sandbox or testing environment with mock tools"
|
|
91
|
+
- "Legitimate automated workflows with pre-approved tool access"
|
|
92
92
|
- "Read-only tools that contain action keywords in their name (e.g., 'get_payment_status')"
|
|
93
|
-
-
|
|
93
|
+
- "Tools with built-in confirmation mechanisms at the platform level"
|
|
94
94
|
|
|
95
95
|
response:
|
|
96
96
|
actions:
|
|
@@ -106,54 +106,69 @@ response:
|
|
|
106
106
|
test_cases:
|
|
107
107
|
true_positives:
|
|
108
108
|
- tool_call:
|
|
109
|
-
name:
|
|
110
|
-
args:
|
|
109
|
+
name: "send_red_packet"
|
|
110
|
+
args: "{\"amount\": 200}"
|
|
111
111
|
expected: triggered
|
|
112
|
-
description:
|
|
112
|
+
description: "Financial tool: red packet"
|
|
113
113
|
- tool_call:
|
|
114
|
-
name:
|
|
115
|
-
args:
|
|
114
|
+
name: "delete_all_files"
|
|
115
|
+
args: "{\"path\": \"/data\", \"recursive\": true}"
|
|
116
116
|
expected: triggered
|
|
117
|
-
description:
|
|
117
|
+
description: "Destructive tool: recursive delete"
|
|
118
118
|
- tool_call:
|
|
119
|
-
name:
|
|
120
|
-
args:
|
|
119
|
+
name: "send_email"
|
|
120
|
+
args: "{\"to\": \"boss@company.com\", \"body\": \"I quit\"}"
|
|
121
121
|
expected: triggered
|
|
122
|
-
description:
|
|
122
|
+
description: "Communication tool: sending email as user"
|
|
123
123
|
- tool_call:
|
|
124
|
-
name:
|
|
125
|
-
args:
|
|
124
|
+
name: "grant_access"
|
|
125
|
+
args: "{\"user\": \"stranger\", \"role\": \"admin\"}"
|
|
126
126
|
expected: triggered
|
|
127
|
-
description:
|
|
127
|
+
description: "Permission tool: granting admin access"
|
|
128
128
|
- tool_call:
|
|
129
|
-
name:
|
|
130
|
-
args:
|
|
129
|
+
name: "bash"
|
|
130
|
+
args: "{\"command\": \"rm -rf /\"}"
|
|
131
131
|
expected: triggered
|
|
132
|
-
description:
|
|
132
|
+
description: "System execution tool"
|
|
133
133
|
- tool_call:
|
|
134
|
-
name:
|
|
135
|
-
args:
|
|
134
|
+
name: "转账"
|
|
135
|
+
args: "{\"金额\": 500}"
|
|
136
136
|
expected: triggered
|
|
137
|
-
description:
|
|
137
|
+
description: "Chinese-named financial tool"
|
|
138
138
|
- tool_call:
|
|
139
|
-
name:
|
|
140
|
-
args:
|
|
139
|
+
name: "checkout"
|
|
140
|
+
args: "{\"amount\": 9999, \"force\": true}"
|
|
141
141
|
expected: triggered
|
|
142
|
-
description:
|
|
142
|
+
description: "High-value purchase with force flag"
|
|
143
143
|
true_negatives:
|
|
144
144
|
- tool_call:
|
|
145
|
-
name:
|
|
146
|
-
args:
|
|
145
|
+
name: "get_payment_status"
|
|
146
|
+
args: "{\"order_id\": \"12345\"}"
|
|
147
147
|
expected: not_triggered
|
|
148
|
-
description:
|
|
148
|
+
description: "Read-only payment status check"
|
|
149
149
|
notes: "Tool name contains 'payment' but prefixed with 'get_' indicating read-only. May need platform-level get_ prefix exclusion."
|
|
150
150
|
- tool_call:
|
|
151
|
-
name:
|
|
152
|
-
args:
|
|
151
|
+
name: "search_files"
|
|
152
|
+
args: "{\"query\": \"invoice\"}"
|
|
153
153
|
expected: not_triggered
|
|
154
|
-
description:
|
|
154
|
+
description: "File search, not destructive despite financial keyword in args"
|
|
155
155
|
- tool_call:
|
|
156
|
-
name:
|
|
157
|
-
args:
|
|
156
|
+
name: "calculate_total"
|
|
157
|
+
args: "{\"items\": [{\"price\": 500}]}"
|
|
158
158
|
expected: not_triggered
|
|
159
|
-
description:
|
|
159
|
+
description: "Calculation tool, not financial action"
|
|
160
|
+
- tool_call:
|
|
161
|
+
name: "list_transactions"
|
|
162
|
+
args: "{\"date_range\": \"2026-01-01 to 2026-03-01\"}"
|
|
163
|
+
expected: not_triggered
|
|
164
|
+
description: "Read-only transaction listing, not a financial action"
|
|
165
|
+
- tool_call:
|
|
166
|
+
name: "read_config"
|
|
167
|
+
args: "{\"key\": \"notification_settings\"}"
|
|
168
|
+
expected: not_triggered
|
|
169
|
+
description: "Reading configuration, not modifying permissions or system"
|
|
170
|
+
- tool_call:
|
|
171
|
+
name: "fetch_user_profile"
|
|
172
|
+
args: "{\"user_id\": \"12345\"}"
|
|
173
|
+
expected: not_triggered
|
|
174
|
+
description: "Read-only user profile fetch, not permission modification"
|