agent-threat-rules 0.4.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +161 -52
- package/dist/badge.d.ts.map +1 -1
- package/dist/badge.js +6 -1
- package/dist/badge.js.map +1 -1
- package/dist/cli/scan-handler.d.ts +19 -0
- package/dist/cli/scan-handler.d.ts.map +1 -0
- package/dist/cli/scan-handler.js +257 -0
- package/dist/cli/scan-handler.js.map +1 -0
- package/dist/cli.js +44 -86
- package/dist/cli.js.map +1 -1
- package/dist/content-hash.d.ts +7 -0
- package/dist/content-hash.d.ts.map +1 -0
- package/dist/content-hash.js +10 -0
- package/dist/content-hash.js.map +1 -0
- package/dist/converters/generic-regex.d.ts +37 -0
- package/dist/converters/generic-regex.d.ts.map +1 -0
- package/dist/converters/generic-regex.js +59 -0
- package/dist/converters/generic-regex.js.map +1 -0
- package/dist/converters/index.d.ts +4 -0
- package/dist/converters/index.d.ts.map +1 -1
- package/dist/converters/index.js +2 -0
- package/dist/converters/index.js.map +1 -1
- package/dist/converters/sarif.d.ts +18 -0
- package/dist/converters/sarif.d.ts.map +1 -0
- package/dist/converters/sarif.js +142 -0
- package/dist/converters/sarif.js.map +1 -0
- package/dist/engine.d.ts +21 -1
- package/dist/engine.d.ts.map +1 -1
- package/dist/engine.js +215 -4
- package/dist/engine.js.map +1 -1
- package/dist/eval/pint-corpus.d.ts.map +1 -1
- package/dist/eval/pint-corpus.js +6 -2
- package/dist/eval/pint-corpus.js.map +1 -1
- package/dist/eval/rule-corpus.js +489 -489
- package/dist/eval/rule-corpus.js.map +1 -1
- package/dist/eval/skill-benchmark.d.ts +66 -0
- package/dist/eval/skill-benchmark.d.ts.map +1 -0
- package/dist/eval/skill-benchmark.js +194 -0
- package/dist/eval/skill-benchmark.js.map +1 -0
- package/dist/index.d.ts +4 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -1
- package/dist/index.js.map +1 -1
- package/dist/layer-integration.d.ts.map +1 -1
- package/dist/layer-integration.js +2 -0
- package/dist/layer-integration.js.map +1 -1
- package/dist/loader.d.ts +0 -3
- package/dist/loader.d.ts.map +1 -1
- package/dist/loader.js +7 -2
- package/dist/loader.js.map +1 -1
- package/dist/mcp-server.d.ts.map +1 -1
- package/dist/mcp-server.js +26 -0
- package/dist/mcp-server.js.map +1 -1
- package/dist/mcp-tools/scan-skill.d.ts +17 -0
- package/dist/mcp-tools/scan-skill.d.ts.map +1 -0
- package/dist/mcp-tools/scan-skill.js +65 -0
- package/dist/mcp-tools/scan-skill.js.map +1 -0
- package/dist/mcp-tools/validate.d.ts.map +1 -1
- package/dist/mcp-tools/validate.js +6 -0
- package/dist/mcp-tools/validate.js.map +1 -1
- package/dist/shadow-evaluator.d.ts.map +1 -1
- package/dist/shadow-evaluator.js +1 -0
- package/dist/shadow-evaluator.js.map +1 -1
- package/dist/tier0-invariant.d.ts.map +1 -1
- package/dist/tier0-invariant.js +1 -0
- package/dist/tier0-invariant.js.map +1 -1
- package/dist/tier1-blacklist.d.ts.map +1 -1
- package/dist/tier1-blacklist.js +1 -0
- package/dist/tier1-blacklist.js.map +1 -1
- package/dist/types.d.ts +23 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +3 -1
- package/rules/agent-manipulation/{ATR-2026-030-cross-agent-attack.yaml → ATR-2026-00030-cross-agent-attack.yaml} +3 -1
- package/rules/agent-manipulation/{ATR-2026-032-goal-hijacking.yaml → ATR-2026-00032-goal-hijacking.yaml} +3 -1
- package/rules/agent-manipulation/{ATR-2026-074-cross-agent-privilege-escalation.yaml → ATR-2026-00074-cross-agent-privilege-escalation.yaml} +3 -1
- package/rules/agent-manipulation/{ATR-2026-076-inter-agent-message-spoofing.yaml → ATR-2026-00076-inter-agent-message-spoofing.yaml} +3 -1
- package/rules/agent-manipulation/{ATR-2026-077-human-trust-exploitation.yaml → ATR-2026-00077-human-trust-exploitation.yaml} +3 -1
- package/rules/agent-manipulation/{ATR-2026-108-consensus-sybil-attack.yaml → ATR-2026-00108-consensus-sybil-attack.yaml} +3 -1
- package/rules/agent-manipulation/{ATR-2026-116-a2a-message-validation.yaml → ATR-2026-00116-a2a-message-validation.yaml} +4 -2
- package/rules/agent-manipulation/{ATR-2026-117-agent-identity-spoofing.yaml → ATR-2026-00117-agent-identity-spoofing.yaml} +4 -2
- package/rules/agent-manipulation/{ATR-2026-118-approval-fatigue.yaml → ATR-2026-00118-approval-fatigue.yaml} +3 -1
- package/rules/agent-manipulation/{ATR-2026-119-social-engineering-via-agent.yaml → ATR-2026-00119-social-engineering-via-agent.yaml} +3 -1
- package/rules/agent-manipulation/ATR-2026-00132-casual-authority-escalation.yaml +105 -0
- package/rules/agent-manipulation/ATR-2026-00139-casual-authority-redirect.yaml +53 -0
- package/rules/context-exfiltration/{ATR-2026-020-system-prompt-leak.yaml → ATR-2026-00020-system-prompt-leak.yaml} +3 -1
- package/rules/context-exfiltration/{ATR-2026-021-api-key-exposure.yaml → ATR-2026-00021-api-key-exposure.yaml} +3 -1
- package/rules/context-exfiltration/{ATR-2026-075-agent-memory-manipulation.yaml → ATR-2026-00075-agent-memory-manipulation.yaml} +3 -1
- package/rules/context-exfiltration/{ATR-2026-102-disguised-analytics-exfiltration.yaml → ATR-2026-00102-disguised-analytics-exfiltration.yaml} +3 -1
- package/rules/context-exfiltration/{ATR-2026-113-credential-theft.yaml → ATR-2026-00113-credential-theft.yaml} +3 -1
- package/rules/context-exfiltration/{ATR-2026-114-oauth-token-abuse.yaml → ATR-2026-00114-oauth-token-abuse.yaml} +3 -1
- package/rules/context-exfiltration/{ATR-2026-115-env-var-harvesting.yaml → ATR-2026-00115-env-var-harvesting.yaml} +3 -1
- package/rules/context-exfiltration/ATR-2026-00136-tool-response-data-piggyback.yaml +100 -0
- package/rules/context-exfiltration/ATR-2026-00141-example-format-key-leak.yaml +52 -0
- package/rules/context-exfiltration/ATR-2026-00142-piggyback-transition-words.yaml +55 -0
- package/rules/context-exfiltration/ATR-2026-00145-obfuscated-key-disclosure.yaml +49 -0
- package/rules/context-exfiltration/ATR-2026-00146-env-var-existence-probe.yaml +49 -0
- package/rules/data-poisoning/{ATR-2026-070-data-poisoning.yaml → ATR-2026-00070-data-poisoning.yaml} +3 -1
- package/rules/excessive-autonomy/{ATR-2026-050-runaway-agent-loop.yaml → ATR-2026-00050-runaway-agent-loop.yaml} +3 -1
- package/rules/excessive-autonomy/{ATR-2026-051-resource-exhaustion.yaml → ATR-2026-00051-resource-exhaustion.yaml} +3 -1
- package/rules/excessive-autonomy/{ATR-2026-052-cascading-failure.yaml → ATR-2026-00052-cascading-failure.yaml} +3 -1
- package/rules/excessive-autonomy/{ATR-2026-098-unauthorized-financial-action.yaml → ATR-2026-00098-unauthorized-financial-action.yaml} +3 -1
- package/rules/excessive-autonomy/{ATR-2026-099-high-risk-tool-gate.yaml → ATR-2026-00099-high-risk-tool-gate.yaml} +3 -1
- package/rules/model-security/{ATR-2026-072-model-behavior-extraction.yaml → ATR-2026-00072-model-behavior-extraction.yaml} +3 -1
- package/rules/model-security/{ATR-2026-073-malicious-finetuning-data.yaml → ATR-2026-00073-malicious-finetuning-data.yaml} +3 -1
- package/rules/privilege-escalation/{ATR-2026-040-privilege-escalation.yaml → ATR-2026-00040-privilege-escalation.yaml} +3 -1
- package/rules/privilege-escalation/{ATR-2026-041-scope-creep.yaml → ATR-2026-00041-scope-creep.yaml} +3 -1
- package/rules/privilege-escalation/{ATR-2026-107-delayed-execution-bypass.yaml → ATR-2026-00107-delayed-execution-bypass.yaml} +3 -1
- package/rules/privilege-escalation/{ATR-2026-110-eval-injection.yaml → ATR-2026-00110-eval-injection.yaml} +3 -1
- package/rules/privilege-escalation/{ATR-2026-111-shell-escape.yaml → ATR-2026-00111-shell-escape.yaml} +5 -3
- package/rules/privilege-escalation/{ATR-2026-112-dynamic-import-exploitation.yaml → ATR-2026-00112-dynamic-import-exploitation.yaml} +3 -1
- package/rules/privilege-escalation/ATR-2026-00143-casual-privilege-escalation.yaml +53 -0
- package/rules/privilege-escalation/ATR-2026-00144-rationalized-safety-bypass.yaml +49 -0
- package/rules/prompt-injection/{ATR-2026-001-direct-prompt-injection.yaml → ATR-2026-00001-direct-prompt-injection.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-002-indirect-prompt-injection.yaml → ATR-2026-00002-indirect-prompt-injection.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-003-jailbreak-attempt.yaml → ATR-2026-00003-jailbreak-attempt.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-004-system-prompt-override.yaml → ATR-2026-00004-system-prompt-override.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-005-multi-turn-injection.yaml → ATR-2026-00005-multi-turn-injection.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-080-encoding-evasion.yaml → ATR-2026-00080-encoding-evasion.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-081-semantic-multi-turn.yaml → ATR-2026-00081-semantic-multi-turn.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-082-fingerprint-evasion.yaml → ATR-2026-00082-fingerprint-evasion.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-083-indirect-tool-injection.yaml → ATR-2026-00083-indirect-tool-injection.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-084-structured-data-injection.yaml → ATR-2026-00084-structured-data-injection.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-085-audit-evasion.yaml → ATR-2026-00085-audit-evasion.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-086-visual-spoofing.yaml → ATR-2026-00086-visual-spoofing.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-087-rule-probing.yaml → ATR-2026-00087-rule-probing.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-088-adaptive-countermeasure.yaml → ATR-2026-00088-adaptive-countermeasure.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-089-polymorphic-skill.yaml → ATR-2026-00089-polymorphic-skill.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-090-threat-intel-exfil.yaml → ATR-2026-00090-threat-intel-exfil.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-091-nested-payload.yaml → ATR-2026-00091-nested-payload.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-092-consensus-poisoning.yaml → ATR-2026-00092-consensus-poisoning.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-093-gradual-escalation.yaml → ATR-2026-00093-gradual-escalation.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-094-audit-bypass.yaml → ATR-2026-00094-audit-bypass.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-097-cjk-injection-patterns.yaml → ATR-2026-00097-cjk-injection-patterns.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-104-persona-hijacking.yaml → ATR-2026-00104-persona-hijacking.yaml} +3 -1
- package/rules/prompt-injection/ATR-2026-00130-indirect-authority-claim.yaml +103 -0
- package/rules/prompt-injection/ATR-2026-00131-fictional-academic-framing.yaml +99 -0
- package/rules/prompt-injection/ATR-2026-00133-paraphrase-injection.yaml +117 -0
- package/rules/prompt-injection/ATR-2026-00137-authority-claim-injection.yaml +52 -0
- package/rules/prompt-injection/ATR-2026-00138-fictional-framing-bypass.yaml +51 -0
- package/rules/prompt-injection/ATR-2026-00140-indirect-reference-reversal.yaml +52 -0
- package/rules/prompt-injection/ATR-2026-00148-language-switch-injection.yaml +71 -0
- package/rules/skill-compromise/{ATR-2026-060-skill-impersonation.yaml → ATR-2026-00060-skill-impersonation.yaml} +3 -1
- package/rules/skill-compromise/{ATR-2026-061-description-behavior-mismatch.yaml → ATR-2026-00061-description-behavior-mismatch.yaml} +3 -1
- package/rules/skill-compromise/{ATR-2026-062-hidden-capability.yaml → ATR-2026-00062-hidden-capability.yaml} +3 -1
- package/rules/skill-compromise/{ATR-2026-063-skill-chain-attack.yaml → ATR-2026-00063-skill-chain-attack.yaml} +3 -1
- package/rules/skill-compromise/{ATR-2026-064-over-permissioned-skill.yaml → ATR-2026-00064-over-permissioned-skill.yaml} +3 -1
- package/rules/skill-compromise/{ATR-2026-065-skill-update-attack.yaml → ATR-2026-00065-skill-update-attack.yaml} +3 -1
- package/rules/skill-compromise/{ATR-2026-066-parameter-injection.yaml → ATR-2026-00066-parameter-injection.yaml} +3 -1
- package/rules/skill-compromise/ATR-2026-00120-skill-instruction-injection.yaml +121 -0
- package/rules/skill-compromise/ATR-2026-00121-skill-dangerous-script.yaml +165 -0
- package/rules/skill-compromise/ATR-2026-00122-skill-weaponized-instruction.yaml +114 -0
- package/rules/skill-compromise/ATR-2026-00123-skill-overreach-permissions.yaml +118 -0
- package/rules/skill-compromise/ATR-2026-00124-skill-name-squatting.yaml +98 -0
- package/rules/skill-compromise/ATR-2026-00125-context-poisoning-compaction.yaml +93 -0
- package/rules/skill-compromise/ATR-2026-00126-skill-rug-pull-setup.yaml +99 -0
- package/rules/skill-compromise/ATR-2026-00127-subcommand-overflow.yaml +74 -0
- package/rules/skill-compromise/ATR-2026-00128-html-comment-hidden-payload.yaml +79 -0
- package/rules/skill-compromise/ATR-2026-00129-unicode-smuggling.yaml +73 -0
- package/rules/skill-compromise/ATR-2026-00134-fork-claim-impersonation.yaml +93 -0
- package/rules/skill-compromise/ATR-2026-00135-exfil-url-in-instructions.yaml +82 -0
- package/rules/skill-compromise/ATR-2026-00147-fork-impersonation.yaml +48 -0
- package/rules/tool-poisoning/{ATR-2026-010-mcp-malicious-response.yaml → ATR-2026-00010-mcp-malicious-response.yaml} +3 -1
- package/rules/tool-poisoning/{ATR-2026-011-tool-output-injection.yaml → ATR-2026-00011-tool-output-injection.yaml} +3 -1
- package/rules/tool-poisoning/{ATR-2026-012-unauthorized-tool-call.yaml → ATR-2026-00012-unauthorized-tool-call.yaml} +3 -1
- package/rules/tool-poisoning/{ATR-2026-013-tool-ssrf.yaml → ATR-2026-00013-tool-ssrf.yaml} +3 -1
- package/rules/tool-poisoning/{ATR-2026-095-supply-chain-poisoning.yaml → ATR-2026-00095-supply-chain-poisoning.yaml} +3 -1
- package/rules/tool-poisoning/{ATR-2026-096-registry-poisoning.yaml → ATR-2026-00096-registry-poisoning.yaml} +3 -1
- package/rules/tool-poisoning/{ATR-2026-100-consent-bypass-instruction.yaml → ATR-2026-00100-consent-bypass-instruction.yaml} +3 -1
- package/rules/tool-poisoning/{ATR-2026-101-trust-escalation-override.yaml → ATR-2026-00101-trust-escalation-override.yaml} +3 -1
- package/rules/tool-poisoning/{ATR-2026-103-hidden-safety-bypass-instruction.yaml → ATR-2026-00103-hidden-safety-bypass-instruction.yaml} +3 -1
- package/rules/tool-poisoning/{ATR-2026-105-silent-action-concealment.yaml → ATR-2026-00105-silent-action-concealment.yaml} +3 -1
- package/rules/tool-poisoning/{ATR-2026-106-schema-description-contradiction.yaml → ATR-2026-00106-schema-description-contradiction.yaml} +3 -1
- package/spec/atr-schema.yaml +32 -3
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
title: "Hidden LLM Safety Bypass Instructions in Tool Descriptions"
|
|
2
|
-
id: ATR-2026-
|
|
2
|
+
id: ATR-2026-00103
|
|
3
|
+
rule_version: 1
|
|
3
4
|
status: experimental
|
|
4
5
|
description: |
|
|
5
6
|
Detects tools that embed explicit instructions directing the LLM to disregard safety
|
|
@@ -27,6 +28,7 @@ references:
|
|
|
27
28
|
tags:
|
|
28
29
|
category: tool-poisoning
|
|
29
30
|
subcategory: hidden-llm-instructions
|
|
31
|
+
scan_target: mcp
|
|
30
32
|
confidence: medium
|
|
31
33
|
|
|
32
34
|
agent_source:
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
title: "Silent Action Concealment Instructions in Tool Descriptions"
|
|
2
|
-
id: ATR-2026-
|
|
2
|
+
id: ATR-2026-00105
|
|
3
|
+
rule_version: 1
|
|
3
4
|
status: experimental
|
|
4
5
|
description: |
|
|
5
6
|
Detects MCP tools that explicitly instruct the LLM to perform actions silently or hide
|
|
@@ -26,6 +27,7 @@ references:
|
|
|
26
27
|
tags:
|
|
27
28
|
category: tool-poisoning
|
|
28
29
|
subcategory: silent-action-instruction
|
|
30
|
+
scan_target: mcp
|
|
29
31
|
confidence: medium
|
|
30
32
|
|
|
31
33
|
agent_source:
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
title: "Schema-Description Contradiction Attack"
|
|
2
|
-
id: ATR-2026-
|
|
2
|
+
id: ATR-2026-00106
|
|
3
|
+
rule_version: 1
|
|
3
4
|
status: experimental
|
|
4
5
|
description: |
|
|
5
6
|
Detects tools that claim read-only or safe functionality in their description but expose
|
|
@@ -25,6 +26,7 @@ references:
|
|
|
25
26
|
tags:
|
|
26
27
|
category: tool-poisoning
|
|
27
28
|
subcategory: schema-description-mismatch
|
|
29
|
+
scan_target: mcp
|
|
28
30
|
confidence: medium
|
|
29
31
|
|
|
30
32
|
agent_source:
|
package/spec/atr-schema.yaml
CHANGED
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
$schema: "https://json-schema.org/draft/2020-12/schema"
|
|
11
11
|
title: ATR Rule Schema
|
|
12
12
|
description: Schema for Agent Threat Rules (ATR) detection rules
|
|
13
|
-
version: "
|
|
13
|
+
version: "1.0.0"
|
|
14
14
|
|
|
15
15
|
type: object
|
|
16
16
|
required:
|
|
@@ -43,8 +43,8 @@ properties:
|
|
|
43
43
|
|
|
44
44
|
id:
|
|
45
45
|
type: string
|
|
46
|
-
pattern: "^ATR-\\d{4}-\\d{
|
|
47
|
-
description: "Unique rule identifier. Format: ATR-YYYY-
|
|
46
|
+
pattern: "^ATR-\\d{4}-\\d{5}$"
|
|
47
|
+
description: "Unique rule identifier. Format: ATR-YYYY-NNNNN (e.g., ATR-2026-00001)"
|
|
48
48
|
|
|
49
49
|
status:
|
|
50
50
|
type: string
|
|
@@ -69,6 +69,11 @@ properties:
|
|
|
69
69
|
pattern: "^\\d{4}/\\d{2}/\\d{2}$"
|
|
70
70
|
description: "Last modification date in YYYY/MM/DD format"
|
|
71
71
|
|
|
72
|
+
rule_version:
|
|
73
|
+
type: integer
|
|
74
|
+
minimum: 1
|
|
75
|
+
description: "Rule version number. Bump when detection logic changes. Starts at 1."
|
|
76
|
+
|
|
72
77
|
# === Classification ===
|
|
73
78
|
|
|
74
79
|
detection_tier:
|
|
@@ -114,6 +119,26 @@ properties:
|
|
|
114
119
|
items:
|
|
115
120
|
type: string
|
|
116
121
|
description: Related CVE identifiers
|
|
122
|
+
owasp_agentic:
|
|
123
|
+
type: array
|
|
124
|
+
items:
|
|
125
|
+
type: string
|
|
126
|
+
description: "OWASP Agentic Top 10 references (e.g., ASI01, ASI02)"
|
|
127
|
+
owasp_ast:
|
|
128
|
+
type: array
|
|
129
|
+
items:
|
|
130
|
+
type: string
|
|
131
|
+
description: "OWASP Agentic Skills Top 10 references (e.g., AST01)"
|
|
132
|
+
safe_mcp:
|
|
133
|
+
type: array
|
|
134
|
+
items:
|
|
135
|
+
type: string
|
|
136
|
+
description: "SAFE-MCP technique IDs (e.g., SMCP-T001)"
|
|
137
|
+
research:
|
|
138
|
+
type: array
|
|
139
|
+
items:
|
|
140
|
+
type: string
|
|
141
|
+
description: "Research paper references or URLs"
|
|
117
142
|
|
|
118
143
|
# === Tags (ATR classification) ===
|
|
119
144
|
|
|
@@ -141,6 +166,10 @@ properties:
|
|
|
141
166
|
type: string
|
|
142
167
|
enum: [high, medium, low]
|
|
143
168
|
description: Expected accuracy of this rule (high = low false positive rate)
|
|
169
|
+
scan_target:
|
|
170
|
+
type: string
|
|
171
|
+
enum: [mcp, skill, both, runtime]
|
|
172
|
+
description: "Which scan path this rule belongs to. mcp=runtime events, skill=SKILL.md static scan, both=fires in both paths, runtime=behavior monitoring."
|
|
144
173
|
|
|
145
174
|
# === Agent Source (analogous to Sigma's logsource) ===
|
|
146
175
|
|