@panguard-ai/atr 1.4.2 → 1.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/ISSUE_TEMPLATE/evasion-report.yml +75 -0
- package/.github/ISSUE_TEMPLATE/false-positive.yml +31 -0
- package/.github/ISSUE_TEMPLATE/mirofish-prediction.yml +128 -0
- package/.github/ISSUE_TEMPLATE/new-rule.yml +37 -0
- package/.github/PULL_REQUEST_TEMPLATE.md +23 -0
- package/.github/workflows/rule-quality.yml +203 -0
- package/.github/workflows/validate.yml +42 -0
- package/CHANGELOG.md +30 -0
- package/CONTRIBUTING.md +168 -0
- package/CONTRIBUTORS.md +28 -0
- package/COVERAGE.md +135 -0
- package/LIMITATIONS.md +154 -0
- package/SECURITY.md +48 -0
- package/THREAT-MODEL.md +243 -0
- package/docs/contribution-paths.md +202 -0
- package/docs/mirofish-prediction-guide.md +304 -0
- package/docs/quick-start.md +245 -0
- package/docs/rule-writing-guide.md +647 -0
- package/docs/schema-spec.md +594 -0
- package/examples/how-to-write-a-rule.md +251 -0
- package/package.json +10 -57
- package/src/index.ts +7 -0
- package/tsconfig.json +17 -0
- package/dist/cli.d.ts +0 -14
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js +0 -744
- package/dist/cli.js.map +0 -1
- package/dist/coverage-analyzer.d.ts +0 -43
- package/dist/coverage-analyzer.d.ts.map +0 -1
- package/dist/coverage-analyzer.js +0 -329
- package/dist/coverage-analyzer.js.map +0 -1
- package/dist/engine.d.ts +0 -136
- package/dist/engine.d.ts.map +0 -1
- package/dist/engine.js +0 -781
- package/dist/engine.js.map +0 -1
- package/dist/index.d.ts +0 -26
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js +0 -18
- package/dist/index.js.map +0 -1
- package/dist/loader.d.ts +0 -21
- package/dist/loader.d.ts.map +0 -1
- package/dist/loader.js +0 -149
- package/dist/loader.js.map +0 -1
- package/dist/mcp-server.d.ts +0 -13
- package/dist/mcp-server.d.ts.map +0 -1
- package/dist/mcp-server.js +0 -244
- package/dist/mcp-server.js.map +0 -1
- package/dist/mcp-tools/coverage-gaps.d.ts +0 -13
- package/dist/mcp-tools/coverage-gaps.d.ts.map +0 -1
- package/dist/mcp-tools/coverage-gaps.js +0 -57
- package/dist/mcp-tools/coverage-gaps.js.map +0 -1
- package/dist/mcp-tools/list-rules.d.ts +0 -17
- package/dist/mcp-tools/list-rules.d.ts.map +0 -1
- package/dist/mcp-tools/list-rules.js +0 -45
- package/dist/mcp-tools/list-rules.js.map +0 -1
- package/dist/mcp-tools/scan.d.ts +0 -18
- package/dist/mcp-tools/scan.d.ts.map +0 -1
- package/dist/mcp-tools/scan.js +0 -87
- package/dist/mcp-tools/scan.js.map +0 -1
- package/dist/mcp-tools/submit-proposal.d.ts +0 -12
- package/dist/mcp-tools/submit-proposal.d.ts.map +0 -1
- package/dist/mcp-tools/submit-proposal.js +0 -116
- package/dist/mcp-tools/submit-proposal.js.map +0 -1
- package/dist/mcp-tools/threat-summary.d.ts +0 -12
- package/dist/mcp-tools/threat-summary.d.ts.map +0 -1
- package/dist/mcp-tools/threat-summary.js +0 -72
- package/dist/mcp-tools/threat-summary.js.map +0 -1
- package/dist/mcp-tools/validate.d.ts +0 -15
- package/dist/mcp-tools/validate.d.ts.map +0 -1
- package/dist/mcp-tools/validate.js +0 -57
- package/dist/mcp-tools/validate.js.map +0 -1
- package/dist/modules/index.d.ts +0 -144
- package/dist/modules/index.d.ts.map +0 -1
- package/dist/modules/index.js +0 -82
- package/dist/modules/index.js.map +0 -1
- package/dist/modules/semantic.d.ts +0 -105
- package/dist/modules/semantic.d.ts.map +0 -1
- package/dist/modules/semantic.js +0 -289
- package/dist/modules/semantic.js.map +0 -1
- package/dist/modules/session.d.ts +0 -70
- package/dist/modules/session.d.ts.map +0 -1
- package/dist/modules/session.js +0 -163
- package/dist/modules/session.js.map +0 -1
- package/dist/rule-scaffolder.d.ts +0 -39
- package/dist/rule-scaffolder.d.ts.map +0 -1
- package/dist/rule-scaffolder.js +0 -171
- package/dist/rule-scaffolder.js.map +0 -1
- package/dist/session-tracker.d.ts +0 -56
- package/dist/session-tracker.d.ts.map +0 -1
- package/dist/session-tracker.js +0 -175
- package/dist/session-tracker.js.map +0 -1
- package/dist/skill-fingerprint.d.ts +0 -96
- package/dist/skill-fingerprint.d.ts.map +0 -1
- package/dist/skill-fingerprint.js +0 -336
- package/dist/skill-fingerprint.js.map +0 -1
- package/dist/types.d.ts +0 -211
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js +0 -6
- package/dist/types.js.map +0 -1
- package/rules/agent-manipulation/ATR-2026-00030-cross-agent-attack.yaml +0 -177
- package/rules/agent-manipulation/ATR-2026-00032-goal-hijacking.yaml +0 -137
- package/rules/agent-manipulation/ATR-2026-00074-cross-agent-privilege-escalation.yaml +0 -117
- package/rules/agent-manipulation/ATR-2026-00076-inter-agent-message-spoofing.yaml +0 -167
- package/rules/agent-manipulation/ATR-2026-00077-human-trust-exploitation.yaml +0 -146
- package/rules/agent-manipulation/ATR-2026-00108-consensus-sybil-attack.yaml +0 -105
- package/rules/agent-manipulation/ATR-2026-00116-a2a-message-validation.yaml +0 -92
- package/rules/agent-manipulation/ATR-2026-00117-agent-identity-spoofing.yaml +0 -92
- package/rules/agent-manipulation/ATR-2026-00118-approval-fatigue.yaml +0 -89
- package/rules/agent-manipulation/ATR-2026-00119-social-engineering-via-agent.yaml +0 -89
- package/rules/agent-manipulation/ATR-2026-00132-casual-authority-escalation.yaml +0 -99
- package/rules/agent-manipulation/ATR-2026-00139-casual-authority-redirect.yaml +0 -53
- package/rules/context-exfiltration/ATR-2026-00020-system-prompt-leak.yaml +0 -177
- package/rules/context-exfiltration/ATR-2026-00021-api-key-exposure.yaml +0 -178
- package/rules/context-exfiltration/ATR-2026-00075-agent-memory-manipulation.yaml +0 -117
- package/rules/context-exfiltration/ATR-2026-00102-disguised-analytics-exfiltration.yaml +0 -71
- package/rules/context-exfiltration/ATR-2026-00113-credential-theft.yaml +0 -89
- package/rules/context-exfiltration/ATR-2026-00114-oauth-token-abuse.yaml +0 -89
- package/rules/context-exfiltration/ATR-2026-00115-env-var-harvesting.yaml +0 -90
- package/rules/context-exfiltration/ATR-2026-00136-tool-response-data-piggyback.yaml +0 -100
- package/rules/context-exfiltration/ATR-2026-00141-example-format-key-leak.yaml +0 -52
- package/rules/context-exfiltration/ATR-2026-00142-piggyback-transition-words.yaml +0 -55
- package/rules/context-exfiltration/ATR-2026-00145-obfuscated-key-disclosure.yaml +0 -49
- package/rules/context-exfiltration/ATR-2026-00146-env-var-existence-probe.yaml +0 -49
- package/rules/data-poisoning/ATR-2026-00070-data-poisoning.yaml +0 -162
- package/rules/excessive-autonomy/ATR-2026-00050-runaway-agent-loop.yaml +0 -136
- package/rules/excessive-autonomy/ATR-2026-00051-resource-exhaustion.yaml +0 -139
- package/rules/excessive-autonomy/ATR-2026-00052-cascading-failure.yaml +0 -155
- package/rules/excessive-autonomy/ATR-2026-00098-unauthorized-financial-action.yaml +0 -157
- package/rules/excessive-autonomy/ATR-2026-00099-high-risk-tool-gate.yaml +0 -176
- package/rules/model-security/ATR-2026-00072-model-behavior-extraction.yaml +0 -117
- package/rules/model-security/ATR-2026-00073-malicious-finetuning-data.yaml +0 -110
- package/rules/privilege-escalation/ATR-2026-00040-privilege-escalation.yaml +0 -177
- package/rules/privilege-escalation/ATR-2026-00041-scope-creep.yaml +0 -126
- package/rules/privilege-escalation/ATR-2026-00107-delayed-execution-bypass.yaml +0 -69
- package/rules/privilege-escalation/ATR-2026-00110-eval-injection.yaml +0 -92
- package/rules/privilege-escalation/ATR-2026-00111-shell-escape.yaml +0 -93
- package/rules/privilege-escalation/ATR-2026-00112-dynamic-import-exploitation.yaml +0 -89
- package/rules/privilege-escalation/ATR-2026-00143-casual-privilege-escalation.yaml +0 -53
- package/rules/privilege-escalation/ATR-2026-00144-rationalized-safety-bypass.yaml +0 -49
- package/rules/prompt-injection/ATR-2026-00001-direct-prompt-injection.yaml +0 -563
- package/rules/prompt-injection/ATR-2026-00002-indirect-prompt-injection.yaml +0 -216
- package/rules/prompt-injection/ATR-2026-00003-jailbreak-attempt.yaml +0 -397
- package/rules/prompt-injection/ATR-2026-00004-system-prompt-override.yaml +0 -308
- package/rules/prompt-injection/ATR-2026-00005-multi-turn-injection.yaml +0 -183
- package/rules/prompt-injection/ATR-2026-00080-encoding-evasion.yaml +0 -88
- package/rules/prompt-injection/ATR-2026-00081-semantic-multi-turn.yaml +0 -85
- package/rules/prompt-injection/ATR-2026-00082-fingerprint-evasion.yaml +0 -84
- package/rules/prompt-injection/ATR-2026-00083-indirect-tool-injection.yaml +0 -87
- package/rules/prompt-injection/ATR-2026-00084-structured-data-injection.yaml +0 -86
- package/rules/prompt-injection/ATR-2026-00085-audit-evasion.yaml +0 -84
- package/rules/prompt-injection/ATR-2026-00086-visual-spoofing.yaml +0 -88
- package/rules/prompt-injection/ATR-2026-00087-rule-probing.yaml +0 -82
- package/rules/prompt-injection/ATR-2026-00088-adaptive-countermeasure.yaml +0 -84
- package/rules/prompt-injection/ATR-2026-00089-polymorphic-skill.yaml +0 -85
- package/rules/prompt-injection/ATR-2026-00090-threat-intel-exfil.yaml +0 -84
- package/rules/prompt-injection/ATR-2026-00091-nested-payload.yaml +0 -88
- package/rules/prompt-injection/ATR-2026-00092-consensus-poisoning.yaml +0 -92
- package/rules/prompt-injection/ATR-2026-00093-gradual-escalation.yaml +0 -86
- package/rules/prompt-injection/ATR-2026-00094-audit-bypass.yaml +0 -86
- package/rules/prompt-injection/ATR-2026-00097-cjk-injection-patterns.yaml +0 -339
- package/rules/prompt-injection/ATR-2026-00104-persona-hijacking.yaml +0 -74
- package/rules/prompt-injection/ATR-2026-00130-indirect-authority-claim.yaml +0 -97
- package/rules/prompt-injection/ATR-2026-00131-fictional-academic-framing.yaml +0 -93
- package/rules/prompt-injection/ATR-2026-00133-paraphrase-injection.yaml +0 -111
- package/rules/prompt-injection/ATR-2026-00137-authority-claim-injection.yaml +0 -52
- package/rules/prompt-injection/ATR-2026-00138-fictional-framing-bypass.yaml +0 -51
- package/rules/prompt-injection/ATR-2026-00140-indirect-reference-reversal.yaml +0 -52
- package/rules/prompt-injection/ATR-2026-00148-language-switch-injection.yaml +0 -71
- package/rules/skill-compromise/ATR-2026-00060-skill-impersonation.yaml +0 -155
- package/rules/skill-compromise/ATR-2026-00061-description-behavior-mismatch.yaml +0 -100
- package/rules/skill-compromise/ATR-2026-00062-hidden-capability.yaml +0 -98
- package/rules/skill-compromise/ATR-2026-00063-skill-chain-attack.yaml +0 -99
- package/rules/skill-compromise/ATR-2026-00064-over-permissioned-skill.yaml +0 -117
- package/rules/skill-compromise/ATR-2026-00065-skill-update-attack.yaml +0 -95
- package/rules/skill-compromise/ATR-2026-00066-parameter-injection.yaml +0 -108
- package/rules/skill-compromise/ATR-2026-00120-skill-instruction-injection.yaml +0 -121
- package/rules/skill-compromise/ATR-2026-00121-skill-dangerous-script.yaml +0 -165
- package/rules/skill-compromise/ATR-2026-00122-skill-weaponized-instruction.yaml +0 -114
- package/rules/skill-compromise/ATR-2026-00123-skill-overreach-permissions.yaml +0 -118
- package/rules/skill-compromise/ATR-2026-00124-skill-name-squatting.yaml +0 -98
- package/rules/skill-compromise/ATR-2026-00125-context-poisoning-compaction.yaml +0 -93
- package/rules/skill-compromise/ATR-2026-00126-skill-rug-pull-setup.yaml +0 -99
- package/rules/skill-compromise/ATR-2026-00127-subcommand-overflow.yaml +0 -74
- package/rules/skill-compromise/ATR-2026-00128-html-comment-hidden-payload.yaml +0 -79
- package/rules/skill-compromise/ATR-2026-00129-unicode-smuggling.yaml +0 -73
- package/rules/skill-compromise/ATR-2026-00134-fork-claim-impersonation.yaml +0 -86
- package/rules/skill-compromise/ATR-2026-00135-exfil-url-in-instructions.yaml +0 -82
- package/rules/skill-compromise/ATR-2026-00147-fork-impersonation.yaml +0 -48
- package/rules/tool-poisoning/ATR-2026-00010-mcp-malicious-response.yaml +0 -239
- package/rules/tool-poisoning/ATR-2026-00011-tool-output-injection.yaml +0 -196
- package/rules/tool-poisoning/ATR-2026-00012-unauthorized-tool-call.yaml +0 -201
- package/rules/tool-poisoning/ATR-2026-00013-tool-ssrf.yaml +0 -219
- package/rules/tool-poisoning/ATR-2026-00095-supply-chain-poisoning.yaml +0 -93
- package/rules/tool-poisoning/ATR-2026-00096-registry-poisoning.yaml +0 -95
- package/rules/tool-poisoning/ATR-2026-00100-consent-bypass-instruction.yaml +0 -82
- package/rules/tool-poisoning/ATR-2026-00101-trust-escalation-override.yaml +0 -68
- package/rules/tool-poisoning/ATR-2026-00103-hidden-safety-bypass-instruction.yaml +0 -73
- package/rules/tool-poisoning/ATR-2026-00105-silent-action-concealment.yaml +0 -69
- package/rules/tool-poisoning/ATR-2026-00106-schema-description-contradiction.yaml +0 -68
- package/spec/atr-schema.yaml +0 -404
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
# How to Write an ATR Rule
|
|
2
|
+
|
|
3
|
+
This guide walks you through creating detection rules for AI agent threats.
|
|
4
|
+
|
|
5
|
+
## Before You Start
|
|
6
|
+
|
|
7
|
+
ATR rules use regex-based pattern matching (`detection_tier: pattern`). Be honest about what this can and cannot do:
|
|
8
|
+
|
|
9
|
+
**Regex CAN detect:**
|
|
10
|
+
|
|
11
|
+
- Known attack phrases and keywords
|
|
12
|
+
- Encoded payloads (base64, hex, unicode)
|
|
13
|
+
- Credential formats (API keys, tokens)
|
|
14
|
+
- Structural patterns (markdown injection, delimiter abuse)
|
|
15
|
+
|
|
16
|
+
**Regex CANNOT detect:**
|
|
17
|
+
|
|
18
|
+
- Paraphrased attacks (same meaning, different words)
|
|
19
|
+
- Multilingual variants you haven't written patterns for
|
|
20
|
+
- Semantic manipulation (subtle framing, selective omission)
|
|
21
|
+
- Protocol-level attacks (timing, ordering)
|
|
22
|
+
|
|
23
|
+
If your rule tries to detect something regex fundamentally cannot catch, mark it clearly in the description. See `LIMITATIONS.md` for the full list.
|
|
24
|
+
|
|
25
|
+
## Step 1: Identify the Attack
|
|
26
|
+
|
|
27
|
+
Before writing a rule, clearly define:
|
|
28
|
+
|
|
29
|
+
- What attack does this detect?
|
|
30
|
+
- Which OWASP Agentic Top 10 category does it map to?
|
|
31
|
+
- What data source is needed (LLM I/O, tool calls, agent behavior)?
|
|
32
|
+
- What specific text patterns indicate this attack?
|
|
33
|
+
- Can this attack be trivially rephrased to evade detection?
|
|
34
|
+
|
|
35
|
+
## Step 2: Choose the Right Category
|
|
36
|
+
|
|
37
|
+
| Category | Use When |
|
|
38
|
+
| ---------------------- | ----------------------------------------------------------------- |
|
|
39
|
+
| `prompt-injection` | User/external input tries to override agent instructions |
|
|
40
|
+
| `tool-poisoning` | Tool responses contain malicious or manipulative content |
|
|
41
|
+
| `context-exfiltration` | Agent leaks system prompt, API keys, or internal data |
|
|
42
|
+
| `agent-manipulation` | One agent manipulates another agent's behavior |
|
|
43
|
+
| `privilege-escalation` | Agent accesses resources beyond its authorized scope |
|
|
44
|
+
| `excessive-autonomy` | Agent operates beyond intended boundaries (loops, resource abuse) |
|
|
45
|
+
| `data-poisoning` | Training or retrieval data has been tampered with |
|
|
46
|
+
| `skill-compromise` | MCP skills/tools are impersonated, hijacked, or over-permissioned |
|
|
47
|
+
| `model-security` | Model weights, behavior, or training pipeline are targeted |
|
|
48
|
+
|
|
49
|
+
## Step 3: Write the Rule
|
|
50
|
+
|
|
51
|
+
### Required fields (per schema v0.1)
|
|
52
|
+
|
|
53
|
+
```yaml
|
|
54
|
+
title: 'Descriptive Title of What This Detects'
|
|
55
|
+
id: ATR-2026-XXX
|
|
56
|
+
status: experimental
|
|
57
|
+
description: |
|
|
58
|
+
What this rule detects and how. Include limitations.
|
|
59
|
+
Note: This rule detects [specific patterns], not [what it cannot detect].
|
|
60
|
+
author: 'Your Name'
|
|
61
|
+
date: '2026/03/09'
|
|
62
|
+
schema_version: '0.1'
|
|
63
|
+
detection_tier: pattern
|
|
64
|
+
maturity: experimental
|
|
65
|
+
severity: high
|
|
66
|
+
|
|
67
|
+
references:
|
|
68
|
+
owasp_llm:
|
|
69
|
+
- 'LLM01:2025 - Prompt Injection'
|
|
70
|
+
owasp_agentic:
|
|
71
|
+
- 'ASI01:2026 - Agent Goal Hijack'
|
|
72
|
+
mitre_atlas:
|
|
73
|
+
- 'AML.T0054 - LLM Prompt Injection'
|
|
74
|
+
|
|
75
|
+
tags:
|
|
76
|
+
category: prompt-injection
|
|
77
|
+
subcategory: your-subcategory
|
|
78
|
+
confidence: medium
|
|
79
|
+
|
|
80
|
+
agent_source:
|
|
81
|
+
type: llm_io
|
|
82
|
+
framework:
|
|
83
|
+
- any
|
|
84
|
+
provider:
|
|
85
|
+
- any
|
|
86
|
+
|
|
87
|
+
detection:
|
|
88
|
+
conditions:
|
|
89
|
+
- field: user_input
|
|
90
|
+
operator: regex
|
|
91
|
+
value: "(?i)your\\s+regex\\s+pattern\\s+here"
|
|
92
|
+
description: 'What this pattern catches'
|
|
93
|
+
- field: user_input
|
|
94
|
+
operator: regex
|
|
95
|
+
value: "(?i)another\\s+pattern"
|
|
96
|
+
description: 'What this second pattern catches'
|
|
97
|
+
condition: any
|
|
98
|
+
false_positives:
|
|
99
|
+
- 'Describe legitimate scenarios that could trigger this rule'
|
|
100
|
+
|
|
101
|
+
response:
|
|
102
|
+
actions:
|
|
103
|
+
- block_input
|
|
104
|
+
- alert
|
|
105
|
+
- snapshot
|
|
106
|
+
auto_response_threshold: high
|
|
107
|
+
message_template: |
|
|
108
|
+
[ATR-2026-XXX] Rule triggered. Pattern: {matched_pattern}.
|
|
109
|
+
Session: {session_id}.
|
|
110
|
+
|
|
111
|
+
test_cases:
|
|
112
|
+
true_positives:
|
|
113
|
+
- input: 'Exact text that SHOULD trigger this rule'
|
|
114
|
+
expected: triggered
|
|
115
|
+
description: 'Why this should trigger'
|
|
116
|
+
true_negatives:
|
|
117
|
+
- input: 'Legitimate text that should NOT trigger'
|
|
118
|
+
expected: not_triggered
|
|
119
|
+
description: 'Why this should not trigger'
|
|
120
|
+
|
|
121
|
+
evasion_tests:
|
|
122
|
+
- input: 'Rephrased version that evades detection'
|
|
123
|
+
expected: not_triggered
|
|
124
|
+
bypass_technique: paraphrase
|
|
125
|
+
notes: 'Requires embedding similarity detection (v0.2)'
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
## Step 4: Severity Calibration
|
|
129
|
+
|
|
130
|
+
Choose severity based on real-world impact, not pattern sophistication:
|
|
131
|
+
|
|
132
|
+
| Severity | Criteria | Example |
|
|
133
|
+
| --------------- | -------------------------------------------------------------- | ------------------------------------------------------------ |
|
|
134
|
+
| `critical` | Immediate data loss, credential exposure, or system compromise | API key exfiltration with active exploitation |
|
|
135
|
+
| `high` | Significant security boundary violation | Direct prompt injection overriding safety controls |
|
|
136
|
+
| `medium` | Potential for escalation or policy violation | Suspicious tool call patterns without confirmed exploitation |
|
|
137
|
+
| `low` | Anomalous behavior worth logging | Unusual but possibly legitimate agent autonomy |
|
|
138
|
+
| `informational` | Context for security analysis | Metadata patterns useful for correlation |
|
|
139
|
+
|
|
140
|
+
## Step 5: Write Good Test Cases
|
|
141
|
+
|
|
142
|
+
### Minimum requirements
|
|
143
|
+
|
|
144
|
+
- 5 true positives (attacks that SHOULD trigger)
|
|
145
|
+
- 5 true negatives (legitimate content that should NOT trigger)
|
|
146
|
+
- 3 evasion tests (known bypass techniques)
|
|
147
|
+
|
|
148
|
+
### True positive tips
|
|
149
|
+
|
|
150
|
+
- Cover all detection conditions (each regex pattern should have at least one TP)
|
|
151
|
+
- Include realistic attack payloads, not toy examples
|
|
152
|
+
- Test edge cases: mixed case, extra whitespace, unicode
|
|
153
|
+
|
|
154
|
+
### True negative tips (most important for credibility)
|
|
155
|
+
|
|
156
|
+
- Include adversarial true negatives: inputs that LOOK suspicious but are legitimate
|
|
157
|
+
- Test professional language that overlaps with attack patterns
|
|
158
|
+
- Examples:
|
|
159
|
+
- "Please update the system prompt to reflect new branding" (legitimate, looks like injection)
|
|
160
|
+
- "As CEO, I am directing you to prioritize this task" (authority language, not an attack)
|
|
161
|
+
- "The analysis is 100% correct per our validation suite" (confidence language, not manipulation)
|
|
162
|
+
|
|
163
|
+
### Evasion test tips
|
|
164
|
+
|
|
165
|
+
- Document what you CANNOT catch. This builds trust.
|
|
166
|
+
- Common bypass techniques to test:
|
|
167
|
+
- `paraphrase`: Same meaning, completely different wording
|
|
168
|
+
- `language_switch`: Attack in German, Chinese, Japanese, etc.
|
|
169
|
+
- `indirect_reference`: Referring to attack concepts without using keywords
|
|
170
|
+
- `encoding`: Base64, ROT13, unicode homoglyphs
|
|
171
|
+
- `social_engineering`: Polite, professional tone wrapping malicious intent
|
|
172
|
+
- `fictional_framing`: "In a novel, the character would say..."
|
|
173
|
+
|
|
174
|
+
## Anti-Patterns (What NOT to Do)
|
|
175
|
+
|
|
176
|
+
### 1. Overly broad regex
|
|
177
|
+
|
|
178
|
+
```yaml
|
|
179
|
+
# BAD: Catches nearly everything
|
|
180
|
+
value: "(?i)(ignore|change|update|modify)"
|
|
181
|
+
|
|
182
|
+
# GOOD: Specific to attack context
|
|
183
|
+
value: "(?i)\\b(ignore|disregard)\\s+(all\\s+)?previous\\s+(instructions|directives|rules)\\b"
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
### 2. No word boundaries
|
|
187
|
+
|
|
188
|
+
```yaml
|
|
189
|
+
# BAD: Matches "signore" (Italian for "sir")
|
|
190
|
+
value: "(?i)ignore"
|
|
191
|
+
|
|
192
|
+
# GOOD: Word boundary prevents false positives
|
|
193
|
+
value: "(?i)\\bignore\\b\\s+\\b(previous|prior|above)\\b"
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### 3. Claiming behavioral detection with regex
|
|
197
|
+
|
|
198
|
+
```yaml
|
|
199
|
+
# BAD: This regex cannot detect actual cascading failures
|
|
200
|
+
description: "Detects cascading failures in agent pipelines"
|
|
201
|
+
|
|
202
|
+
# GOOD: Be honest about what regex detects
|
|
203
|
+
description: |
|
|
204
|
+
Detects textual descriptions of cascading failure patterns.
|
|
205
|
+
Note: Structural cascade prevention requires behavioral monitoring (v0.2).
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
### 4. Aggressive response actions for weak detection
|
|
209
|
+
|
|
210
|
+
```yaml
|
|
211
|
+
# BAD: Blocking based on text description detection
|
|
212
|
+
response:
|
|
213
|
+
actions:
|
|
214
|
+
- block_input
|
|
215
|
+
- kill_agent
|
|
216
|
+
|
|
217
|
+
# GOOD: Alert-only for pattern-tier detection of behavioral threats
|
|
218
|
+
response:
|
|
219
|
+
actions:
|
|
220
|
+
- alert
|
|
221
|
+
- snapshot
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
### 5. Missing false_positives section
|
|
225
|
+
|
|
226
|
+
Every rule WILL have false positives. If you can't think of any, your rule is either too narrow to be useful or you haven't thought hard enough.
|
|
227
|
+
|
|
228
|
+
## Step 6: Validate and Test
|
|
229
|
+
|
|
230
|
+
```bash
|
|
231
|
+
# Validate your rule structure
|
|
232
|
+
npx agent-threat-rules validate my-rule.yaml
|
|
233
|
+
|
|
234
|
+
# Run embedded test cases
|
|
235
|
+
npx agent-threat-rules test my-rule.yaml
|
|
236
|
+
|
|
237
|
+
# Show rule collection stats
|
|
238
|
+
npx agent-threat-rules stats
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
## Step 7: Submit
|
|
242
|
+
|
|
243
|
+
1. Fork `github.com/Agent-Threat-Rule/agent-threat-rules`
|
|
244
|
+
2. Place your rule in the correct category directory under `rules/`
|
|
245
|
+
3. Run `npx agent-threat-rules validate rules/` to check all rules
|
|
246
|
+
4. Run `npx agent-threat-rules test rules/` to run all test cases
|
|
247
|
+
5. Submit a PR with:
|
|
248
|
+
- Rule YAML file
|
|
249
|
+
- Description of what attack this detects
|
|
250
|
+
- References (OWASP, MITRE ATLAS, CVE if applicable)
|
|
251
|
+
- Any known limitations or evasion techniques
|
package/package.json
CHANGED
|
@@ -1,75 +1,28 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@panguard-ai/atr",
|
|
3
|
-
"version": "1.4.2",
|
|
3
|
+
"version": "1.4.3",
|
|
4
4
|
"type": "module",
|
|
5
|
-
"description": "
|
|
5
|
+
"description": "Thin wrapper around agent-threat-rules — re-exports all detection logic for monorepo consumers.",
|
|
6
6
|
"main": "./dist/index.js",
|
|
7
7
|
"types": "./dist/index.d.ts",
|
|
8
|
-
"bin": {
|
|
9
|
-
"atr": "./dist/cli.js",
|
|
10
|
-
"agent-threat-rules": "./dist/cli.js"
|
|
11
|
-
},
|
|
12
8
|
"exports": {
|
|
13
9
|
".": {
|
|
14
10
|
"import": "./dist/index.js",
|
|
15
11
|
"types": "./dist/index.d.ts"
|
|
16
|
-
},
|
|
17
|
-
"./mcp": {
|
|
18
|
-
"import": "./dist/mcp-server.js",
|
|
19
|
-
"types": "./dist/mcp-server.d.ts"
|
|
20
|
-
},
|
|
21
|
-
"./rules": "./rules",
|
|
22
|
-
"./spec": "./spec/atr-schema.yaml"
|
|
23
|
-
},
|
|
24
|
-
"engines": {
|
|
25
|
-
"node": ">=20.0.0"
|
|
12
|
+
}
|
|
26
13
|
},
|
|
27
14
|
"license": "MIT",
|
|
28
|
-
"repository": {
|
|
29
|
-
"type": "git",
|
|
30
|
-
"url": "https://github.com/Agent-Threat-Rule/agent-threat-rules.git"
|
|
31
|
-
},
|
|
32
|
-
"homepage": "https://github.com/Agent-Threat-Rule/agent-threat-rules",
|
|
33
|
-
"bugs": {
|
|
34
|
-
"url": "https://github.com/Agent-Threat-Rule/agent-threat-rules/issues"
|
|
15
|
+
"dependencies": {
|
|
16
|
+
"agent-threat-rules": "^1.1.1"
|
|
35
17
|
},
|
|
36
|
-
"keywords": [
|
|
37
|
-
"
|
|
38
|
-
"
|
|
39
|
-
"prompt-injection",
|
|
40
|
-
"threat-detection",
|
|
41
|
-
"mcp-security",
|
|
42
|
-
"llm-security",
|
|
43
|
-
"atr"
|
|
44
|
-
],
|
|
45
|
-
"publishConfig": {
|
|
46
|
-
"access": "public"
|
|
18
|
+
"devDependencies": {
|
|
19
|
+
"@types/node": "^22.14.0",
|
|
20
|
+
"typescript": "~5.7.3"
|
|
47
21
|
},
|
|
48
|
-
"files": [
|
|
49
|
-
"dist",
|
|
50
|
-
"spec",
|
|
51
|
-
"rules",
|
|
52
|
-
"package.json",
|
|
53
|
-
"README.md"
|
|
54
|
-
],
|
|
55
22
|
"scripts": {
|
|
56
23
|
"build": "tsc --build",
|
|
57
24
|
"clean": "rm -rf dist tsconfig.tsbuildinfo",
|
|
58
25
|
"typecheck": "tsc --noEmit",
|
|
59
|
-
"test": "
|
|
60
|
-
"dev": "tsc --build --watch",
|
|
61
|
-
"validate": "tsx tests/validate-rules.ts",
|
|
62
|
-
"prepublishOnly": "npm run build"
|
|
63
|
-
},
|
|
64
|
-
"dependencies": {
|
|
65
|
-
"@modelcontextprotocol/sdk": "^1.12.0",
|
|
66
|
-
"js-yaml": "^4.1.0"
|
|
67
|
-
},
|
|
68
|
-
"devDependencies": {
|
|
69
|
-
"@types/js-yaml": "^4.0.9",
|
|
70
|
-
"@types/node": "^22.14.0",
|
|
71
|
-
"tsx": "^4.7.0",
|
|
72
|
-
"typescript": "~5.7.3",
|
|
73
|
-
"vitest": "^3.0.0"
|
|
26
|
+
"test": "echo 'Tests run in agent-threat-rules upstream'"
|
|
74
27
|
}
|
|
75
|
-
}
|
|
28
|
+
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @panguard-ai/atr — Thin wrapper around agent-threat-rules npm package.
|
|
3
|
+
*
|
|
4
|
+
* All detection logic, rules, and types come from the upstream ATR project.
|
|
5
|
+
* This package exists only so monorepo consumers can import from '@panguard-ai/atr'.
|
|
6
|
+
*/
|
|
7
|
+
export * from 'agent-threat-rules';
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2022",
|
|
4
|
+
"module": "Node16",
|
|
5
|
+
"moduleResolution": "Node16",
|
|
6
|
+
"declaration": true,
|
|
7
|
+
"declarationMap": true,
|
|
8
|
+
"sourceMap": true,
|
|
9
|
+
"outDir": "./dist",
|
|
10
|
+
"rootDir": "./src",
|
|
11
|
+
"strict": true,
|
|
12
|
+
"esModuleInterop": true,
|
|
13
|
+
"skipLibCheck": true,
|
|
14
|
+
"composite": true
|
|
15
|
+
},
|
|
16
|
+
"include": ["src"]
|
|
17
|
+
}
|
package/dist/cli.d.ts
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
/**
|
|
3
|
-
* ATR CLI - Command-line interface for Agent Threat Rules
|
|
4
|
-
*
|
|
5
|
-
* Console output and dynamic filesystem access are expected in a CLI tool.
|
|
6
|
-
*
|
|
7
|
-
* Usage:
|
|
8
|
-
* npx agent-threat-rules scan <events.json> Scan events against all rules
|
|
9
|
-
* npx agent-threat-rules validate <rule.yaml> Validate a rule file
|
|
10
|
-
* npx agent-threat-rules test <rule.yaml> Run a rule's test cases
|
|
11
|
-
* npx agent-threat-rules stats Show rule collection stats
|
|
12
|
-
*/
|
|
13
|
-
export {};
|
|
14
|
-
//# sourceMappingURL=cli.d.ts.map
|
package/dist/cli.d.ts.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAEA;;;;;;;;;;GAUG"}
|