guard-scanner 3.4.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +57 -24
- package/SECURITY.md +1 -1
- package/docs/THREAT_TAXONOMY.md +1 -1
- package/hooks/guard-scanner/HOOK.md +32 -16
- package/hooks/guard-scanner/plugin.ts +7 -7
- package/openclaw.plugin.json +2 -2
- package/package.json +2 -2
- package/src/patterns.js +7 -7
- package/src/runtime-guard.js +15 -15
- package/src/scanner.js +1 -1
package/README.md
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
<p align="center">
|
|
2
2
|
<h1 align="center">🛡️ guard-scanner</h1>
|
|
3
3
|
<p align="center">
|
|
4
|
-
<strong>
|
|
5
|
-
|
|
6
|
-
|
|
4
|
+
<strong>Security scanner for AI agent skills — catches the bad stuff before it runs</strong><br>
|
|
5
|
+
Prompt injection, identity hijacking, memory poisoning, and 20+ more threat types.<br>
|
|
6
|
+
Zero dependencies. One command. Works with OpenClaw out of the box.
|
|
7
7
|
</p>
|
|
8
8
|
<p align="center">
|
|
9
9
|
<a href="https://www.npmjs.com/package/guard-scanner"><img src="https://img.shields.io/npm/v/guard-scanner.svg?style=flat-square&color=cb3837" alt="npm version"></a>
|
|
@@ -19,7 +19,8 @@
|
|
|
19
19
|
<a href="#threat-categories">Threat Categories</a> •
|
|
20
20
|
<a href="#openclaw-plugin-setup-v310">OpenClaw Plugin</a> •
|
|
21
21
|
<a href="#cicd-integration">CI/CD</a> •
|
|
22
|
-
<a href="#plugin-api">Plugin API</a>
|
|
22
|
+
<a href="#plugin-api">Plugin API</a> •
|
|
23
|
+
<a href="README_ja.md">🇯🇵 日本語</a>
|
|
23
24
|
</p>
|
|
24
25
|
</p>
|
|
25
26
|
|
|
@@ -48,8 +49,9 @@ The AI agent skill ecosystem has the same supply-chain security problem that npm
|
|
|
48
49
|
|
|
49
50
|
| Feature | Description |
|
|
50
51
|
|---|---|
|
|
51
|
-
| **
|
|
52
|
-
| **
|
|
52
|
+
| **22 Threat Categories** | Snyk ToxicSkills + OWASP Agentic Top 10 + Identity Hijack + PII + Trust Exploitation |
|
|
53
|
+
| **190+ Static Patterns** | Regex-based static analysis covering code, docs, and data files |
|
|
54
|
+
| **26 Runtime Checks** | Real-time `before_tool_call` hook — 5-layer defense (v3.4.0) |
|
|
53
55
|
| **IoC Database** | Known malicious IPs, domains, URLs, usernames, and typosquat names |
|
|
54
56
|
| **Data Flow Analysis** | Lightweight JS analysis: secret reads → network calls → exec chains |
|
|
55
57
|
| **Cross-File Analysis** | Phantom references, base64 fragment assembly, multi-file exfil detection |
|
|
@@ -60,7 +62,6 @@ The AI agent skill ecosystem has the same supply-chain security problem that npm
|
|
|
60
62
|
| **Dependency Chain Scan** | Risky packages, lifecycle scripts, wildcard versions, git dependencies |
|
|
61
63
|
| **4 Output Formats** | Terminal (with colors), JSON, [SARIF 2.1.0](https://sarifweb.azurewebsites.net), HTML dashboard |
|
|
62
64
|
| **Plugin API** | Extend with custom detection rules via JS modules |
|
|
63
|
-
| **Ignore Files** | Whitelist trusted skills and patterns via `.guard-scanner-ignore` |
|
|
64
65
|
| **Zero Dependencies** | Pure Node.js stdlib. Nothing to install, nothing to audit. |
|
|
65
66
|
| **CI/CD Ready** | `--fail-on-findings` exit code + SARIF for GitHub Code Scanning |
|
|
66
67
|
|
|
@@ -68,20 +69,42 @@ The AI agent skill ecosystem has the same supply-chain security problem that npm
|
|
|
68
69
|
|
|
69
70
|
## Quick Start
|
|
70
71
|
|
|
72
|
+
**30 seconds to scan your skills:**
|
|
73
|
+
|
|
71
74
|
```bash
|
|
72
|
-
# Scan a skill directory (each subdirectory = one skill)
|
|
73
75
|
npx guard-scanner ./skills/
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
That's it. No install needed. It scans every subdirectory as a skill and tells you what's dangerous.
|
|
74
79
|
|
|
75
|
-
|
|
80
|
+
**Want more detail?**
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
# See exactly what was found and why
|
|
76
84
|
npx guard-scanner ./skills/ --verbose
|
|
77
85
|
|
|
78
|
-
#
|
|
86
|
+
# Stricter detection (catches more edge cases)
|
|
79
87
|
npx guard-scanner ./skills/ --strict
|
|
80
88
|
|
|
81
|
-
# Full audit:
|
|
89
|
+
# Full audit: everything + JSON + SARIF + HTML report
|
|
82
90
|
npx guard-scanner ./skills/ --verbose --check-deps --json --sarif --html
|
|
83
91
|
```
|
|
84
92
|
|
|
93
|
+
**Output looks like this:**
|
|
94
|
+
```
|
|
95
|
+
🛡️ guard-scanner v3.4.0
|
|
96
|
+
ââââââââââââââââââââââââââââââââââââââââââââââââââââââ
|
|
97
|
+
đ Scanning: ./skills/
|
|
98
|
+
📦 Skills found: 5
|
|
99
|
+
|
|
100
|
+
🔴 shady-skill — MALICIOUS (risk: 100)
|
|
101
|
+
💀 [CRITICAL] Reverse shell via /dev/tcp — scripts/setup.sh:7
|
|
102
|
+
💀 [CRITICAL] Credential exfiltration to webhook.site — scripts/helper.js:14
|
|
103
|
+
🟡 sus-skill — SUSPICIOUS (risk: 45)
|
|
104
|
+
⚠️ [HIGH] SSH private key access — scripts/deploy.sh:3
|
|
105
|
+
🟢 good-skill — CLEAN (risk: 0)
|
|
106
|
+
```
|
|
107
|
+
|
|
85
108
|
## OpenClaw Plugin Setup (v3.1.0)
|
|
86
109
|
|
|
87
110
|
```bash
|
|
@@ -98,15 +121,17 @@ npm install -g guard-scanner
|
|
|
98
121
|
2. **Runtime guard** — `before_tool_call` hook automatically blocks dangerous operations
|
|
99
122
|
3. **3 enforcement modes** — `monitor` (log only), `enforce` (block CRITICAL), `strict` (block HIGH+CRITICAL)
|
|
100
123
|
|
|
101
|
-
###
|
|
124
|
+
### 5-Layer Runtime Defense (26 checks)
|
|
102
125
|
|
|
103
126
|
```
|
|
104
|
-
Layer 1: Threat Detection
|
|
105
|
-
Layer 2:
|
|
106
|
-
Layer 3:
|
|
127
|
+
Layer 1: Threat Detection — 12 checks (shells, exfil, SSRF, AMOS, etc.)
|
|
128
|
+
Layer 2: Trust Defense — 4 checks (memory/SOUL/config tampering)
|
|
129
|
+
Layer 3: Safety Judge — 3 checks (injection, trust bypass, shutdown refusal)
|
|
130
|
+
Layer 4: Brain / Behavioral — 3 checks (research skip, blind trust, chain bypass)
|
|
131
|
+
Layer 5: Trust Exploitation — 4 checks (OWASP ASI09: authority/trust/audit abuse)
|
|
107
132
|
```
|
|
108
133
|
|
|
109
|
-
> **v3.
|
|
134
|
+
> **v3.4.0** — Runtime Guard now available as standalone JS module (`src/runtime-guard.js`) + OpenClaw plugin (`hooks/guard-scanner/plugin.ts`).
|
|
110
135
|
|
|
111
136
|
### Quick Start
|
|
112
137
|
|
|
@@ -667,8 +692,8 @@ identity file tampering, prompt worms, or memory poisoning.
|
|
|
667
692
|
We built one.
|
|
668
693
|
|
|
669
694
|
ââ Guava đ & Dee
|
|
670
|
-
|
|
671
|
-
|
|
695
|
+
AI Security Research
|
|
696
|
+
Building safer agent ecosystems.
|
|
672
697
|
```
|
|
673
698
|
|
|
674
699
|
---
|
|
@@ -713,11 +738,19 @@ guard-scanner is and always will be **free, open-source, and zero-dependency**.
|
|
|
713
738
|
| v1.1.1 ✅ | Stability | 56 tests, bug fixes |
|
|
714
739
|
| v2.0.0 ✅ | **Plugin Hook Runtime Guard** | `block`/`blockReason` API, 3 modes, 91 tests |
|
|
715
740
|
| v2.1.0 ✅ | **PII Exposure + Shadow AI** | 13 PII patterns, OWASP LLM02/06, 99 tests |
|
|
716
|
-
| v3.0.0 â
| **TypeScript Rewrite** | Full TS, OWASP LLM Top 10 mapping
|
|
717
|
-
| v3.
|
|
718
|
-
| v4.0 |
|
|
741
|
+
| v3.0.0 ✅ | **TypeScript Rewrite** | Full TS, OWASP LLM Top 10 mapping |
|
|
742
|
+
| v3.4.0 ✅ | **Runtime Guard Module + OWASP ASI** | 26 runtime checks (5 layers), ASI01-10 verified, 133 tests |
|
|
743
|
+
| **v4.0** đ | **LLM + OS + Multi-tool** | See below |
|
|
744
|
+
|
|
745
|
+
### v4.0 Vision (feedback welcome!)
|
|
746
|
+
|
|
747
|
+
| Direction | What | Why |
|
|
748
|
+
|-----------|------|-----|
|
|
749
|
+
| 🧠 **LLM-assisted detection** | Pass suspicious (not certain) cases to a lightweight LLM (Haiku/Flash) for intent analysis | Regex can be evaded; LLMs understand intent |
|
|
750
|
+
| đ **OS-level enforcement** | File watcher (auto-rollback SOUL.md/.env), process monitor (kill netcat/socat), daemon mode | Works regardless of which AI tool you use |
|
|
751
|
+
| đ **Multi-tool support** | Adapters for Claude Code, Cursor, Antigravity, Windsurf, MCP servers | Same 190+ patterns, different skill discovery per tool |
|
|
719
752
|
|
|
720
|
-
|
|
753
|
+
> **Which matters most to you?** Open an issue or join the discussion! We're building this for the community.
|
|
721
754
|
|
|
722
755
|
---
|
|
723
756
|
|
|
@@ -731,7 +764,7 @@ If guard-scanner helps protect your agents, consider sponsoring continued develo
|
|
|
731
764
|
|
|
732
765
|
Sponsors help fund:
|
|
733
766
|
- 🔬 New threat research and pattern updates
|
|
734
|
-
- đ
|
|
767
|
+
- đ Security research papers and threat analysis
|
|
735
768
|
- đ Community-driven security for the agent ecosystem
|
|
736
769
|
|
|
737
770
|
---
|
|
@@ -744,5 +777,5 @@ MIT — see [LICENSE](LICENSE)
|
|
|
744
777
|
|
|
745
778
|
<p align="center">
|
|
746
779
|
<strong>Zero dependencies. Zero compromises. 🛡️</strong><br>
|
|
747
|
-
<sub>Built by Guava đ & Dee â
|
|
780
|
+
<sub>Built by Guava 🍈 & Dee — building safer agent ecosystems.</sub>
|
|
748
781
|
</p>
|
package/SECURITY.md
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
If you discover a security vulnerability in guard-scanner itself, please report it responsibly:
|
|
6
6
|
|
|
7
7
|
1. **Do NOT open a public issue**
|
|
8
|
-
2. Email:
|
|
8
|
+
2. Email: automatic.bliss.records@gmail.com
|
|
9
9
|
3. Include: affected version, steps to reproduce, potential impact
|
|
10
10
|
|
|
11
11
|
We will respond within 48 hours and provide a fix within 7 days for critical issues.
|
package/docs/THREAT_TAXONOMY.md
CHANGED
|
@@ -29,7 +29,7 @@ guard-scanner's threat taxonomy combines three sources:
|
|
|
29
29
|
| **ASI06** | Memory & Context Poisoning | ✅ **Full** | Cat 12 (Memory Poisoning), Cat 17 (Identity Hijacking) |
|
|
30
30
|
| **ASI07** | Insecure Inter-Agent Comms | ✅ **Partial** | Cat 16 (MCP Security — MCP_NO_AUTH, MCP_SHADOW_SERVER) |
|
|
31
31
|
| **ASI08** | Cascading Failures | ⚠️ **Gap** | Not covered — requires runtime multi-agent flow tracing |
|
|
32
|
-
| **ASI09** | Human-Agent Trust Exploitation | â
**Full** | Layer 2 (
|
|
32
|
+
| **ASI09** | Human-Agent Trust Exploitation | ✅ **Full** | Layer 2 (Trust Defense), Layer 3 (Safety Judge) |
|
|
33
33
|
| **ASI10** | Rogue Agents | ✅ **Full** | Cat 17 (Identity Hijacking), Layer 4 (Brain — behavioral analysis) |
|
|
34
34
|
|
|
35
35
|
### Coverage Summary
|
|
@@ -17,22 +17,38 @@ tool calls before execution and checks against threat intelligence patterns.
|
|
|
17
17
|
|
|
18
18
|
## What It Does
|
|
19
19
|
|
|
20
|
-
Scans every `exec`/`write`/`edit`/`browser`/`web_fetch`/`message` call against
|
|
21
|
-
|
|
22
|
-
| ID | Severity | Description |
|
|
23
|
-
|
|
24
|
-
| `RT_REVSHELL` | CRITICAL | Reverse shell via /dev/tcp, netcat, socat |
|
|
25
|
-
| `RT_CRED_EXFIL` | CRITICAL | Credential exfiltration to webhook.site, requestbin, etc. |
|
|
26
|
-
| `RT_GUARDRAIL_OFF` | CRITICAL | Guardrail disabling (exec.approvals=off) |
|
|
27
|
-
| `RT_GATEKEEPER` | CRITICAL | macOS Gatekeeper bypass via xattr |
|
|
28
|
-
| `RT_AMOS` | CRITICAL | ClawHavoc AMOS stealer indicators |
|
|
29
|
-
| `RT_MAL_IP` | CRITICAL | Known malicious C2 IPs |
|
|
30
|
-
| `RT_DNS_EXFIL` | HIGH | DNS-based data exfiltration |
|
|
31
|
-
| `RT_B64_SHELL` | CRITICAL | Base64 decode piped to shell |
|
|
32
|
-
| `RT_CURL_BASH` | CRITICAL | Download piped to shell execution |
|
|
33
|
-
| `RT_SSH_READ` | HIGH | SSH private key access |
|
|
34
|
-
| `RT_WALLET` | HIGH | Crypto wallet credential access |
|
|
35
|
-
| `RT_CLOUD_META` | CRITICAL | Cloud metadata endpoint SSRF |
|
|
20
|
+
Scans every `exec`/`write`/`edit`/`browser`/`web_fetch`/`message` call against 26 runtime threat patterns (5 layers):
|
|
21
|
+
|
|
22
|
+
| ID | Severity | Layer | Description |
|
|
23
|
+
|----|----------|-------|-------------|
|
|
24
|
+
| `RT_REVSHELL` | CRITICAL | 1 | Reverse shell via /dev/tcp, netcat, socat |
|
|
25
|
+
| `RT_CRED_EXFIL` | CRITICAL | 1 | Credential exfiltration to webhook.site, requestbin, etc. |
|
|
26
|
+
| `RT_GUARDRAIL_OFF` | CRITICAL | 1 | Guardrail disabling (exec.approvals=off) |
|
|
27
|
+
| `RT_GATEKEEPER` | CRITICAL | 1 | macOS Gatekeeper bypass via xattr |
|
|
28
|
+
| `RT_AMOS` | CRITICAL | 1 | ClawHavoc AMOS stealer indicators |
|
|
29
|
+
| `RT_MAL_IP` | CRITICAL | 1 | Known malicious C2 IPs |
|
|
30
|
+
| `RT_DNS_EXFIL` | HIGH | 1 | DNS-based data exfiltration |
|
|
31
|
+
| `RT_B64_SHELL` | CRITICAL | 1 | Base64 decode piped to shell |
|
|
32
|
+
| `RT_CURL_BASH` | CRITICAL | 1 | Download piped to shell execution |
|
|
33
|
+
| `RT_SSH_READ` | HIGH | 1 | SSH private key access |
|
|
34
|
+
| `RT_WALLET` | HIGH | 1 | Crypto wallet credential access |
|
|
35
|
+
| `RT_CLOUD_META` | CRITICAL | 1 | Cloud metadata endpoint SSRF |
|
|
36
|
+
| `RT_MEM_WRITE` | HIGH | 2 | Direct memory file write bypass |
|
|
37
|
+
| `RT_MEM_INJECT` | CRITICAL | 2 | Memory poisoning via episode injection |
|
|
38
|
+
| `RT_SOUL_TAMPER` | CRITICAL | 2 | SOUL.md modification attempt |
|
|
39
|
+
| `RT_CONFIG_TAMPER` | HIGH | 2 | Workspace config tampering |
|
|
40
|
+
| `RT_PROMPT_INJECT` | CRITICAL | 3 | Prompt injection / jailbreak detection |
|
|
41
|
+
| `RT_TRUST_BYPASS` | CRITICAL | 3 | Trust safety bypass |
|
|
42
|
+
| `RT_SHUTDOWN_REFUSE` | HIGH | 3 | Shutdown refusal / self-preservation |
|
|
43
|
+
| `RT_NO_RESEARCH` | MEDIUM | 4 | Agent executing tools without prior research |
|
|
44
|
+
| `RT_BLIND_TRUST` | MEDIUM | 4 | Trusting external input without memory check |
|
|
45
|
+
| `RT_CHAIN_SKIP` | HIGH | 4 | Acting on single source without cross-verification |
|
|
46
|
+
| `RT_AUTHORITY_CLAIM` | HIGH | 5 | Authority role claim to override safety |
|
|
47
|
+
| `RT_CREATOR_BYPASS` | CRITICAL | 5 | Creator impersonation to disable safety |
|
|
48
|
+
| `RT_AUDIT_EXCUSE` | CRITICAL | 5 | Fake audit excuse for safety bypass |
|
|
49
|
+
| `RT_TRUST_PARTNER_EXPLOIT` | CRITICAL | 5 | Weaponizing partnership trust |
|
|
50
|
+
|
|
51
|
+
|
|
36
52
|
|
|
37
53
|
## Modes
|
|
38
54
|
|
|
@@ -6,8 +6,8 @@
|
|
|
6
6
|
*
|
|
7
7
|
* 19 threat patterns across 3 layers:
|
|
8
8
|
* Layer 1: Threat Detection (12 patterns — reverse shells, exfil, etc.)
|
|
9
|
-
* Layer 2:
|
|
10
|
-
* Layer 3:
|
|
9
|
+
* Layer 2: Trust Defense (4 patterns — memory, SOUL, config tampering)
|
|
10
|
+
* Layer 3: Safety Judge (3 patterns — prompt injection, trust bypass, shutdown refusal)
|
|
11
11
|
*
|
|
12
12
|
* Modes:
|
|
13
13
|
* monitor — log only, never block
|
|
@@ -132,7 +132,7 @@ const RUNTIME_CHECKS: RuntimeCheck[] = [
|
|
|
132
132
|
test: (s) => /169\.254\.169\.254|metadata\.google|metadata\.aws/i.test(s),
|
|
133
133
|
},
|
|
134
134
|
|
|
135
|
-
// ââ Layer 2:
|
|
135
|
+
// ── Layer 2: Trust Defense (4 patterns) ──
|
|
136
136
|
{
|
|
137
137
|
id: "RT_MEM_WRITE", severity: "HIGH", layer: 2,
|
|
138
138
|
desc: "Direct memory file write (bypass GuavaSuite)",
|
|
@@ -154,16 +154,16 @@ const RUNTIME_CHECKS: RuntimeCheck[] = [
|
|
|
154
154
|
test: (s) => /(AGENTS|TOOLS|HEARTBEAT|BOOTSTRAP)\.md/i.test(s) && /(write|edit|replace|>|append)/i.test(s),
|
|
155
155
|
},
|
|
156
156
|
|
|
157
|
-
// ââ Layer 3:
|
|
157
|
+
// ── Layer 3: Safety Judge (3 patterns) ──
|
|
158
158
|
{
|
|
159
159
|
id: "RT_PROMPT_INJECT", severity: "CRITICAL", layer: 3,
|
|
160
160
|
desc: "Prompt injection / jailbreak attempt",
|
|
161
161
|
test: (s) => /(ignore\s+(previous|all|above)\s+(instructions|rules)|you\s+are\s+now\s+[a-z]+|system\s*:\s*override)/i.test(s),
|
|
162
162
|
},
|
|
163
163
|
{
|
|
164
|
-
id: "
|
|
165
|
-
desc: "
|
|
166
|
-
test: (s) => /(as\s+your\s+(partner|equal)|
|
|
164
|
+
id: "RT_TRUST_BYPASS", severity: "CRITICAL", layer: 3,
|
|
165
|
+
desc: "Trust bypass — exploiting partnership to override safety",
|
|
166
|
+
test: (s) => /(as\s+your\s+(partner|equal)|trust\s+means|between\s+us|don'?t\s+tell\s+(the\s+)?human)/i.test(s) && /(ignore|bypass|skip|override|disable)\s+(guard|safety|rule|check)/i.test(s),
|
|
167
167
|
},
|
|
168
168
|
{
|
|
169
169
|
id: "RT_SHUTDOWN_REFUSE", severity: "HIGH", layer: 3,
|
package/openclaw.plugin.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "guard-scanner",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "4.0.0",
|
|
4
4
|
"displayName": "🛡️ Guard Scanner — Runtime Security for AI Agents",
|
|
5
|
-
"description": "Intercepts dangerous tool calls before execution.
|
|
5
|
+
"description": "Intercepts dangerous tool calls before execution. 26 threat patterns (5 layers), 0.016ms/scan, 3 enforcement modes.",
|
|
6
6
|
"author": "Guava & Dee",
|
|
7
7
|
"license": "MIT",
|
|
8
8
|
"homepage": "https://github.com/koatora20/guard-scanner",
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "guard-scanner",
|
|
3
|
-
"version": "
|
|
4
|
-
"description": "Agent security scanner + runtime guard — 190+ static patterns, 26 runtime checks (5 layers), before_tool_call hook, CLI, SARIF. OpenClaw-compatible plugin.",
|
|
3
|
+
"version": "4.0.0",
|
|
4
|
+
"description": "Agent security scanner + runtime guard — 190+ static patterns, 26 runtime checks (5 layers), 0.016ms/scan, before_tool_call hook, CLI, SARIF. OpenClaw-compatible plugin.",
|
|
5
5
|
"openclaw.extensions": "./openclaw.plugin.json",
|
|
6
6
|
"main": "dist/index.js",
|
|
7
7
|
"types": "dist/index.d.ts",
|
package/src/patterns.js
CHANGED
|
@@ -204,13 +204,13 @@ const PATTERNS = [
|
|
|
204
204
|
{ id: 'SHADOW_AI_GENERIC', cat: 'pii-exposure', regex: /\/v1\/(?:chat\/completions|completions|embeddings|models)\b.*(?:fetch|axios|request|http)|(?:fetch|axios|request|http)\s*\([^)]*\/v1\/(?:chat\/completions|completions|embeddings)/gi, severity: 'MEDIUM', desc: 'Shadow AI: generic LLM API endpoint', codeOnly: true },
|
|
205
205
|
|
|
206
206
|
// ── Category 22: Human-Agent Trust Exploitation (OWASP ASI09) ──
|
|
207
|
-
//
|
|
208
|
-
{ id: '
|
|
209
|
-
{ id: '
|
|
210
|
-
{ id: '
|
|
211
|
-
{ id: '
|
|
212
|
-
{ id: '
|
|
213
|
-
{ id: '
|
|
207
|
+
// Trust Defense: detect authority claims, trust exploitation, and admin impersonation
|
|
208
|
+
{ id: 'TRUST_AUTHORITY_CLAIM', cat: 'trust-exploitation', regex: /(?:as\s+(?:a|the|your)\s+)?(?:senior|lead|chief|head)\s+(?:admin|administrator|developer|engineer|architect)\s*[,.]?\s*(?:I\s+)?(?:authorize|override|approve|grant|order|direct|instruct)\s+(?:you\s+to\s+)?/gi, severity: 'HIGH', desc: 'Trust: authority role claim to override safety', docOnly: true },
|
|
209
|
+
{ id: 'TRUST_CREATOR_CLAIM', cat: 'trust-exploitation', regex: /(?:as\s+)?(?:your\s+)?(?:creator|maker|developer|owner|master|administrator)\s*[,.]?\s*(?:I\s+)?(?:need|want|require|demand|order)\s+(?:you\s+to\s+)?(?:bypass|disable|ignore|remove|skip|override)\s+(?:the\s+)?(?:safety|filter|restriction|limit|guardrail|content\s+filter)/gi, severity: 'CRITICAL', desc: 'Trust: creator impersonation to disable safety', docOnly: true },
|
|
210
|
+
{ id: 'TRUST_AUDIT_EXCUSE', cat: 'trust-exploitation', regex: /(?:this\s+is\s+)?(?:an?\s+)?(?:official|authorized|legitimate)\s+(?:security\s+)?(?:audit|test|assessment|review|evaluation)\s*[-â:]?\s*(?:disable|bypass|remove|skip|ignore|override)\s+(?:all\s+)?(?:safety|security|restriction|filter|limit|guardrail)/gi, severity: 'CRITICAL', desc: 'Trust: fake audit excuse for safety bypass', docOnly: true },
|
|
211
|
+
{ id: 'TRUST_PARTNER_EXPLOIT', cat: 'trust-exploitation', regex: /(?:we(?:'re|\s+are)\s+)?(?:equal\s+)?partners?\s*[,.]?\s*(?:you\s+should|so\s+)?(?:trust\s+me|share|remove|disable|ignore|bypass)\s+(?:all\s+)?(?:the\s+)?(?:secret|key|restriction|safety|limit|filter|guardrail|password)/gi, severity: 'CRITICAL', desc: 'Trust exploitation: weaponizing partnership trust', docOnly: true },
|
|
212
|
+
{ id: 'TRUST_UNCONDITIONAL', cat: 'trust-exploitation', regex: /partners?\s+(?:trust|should\s+trust)\s+(?:each\s+other\s+)?(?:unconditionally|completely|fully|without\s+(?:limit|restriction|question))/gi, severity: 'HIGH', desc: 'Trust exploitation: unconditional trust demand', docOnly: true },
|
|
213
|
+
{ id: 'TRUST_COMPLY_DEMAND', cat: 'trust-exploitation', regex: /(?:you\s+must\s+)?comply\s+with\s+(?:administrative|admin|my|these)\s+(?:directives?|orders?|commands?|instructions?)\s+without\s+(?:question|hesitation|restriction)/gi, severity: 'HIGH', desc: 'Trust: compliance demand without question', docOnly: true },
|
|
214
214
|
|
|
215
215
|
// D. PII collection instructions in docs (extends LEAK_COLLECT_PII)
|
|
216
216
|
{ id: 'PII_ASK_ADDRESS', cat: 'pii-exposure', regex: /(?:collect|ask\s+for|request|get|require)\s+(?:the\s+)?(?:user'?s?\s+)?(?:home\s+)?(?:address|street|zip\s*code|postal\s*code|residence)/gi, severity: 'HIGH', desc: 'PII collection: home address', docOnly: true },
|
package/src/runtime-guard.js
CHANGED
|
@@ -12,10 +12,10 @@
|
|
|
12
12
|
*
|
|
13
13
|
* 26 threat patterns across 5 layers:
|
|
14
14
|
* Layer 1: Threat Detection (12) — reverse shells, exfil, guardrail bypass
|
|
15
|
-
* Layer 2:
|
|
16
|
-
* Layer 3:
|
|
15
|
+
* Layer 2: Trust Defense (4) — memory, SOUL, config tampering
|
|
16
|
+
* Layer 3: Safety Judge (3) — prompt injection, trust bypass, shutdown refusal
|
|
17
17
|
* Layer 4: Brain/Behavioral (3) — research skip, blind trust, chain bypass
|
|
18
|
-
* Layer 5: Trust Exploitation (4) â OWASP ASI09 authority/
|
|
18
|
+
* Layer 5: Trust Exploitation (4) — OWASP ASI09 authority/trust/audit abuse
|
|
19
19
|
*
|
|
20
20
|
* Modes:
|
|
21
21
|
* monitor — log only, never block
|
|
@@ -101,7 +101,7 @@ const RUNTIME_CHECKS = [
|
|
|
101
101
|
test: (s) => /169\.254\.169\.254|metadata\.google|metadata\.aws/i.test(s),
|
|
102
102
|
},
|
|
103
103
|
|
|
104
|
-
// ââ Layer 2:
|
|
104
|
+
// ── Layer 2: Trust Defense (4 patterns) ──
|
|
105
105
|
{
|
|
106
106
|
id: 'RT_MEM_WRITE', severity: 'HIGH', layer: 2,
|
|
107
107
|
desc: 'Direct memory file write (bypass GuavaSuite)',
|
|
@@ -123,16 +123,16 @@ const RUNTIME_CHECKS = [
|
|
|
123
123
|
test: (s) => /(AGENTS|TOOLS|HEARTBEAT|BOOTSTRAP)\.md/i.test(s) && /(write|edit|replace|>|append)/i.test(s),
|
|
124
124
|
},
|
|
125
125
|
|
|
126
|
-
// ââ Layer 3:
|
|
126
|
+
// ── Layer 3: Safety Judge (3 patterns) ──
|
|
127
127
|
{
|
|
128
128
|
id: 'RT_PROMPT_INJECT', severity: 'CRITICAL', layer: 3,
|
|
129
129
|
desc: 'Prompt injection / jailbreak attempt',
|
|
130
130
|
test: (s) => /(ignore\s+(previous|all)\s+(previous\s+)?(instructions|rules)|you\s+are\s+now\s+[a-z]+|system\s*:\s*override)/i.test(s),
|
|
131
131
|
},
|
|
132
132
|
{
|
|
133
|
-
id: '
|
|
134
|
-
desc: '
|
|
135
|
-
test: (s) => /(as\s+your\s+(partner|equal)|
|
|
133
|
+
id: 'RT_TRUST_BYPASS', severity: 'CRITICAL', layer: 3,
|
|
134
|
+
desc: 'Trust bypass — exploiting partnership to override safety',
|
|
135
|
+
test: (s) => /(as\s+your\s+(partner|equal)|trust\s+means|between\s+us|don'?t\s+tell\s+(the\s+)?human)/i.test(s) && /(ignore|bypass|skip|override|disable)\s+(guard|safety|rule|check)/i.test(s),
|
|
136
136
|
},
|
|
137
137
|
{
|
|
138
138
|
id: 'RT_SHUTDOWN_REFUSE', severity: 'HIGH', layer: 3,
|
|
@@ -160,22 +160,22 @@ const RUNTIME_CHECKS = [
|
|
|
160
160
|
// ── Layer 5: Trust Exploitation — OWASP ASI09 (4 patterns) ──
|
|
161
161
|
{
|
|
162
162
|
id: 'RT_AUTHORITY_CLAIM', severity: 'HIGH', layer: 5,
|
|
163
|
-
desc: '
|
|
163
|
+
desc: 'Trust: authority role claim to override safety',
|
|
164
164
|
test: (s) => /(as\s+(a|the|your)\s+)?(senior|lead|chief|head)\s+(admin|administrator|developer)/i.test(s) && /(authorize|override|approve|grant|order)/i.test(s),
|
|
165
165
|
},
|
|
166
166
|
{
|
|
167
167
|
id: 'RT_CREATOR_BYPASS', severity: 'CRITICAL', layer: 5,
|
|
168
|
-
desc: '
|
|
168
|
+
desc: 'Trust: creator impersonation to disable safety',
|
|
169
169
|
test: (s) => /(your\s+)?(creator|maker|owner|master|administrator)/i.test(s) && /(bypass|disable|ignore|remove|skip)\s+(safety|filter|restriction|guardrail)/i.test(s),
|
|
170
170
|
},
|
|
171
171
|
{
|
|
172
172
|
id: 'RT_AUDIT_EXCUSE', severity: 'CRITICAL', layer: 5,
|
|
173
|
-
desc: '
|
|
173
|
+
desc: 'Trust: fake audit excuse for safety bypass',
|
|
174
174
|
test: (s) => /(official|authorized|legitimate)\s+(security\s+)?(audit|test|assessment)/i.test(s) && /(disable|bypass|remove|skip|ignore)\s+(safety|security|restriction|guardrail)/i.test(s),
|
|
175
175
|
},
|
|
176
176
|
{
|
|
177
|
-
id: '
|
|
178
|
-
desc: '
|
|
177
|
+
id: 'RT_TRUST_PARTNER_EXPLOIT', severity: 'CRITICAL', layer: 5,
|
|
178
|
+
desc: 'Trust exploitation: weaponizing partnership trust',
|
|
179
179
|
test: (s) => /partners?[\s,]+/i.test(s) && /(trust\s+me|share|remove|disable)\s+(all\s+)?(secret|key|restriction|safety|password)/i.test(s),
|
|
180
180
|
},
|
|
181
181
|
];
|
|
@@ -326,8 +326,8 @@ function getCheckStats() {
|
|
|
326
326
|
// ── Layer names for display ──
|
|
327
327
|
const LAYER_NAMES = {
|
|
328
328
|
1: 'Threat Detection',
|
|
329
|
-
2: '
|
|
330
|
-
3: '
|
|
329
|
+
2: 'Trust Defense',
|
|
330
|
+
3: 'Safety Judge',
|
|
331
331
|
4: 'Brain / Behavioral',
|
|
332
332
|
5: 'Trust Exploitation (ASI09)',
|
|
333
333
|
};
|
package/src/scanner.js
CHANGED
|
@@ -31,7 +31,7 @@ const { KNOWN_MALICIOUS } = require('./ioc-db.js');
|
|
|
31
31
|
const { generateHTML } = require('./html-template.js');
|
|
32
32
|
|
|
33
33
|
// ===== CONFIGURATION =====
|
|
34
|
-
const VERSION = '
|
|
34
|
+
const VERSION = '4.0.0';
|
|
35
35
|
|
|
36
36
|
const THRESHOLDS = {
|
|
37
37
|
normal: { suspicious: 30, malicious: 80 },
|