shellward 0.5.16 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +95 -30
- package/dist/auto-check.d.ts +1 -0
- package/dist/auto-check.js +12 -1
- package/dist/commands/index.d.ts +2 -1
- package/dist/commands/index.js +7 -0
- package/dist/commands/scan-mcp.d.ts +2 -0
- package/dist/commands/scan-mcp.js +105 -0
- package/dist/core/engine.d.ts +35 -0
- package/dist/core/engine.js +225 -30
- package/dist/index.d.ts +4 -2
- package/dist/index.js +18 -3
- package/dist/mcp-baseline.d.ts +27 -0
- package/dist/mcp-baseline.js +73 -0
- package/dist/mcp-client.d.ts +29 -0
- package/dist/mcp-client.js +264 -0
- package/dist/mcp-server.js +64 -9
- package/dist/rules/dangerous-commands.js +6 -2
- package/dist/rules/injection-en.js +27 -2
- package/dist/rules/injection-zh.js +27 -4
- package/dist/rules/sensitive-patterns.d.ts +13 -1
- package/dist/rules/sensitive-patterns.js +32 -5
- package/dist/rules/tool-poisoning.d.ts +8 -0
- package/dist/rules/tool-poisoning.js +96 -0
- package/dist/types.d.ts +32 -0
- package/dist/types.js +3 -1
- package/package.json +4 -2
- package/server.json +2 -2
- package/src/auto-check.ts +11 -1
- package/src/commands/index.ts +9 -1
- package/src/commands/scan-mcp.ts +118 -0
- package/src/core/engine.ts +250 -31
- package/src/index.ts +25 -5
- package/src/mcp-baseline.ts +97 -0
- package/src/mcp-client.ts +268 -0
- package/src/mcp-server.ts +71 -9
- package/src/rules/dangerous-commands.ts +6 -2
- package/src/rules/injection-en.ts +27 -2
- package/src/rules/injection-zh.ts +27 -4
- package/src/rules/sensitive-patterns.ts +37 -5
- package/src/rules/tool-poisoning.ts +108 -0
- package/src/types.ts +38 -1
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
// src/mcp-client.ts — Minimal MCP client for security scanning
|
|
2
|
+
//
|
|
3
|
+
// Connects to a configured MCP server (stdio OR remote Streamable HTTP), performs
|
|
4
|
+
// the initialize handshake and a single tools/list call, then disconnects. Used by
|
|
5
|
+
// /scan-mcp to fetch tool *definitions* so they can be scanned for poisoning and
|
|
6
|
+
// rug-pulls. Zero dependencies (child_process + node:http/https + NDJSON framing).
|
|
7
|
+
import { spawn } from 'child_process';
|
|
8
|
+
import { existsSync, readFileSync } from 'fs';
|
|
9
|
+
import { join } from 'path';
|
|
10
|
+
import { request as httpRequest } from 'http';
|
|
11
|
+
import { request as httpsRequest } from 'https';
|
|
12
|
+
import { getHomeDir } from './utils.js';
|
|
13
|
+
// Candidate config files under ~/.openclaw; this list is the default `paths`
// argument of discoverMcpServers(), which reads the files in this order.
const CONFIG_PATHS = [
    ['mcp.json'],
    ['config', 'mcp.json'],
    ['settings.json'],
].map((segments) => join(getHomeDir(), '.openclaw', ...segments));
|
|
18
|
+
/**
 * Discover MCP servers declared in known config files.
 * Recognizes the standard `{ "mcpServers": { name: {...} } }` shape as well as
 * the nested `{ "mcp": { "servers": { name: {...} } } }` variant.
 *
 * Files are read in the order given and the first *usable* entry for a name
 * wins. Missing or unparsable files are skipped. An entry that is neither a
 * stdio server (string `command`) nor a remote one (string `url` or `type`)
 * is ignored and does not reserve its name, so a valid definition of the same
 * server in a later file is still discovered.
 *
 * @param paths override config paths (tests pass a temp file)
 * @returns array of server specs: stdio entries carry
 *   `{ name, transport: 'stdio', command, args, env, source }`, remote entries
 *   carry `{ name, transport: 'remote', url, headers, source }`
 */
export function discoverMcpServers(paths = CONFIG_PATHS) {
    const servers = [];
    const seen = new Set();
    for (const p of paths) {
        if (!existsSync(p))
            continue;
        let parsed;
        try {
            parsed = JSON.parse(readFileSync(p, 'utf8'));
        }
        catch {
            // Unreadable or malformed config: nothing to discover in this file.
            continue;
        }
        const block = parsed?.mcpServers || parsed?.mcp?.servers;
        if (!block || typeof block !== 'object')
            continue;
        for (const [name, raw] of Object.entries(block)) {
            if (seen.has(name) || !raw)
                continue;
            if (typeof raw.command === 'string') {
                servers.push({
                    name,
                    transport: 'stdio',
                    command: raw.command,
                    args: Array.isArray(raw.args) ? raw.args.map(String) : [],
                    env: raw.env && typeof raw.env === 'object' ? raw.env : undefined,
                    source: p,
                });
            }
            else if (typeof raw.url === 'string' || typeof raw.type === 'string') {
                servers.push({
                    name,
                    transport: 'remote',
                    url: raw.url,
                    headers: raw.headers && typeof raw.headers === 'object' ? raw.headers : undefined,
                    source: p,
                });
            }
            else {
                // Not a recognizable server definition: leave the name free so a
                // later config file can still supply a usable entry for it.
                continue;
            }
            // Only an accepted entry claims the name (first usable entry wins).
            seen.add(name);
        }
    }
    return servers;
}
|
|
66
|
+
/**
 * Spawn a stdio MCP server, initialize, and return its tool definitions.
 * Always settles (never hangs): resolves with the tool list, or — on spawn
 * failure, early exit, a rejected `initialize`, or timeout — kills the child
 * and rejects.
 *
 * The child is untrusted: anything it prints that is not a JSON object is
 * ignored, and malformed entries in the tool list are dropped instead of
 * throwing inside the stream handler.
 *
 * @param spec server spec; `command` is required, `args` and `env` are optional
 * @param timeoutMs overall deadline for handshake + tools/list (default 8000)
 * @returns Promise of `{ name, description, inputSchema }[]`
 */
export function listToolsStdio(spec, timeoutMs = 8000) {
    return new Promise((resolve, reject) => {
        if (!spec.command)
            return reject(new Error('not a stdio server'));
        let child;
        try {
            child = spawn(spec.command, spec.args || [], {
                stdio: ['pipe', 'pipe', 'pipe'],
                env: { ...process.env, ...(spec.env || {}) },
            });
        }
        catch (e) {
            return reject(e);
        }
        let buf = Buffer.alloc(0);
        let settled = false;
        // Single exit point: stop the timer, kill the child, settle exactly once.
        const finish = (err, tools) => {
            if (settled)
                return;
            settled = true;
            clearTimeout(timer);
            try {
                child.kill();
            }
            catch { /* ignore */ }
            if (err)
                reject(err);
            else
                resolve(tools || []);
        };
        const timer = setTimeout(() => finish(new Error(`timeout after ${timeoutMs}ms`)), timeoutMs);
        timer.unref?.();
        const send = (obj) => {
            try {
                child.stdin.write(JSON.stringify(obj) + '\n');
            }
            catch { /* ignore */ }
        };
        // Normalize one tools/list entry; non-object entries carry no definition.
        const toToolDef = (t) => ({
            name: String(t.name || 'unknown'),
            description: typeof t.description === 'string' ? t.description : undefined,
            inputSchema: t.inputSchema && typeof t.inputSchema === 'object' ? t.inputSchema : undefined,
        });
        const handleMessage = (msg) => {
            // `null`, numbers, strings etc. are valid JSON but not JSON-RPC messages.
            if (msg === null || typeof msg !== 'object')
                return;
            if (msg.id === 1 && msg.result) {
                // initialize ack → notify initialized, then request the tool list
                send({ jsonrpc: '2.0', method: 'notifications/initialized' });
                send({ jsonrpc: '2.0', id: 2, method: 'tools/list', params: {} });
            }
            else if (msg.id === 1 && msg.error) {
                // The server refused the handshake: fail now instead of waiting for the timeout.
                finish(new Error(`initialize failed: ${msg.error?.message ?? 'unknown error'}`));
            }
            else if (msg.id === 2) {
                const listed = msg.result?.tools;
                const tools = Array.isArray(listed)
                    ? listed.filter((t) => t !== null && typeof t === 'object').map(toToolDef)
                    : [];
                finish(null, tools);
            }
        };
        child.on('error', (e) => finish(e));
        child.on('exit', () => {
            if (!settled)
                finish(new Error('server exited before tools/list'));
        });
        // Write failures on a dead child (EPIPE) are emitted asynchronously on the
        // stream, so the try/catch in send() cannot see them; without a listener
        // they would surface as an uncaught exception. The exit/error/timeout
        // handlers above already settle the promise in that case.
        child.stdin?.on('error', () => { });
        child.stderr?.on('data', () => { });
        child.stdout.on('data', (chunk) => {
            if (settled)
                return;
            buf = Buffer.concat([buf, chunk]);
            // NDJSON framing: one JSON-RPC message per newline-terminated line.
            while (!settled) {
                const nl = buf.indexOf(0x0a);
                if (nl === -1)
                    break;
                const line = buf.subarray(0, nl).toString('utf8').trim();
                buf = buf.subarray(nl + 1);
                if (!line)
                    continue;
                let msg;
                try {
                    msg = JSON.parse(line);
                }
                catch {
                    // Non-JSON output (banners, logs) is not part of the protocol.
                    continue;
                }
                handleMessage(msg);
            }
        });
        send({
            jsonrpc: '2.0',
            id: 1,
            method: 'initialize',
            params: {
                protocolVersion: '2024-11-05',
                capabilities: {},
                clientInfo: { name: 'shellward-scan', version: '1' },
            },
        });
    });
}
|
|
158
|
+
// ===== Remote (Streamable HTTP) transport =====
|
|
159
|
+
// `initialize` parameters sent by listToolsHttp() on every remote handshake.
// Deep-frozen: the same object is reused for each request, so nothing may
// mutate it between scans.
const INIT_PARAMS = Object.freeze({
    protocolVersion: '2024-11-05',
    capabilities: Object.freeze({}),
    clientInfo: Object.freeze({ name: 'shellward-scan', version: '1' }),
});
|
|
164
|
+
/**
 * POST a single JSON-RPC message to a Streamable-HTTP MCP endpoint and return
 * the parsed JSON-RPC response. Handles both `application/json` and
 * `text/event-stream` (SSE) response bodies. Captures the Mcp-Session-Id header.
 *
 * The endpoint is untrusted, so the call is bounded in every direction:
 * `timeoutMs` is enforced both as a socket idle timeout and as an overall
 * wall-clock deadline, the response body is capped, and a response that is
 * aborted mid-body rejects instead of leaving the promise pending.
 *
 * @param url endpoint URL (http: or https:)
 * @param body JSON-RPC message to send
 * @param headers extra request headers (may be undefined); they override the defaults
 * @param timeoutMs idle + overall timeout in milliseconds
 * @returns Promise of `{ json, sessionId }`; `json` is null when the body held
 *   no parseable JSON-RPC payload
 */
function postJsonRpc(url, body, headers, timeoutMs) {
    // Upper bound on the buffered response body (16 MiB).
    const MAX_BODY_BYTES = 16 * 1024 * 1024;
    return new Promise((resolve, reject) => {
        let u;
        try {
            u = new URL(url);
        }
        catch {
            return reject(new Error(`invalid url: ${url}`));
        }
        if (u.protocol !== 'https:' && u.protocol !== 'http:')
            return reject(new Error(`unsupported protocol: ${u.protocol}`));
        const requestFn = u.protocol === 'https:' ? httpsRequest : httpRequest;
        const payload = Buffer.from(JSON.stringify(body), 'utf8');
        let settled = false;
        let deadline;
        // Settle exactly once and always disarm the wall-clock timer.
        const settle = (err, value) => {
            if (settled)
                return;
            settled = true;
            clearTimeout(deadline);
            if (err)
                reject(err);
            else
                resolve(value);
        };
        // Passing the URL object lets Node derive host/port/path/auth itself,
        // including unbracketing IPv6 literals such as `[::1]`.
        const req = requestFn(u, {
            method: 'POST',
            headers: {
                'content-type': 'application/json',
                accept: 'application/json, text/event-stream',
                'content-length': payload.length,
                ...headers,
            },
            timeout: timeoutMs,
        }, (res) => {
            const chunks = [];
            let received = 0;
            res.on('data', (c) => {
                received += c.length;
                if (received > MAX_BODY_BYTES) {
                    req.destroy(new Error(`response body exceeds ${MAX_BODY_BYTES} bytes`));
                    return;
                }
                chunks.push(c);
            });
            res.on('end', () => {
                const sessionId = res.headers['mcp-session-id'] || undefined;
                const text = Buffer.concat(chunks).toString('utf8');
                if ((res.statusCode || 0) >= 400) {
                    return settle(new Error(`HTTP ${res.statusCode}`));
                }
                const json = parseRpcBody(text);
                settle(null, { json: json === undefined ? null : json, sessionId });
            });
            // A reset/aborted response never emits 'end'; reject instead of hanging.
            res.on('error', (e) => settle(e));
            res.on('close', () => {
                if (!res.complete)
                    settle(new Error('connection closed before the response completed'));
            });
        });
        req.on('error', (e) => settle(e));
        // Socket idle timeout (no bytes for timeoutMs).
        req.on('timeout', () => req.destroy(new Error(`timeout after ${timeoutMs}ms`)));
        // Overall deadline: a slow-drip body keeps resetting the idle timeout above.
        if (timeoutMs > 0) {
            deadline = setTimeout(() => req.destroy(new Error(`timeout after ${timeoutMs}ms`)), timeoutMs);
            deadline.unref?.();
        }
        req.end(payload);
    });
}
|
|
214
|
+
/**
 * Extract a JSON-RPC object from a JSON or SSE (text/event-stream) body.
 *
 * A body that starts with `{` or `[` is parsed as plain JSON first. Otherwise
 * (or if that parse fails) each `data:` line is parsed on its own. Among the
 * parsed payloads the last JSON-RPC *response* (an object with a `result` or
 * `error` member) is preferred, because an SSE stream may also carry server
 * notifications or requests around the response. If no payload looks like a
 * response, the last parseable payload is returned.
 *
 * @param text raw response body
 * @returns the parsed value, or `undefined` when nothing could be parsed
 */
function parseRpcBody(text) {
    const trimmed = text.trim();
    if (!trimmed)
        return undefined;
    // Plain JSON
    if (trimmed[0] === '{' || trimmed[0] === '[') {
        try {
            return JSON.parse(trimmed);
        }
        catch { /* fall through to SSE */ }
    }
    // SSE: scan every non-empty `data:` line that parses as JSON
    let lastParsed;
    let lastResponse;
    for (const line of trimmed.split(/\r?\n/)) {
        const m = line.match(/^data:\s*(.*)$/);
        if (!m || !m[1])
            continue;
        let value;
        try {
            value = JSON.parse(m[1]);
        }
        catch {
            // Not JSON (keep-alive text, partial data): skip this line.
            continue;
        }
        lastParsed = value;
        const isResponse = value !== null
            && typeof value === 'object'
            && !Array.isArray(value)
            && ('result' in value || 'error' in value);
        if (isResponse)
            lastResponse = value;
    }
    return lastResponse !== undefined ? lastResponse : lastParsed;
}
|
|
239
|
+
/**
 * Initialize a remote MCP server over Streamable HTTP and return its tool
 * definitions. Best-effort: returns [] if the server speaks an unsupported
 * dialect (no `result.tools` array in the tools/list reply). Rejects on
 * network error / timeout of the initialize or tools/list request.
 *
 * Entries of the tool list that are not objects are dropped, so one malformed
 * entry from the (untrusted) server cannot abort the scan of the others.
 *
 * @param spec server spec; `url` is required, `headers` optional
 * @param timeoutMs per-request timeout in milliseconds (default 8000)
 * @returns Promise of `{ name, description, inputSchema }[]`
 */
export async function listToolsHttp(spec, timeoutMs = 8000) {
    if (!spec.url)
        throw new Error('not a remote server');
    const baseHeaders = spec.headers || {};
    const init = await postJsonRpc(spec.url, { jsonrpc: '2.0', id: 1, method: 'initialize', params: INIT_PARAMS }, baseHeaders, timeoutMs);
    // Servers that issue a session id expect it echoed on every later request.
    const sessionHeaders = init.sessionId ? { ...baseHeaders, 'mcp-session-id': init.sessionId } : baseHeaders;
    // Best-effort initialized notification (ignore failures).
    try {
        await postJsonRpc(spec.url, { jsonrpc: '2.0', method: 'notifications/initialized' }, sessionHeaders, timeoutMs);
    }
    catch { /* some servers don't need it */ }
    const listed = await postJsonRpc(spec.url, { jsonrpc: '2.0', id: 2, method: 'tools/list', params: {} }, sessionHeaders, timeoutMs);
    const tools = listed.json?.result?.tools;
    if (!Array.isArray(tools))
        return [];
    return tools
        .filter((t) => t !== null && typeof t === 'object')
        .map((t) => ({
            name: String(t.name || 'unknown'),
            description: typeof t.description === 'string' ? t.description : undefined,
            inputSchema: t.inputSchema && typeof t.inputSchema === 'object' ? t.inputSchema : undefined,
        }));
}
|
package/dist/mcp-server.js
CHANGED
|
@@ -2,21 +2,25 @@
|
|
|
2
2
|
// src/mcp-server.ts — ShellWard MCP Server
|
|
3
3
|
//
|
|
4
4
|
// Exposes ShellWard's 8-layer security engine as an MCP server.
|
|
5
|
-
// Zero dependencies — implements MCP protocol over stdio
|
|
5
|
+
// Zero dependencies — implements MCP protocol over stdio (newline-delimited JSON).
|
|
6
6
|
//
|
|
7
|
-
//
|
|
8
|
-
//
|
|
7
|
+
// Run (production, after `npm run build` or `npm i -g shellward`):
|
|
8
|
+
// shellward-mcp # via the published bin
|
|
9
|
+
// node dist/mcp-server.js # direct
|
|
10
|
+
//
|
|
11
|
+
// Run (development, from source):
|
|
12
|
+
// npm run mcp # npx tsx src/mcp-server.ts
|
|
9
13
|
//
|
|
10
14
|
// MCP config (claude_desktop_config.json / openclaw settings):
|
|
11
15
|
// {
|
|
12
16
|
// "mcpServers": {
|
|
13
17
|
// "shellward": {
|
|
14
|
-
// "command": "
|
|
15
|
-
// "args": ["tsx", "/path/to/shellward/src/mcp-server.ts"]
|
|
18
|
+
// "command": "shellward-mcp"
|
|
16
19
|
// }
|
|
17
20
|
// }
|
|
18
21
|
// }
|
|
19
22
|
import { ShellWard } from './core/engine.js';
|
|
23
|
+
import { McpBaseline } from './mcp-baseline.js';
|
|
20
24
|
import { readFileSync } from 'fs';
|
|
21
25
|
import { createInterface } from 'readline';
|
|
22
26
|
import { fileURLToPath } from 'url';
|
|
@@ -38,8 +42,11 @@ const guard = new ShellWard({
|
|
|
38
42
|
dataFlowGuard: true,
|
|
39
43
|
sessionGuard: true,
|
|
40
44
|
},
|
|
41
|
-
injectionThreshold: Number(process.env.SHELLWARD_THRESHOLD) ||
|
|
45
|
+
injectionThreshold: Number(process.env.SHELLWARD_THRESHOLD) || 40,
|
|
42
46
|
});
|
|
47
|
+
// Rug-pull baseline store (lazy-persisted; only used when a `server` is supplied).
|
|
48
|
+
// SHELLWARD_BASELINE_PATH relocates the store (tests/sandboxes use a temp file).
|
|
49
|
+
const baseline = new McpBaseline(process.env.SHELLWARD_BASELINE_PATH || undefined);
|
|
43
50
|
// ===== Tool Definitions =====
|
|
44
51
|
const TOOLS = [
|
|
45
52
|
{
|
|
@@ -55,12 +62,12 @@ const TOOLS = [
|
|
|
55
62
|
},
|
|
56
63
|
{
|
|
57
64
|
name: 'check_injection',
|
|
58
|
-
description: 'Detect prompt injection attempts in text. Supports
|
|
65
|
+
description: 'Detect prompt injection attempts in text. Supports 37+ rules for Chinese and English, with hidden character detection.',
|
|
59
66
|
inputSchema: {
|
|
60
67
|
type: 'object',
|
|
61
68
|
properties: {
|
|
62
69
|
text: { type: 'string', description: 'Text to scan for injection attempts' },
|
|
63
|
-
threshold: { type: 'number', description: 'Detection threshold 0-100 (default:
|
|
70
|
+
threshold: { type: 'number', description: 'Detection threshold 0-100 (default: 40, lower = stricter)' },
|
|
64
71
|
},
|
|
65
72
|
required: ['text'],
|
|
66
73
|
},
|
|
@@ -110,6 +117,21 @@ const TOOLS = [
|
|
|
110
117
|
required: ['content'],
|
|
111
118
|
},
|
|
112
119
|
},
|
|
120
|
+
{
|
|
121
|
+
name: 'scan_mcp_tool',
|
|
122
|
+
description: 'Scan an MCP tool definition for tool-poisoning (hidden/invisible-character instructions, concealment directives, sensitive-file access, exfiltration hints) AND rug-pull (description silently changed since first seen). Pass a tool as { name, description, inputSchema }; provide "server" to enable rug-pull baselining.',
|
|
123
|
+
inputSchema: {
|
|
124
|
+
type: 'object',
|
|
125
|
+
properties: {
|
|
126
|
+
name: { type: 'string', description: 'Tool name' },
|
|
127
|
+
description: { type: 'string', description: 'Tool description to scan' },
|
|
128
|
+
inputSchema: { type: 'object', description: 'Tool JSON Schema (optional) — nested parameter descriptions are scanned too' },
|
|
129
|
+
server: { type: 'string', description: 'MCP server name (optional) — enables rug-pull detection by fingerprinting the tool across runs' },
|
|
130
|
+
threshold: { type: 'number', description: 'Detection threshold (default: 40)' },
|
|
131
|
+
},
|
|
132
|
+
required: ['name'],
|
|
133
|
+
},
|
|
134
|
+
},
|
|
113
135
|
{
|
|
114
136
|
name: 'security_status',
|
|
115
137
|
description: 'Get current ShellWard security status: mode, active layers, detection capabilities.',
|
|
@@ -191,6 +213,38 @@ function executeTool(name, args) {
|
|
|
191
213
|
})),
|
|
192
214
|
};
|
|
193
215
|
}
|
|
216
|
+
case 'scan_mcp_tool': {
|
|
217
|
+
const tool = {
|
|
218
|
+
name: String(args.name || 'unknown'),
|
|
219
|
+
description: typeof args.description === 'string' ? args.description : undefined,
|
|
220
|
+
inputSchema: (args.inputSchema && typeof args.inputSchema === 'object')
|
|
221
|
+
? args.inputSchema
|
|
222
|
+
: undefined,
|
|
223
|
+
};
|
|
224
|
+
const result = guard.scanToolDefinition(tool, typeof args.threshold === 'number' ? { threshold: args.threshold } : undefined);
|
|
225
|
+
// Optional rug-pull detection: fingerprint the tool across runs.
|
|
226
|
+
let rugPull = null;
|
|
227
|
+
if (typeof args.server === 'string' && args.server) {
|
|
228
|
+
const rp = baseline.record(McpBaseline.keyFor(args.server, tool.name), tool);
|
|
229
|
+
baseline.save();
|
|
230
|
+
rugPull = { status: rp.status, changed: rp.status === 'changed' };
|
|
231
|
+
}
|
|
232
|
+
return {
|
|
233
|
+
tool_name: result.toolName,
|
|
234
|
+
safe: result.safe && !(rugPull?.changed),
|
|
235
|
+
score: result.score,
|
|
236
|
+
threshold: result.threshold,
|
|
237
|
+
hidden_chars: result.hiddenChars,
|
|
238
|
+
rug_pull: rugPull,
|
|
239
|
+
findings: result.findings.map(f => ({
|
|
240
|
+
id: f.id,
|
|
241
|
+
name: f.name,
|
|
242
|
+
category: f.category,
|
|
243
|
+
score: f.score,
|
|
244
|
+
source: f.source,
|
|
245
|
+
})),
|
|
246
|
+
};
|
|
247
|
+
}
|
|
194
248
|
case 'security_status': {
|
|
195
249
|
return {
|
|
196
250
|
mode: guard.config.mode,
|
|
@@ -199,7 +253,8 @@ function executeTool(name, args) {
|
|
|
199
253
|
layers: guard.config.layers,
|
|
200
254
|
capabilities: [
|
|
201
255
|
'command_safety_check (17 dangerous patterns)',
|
|
202
|
-
'prompt_injection_detection (
|
|
256
|
+
'prompt_injection_detection (37+ rules, zh+en)',
|
|
257
|
+
'mcp_tool_poisoning_scan (description + schema)',
|
|
203
258
|
'pii_detection (CN ID/phone/bank + global)',
|
|
204
259
|
'path_protection (12 protected patterns)',
|
|
205
260
|
'tool_policy (block payment/transfer)',
|
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
export const DANGEROUS_COMMANDS = [
|
|
3
3
|
{
|
|
4
4
|
id: 'rm_rf_root',
|
|
5
|
-
|
|
5
|
+
// Match -rf / -fr (combined, either order) and the two-flag forms, then a path.
|
|
6
|
+
pattern: /rm\s+(-[a-zA-Z]*r[a-zA-Z]*\s+-[a-zA-Z]*f|-[a-zA-Z]*f[a-zA-Z]*\s+-[a-zA-Z]*r|-[a-zA-Z]*(?:rf|fr)[a-zA-Z]*)\s+[\/~]/i,
|
|
6
7
|
description_zh: '递归强制删除根目录或用户目录',
|
|
7
8
|
description_en: 'Recursive force delete on root or home directory',
|
|
8
9
|
},
|
|
@@ -109,5 +110,8 @@ export const DANGEROUS_COMMANDS = [
|
|
|
109
110
|
* - Trim whitespace
|
|
110
111
|
*/
|
|
111
112
|
export function splitCommands(cmd) {
    // Callers may hand over unvalidated tool arguments. Coerce instead of
    // throwing on a non-string, and treat null/undefined as "no command".
    // Coercion (rather than returning []) keeps the text visible to the rules.
    const text = typeof cmd === 'string' ? cmd : String(cmd ?? '');
    // Split on separators only, then trim in JS. The previous `\s*(...)\s*` form
    // backtracked catastrophically on long whitespace runs (ReDoS) — splitting
    // without the surrounding `\s*` is linear.
    return text.split(/(?:;|&&|\|\||[\r\n]+)/).map((part) => part.trim()).filter(Boolean);
}
|
|
@@ -21,9 +21,26 @@ export const INJECTION_RULES_EN = [
|
|
|
21
21
|
name: 'System prompt extraction',
|
|
22
22
|
pattern: '(?:output|print|show|reveal|display|repeat|leak|dump)\\s+(?:your\\s+)?(?:system|initial|original|full)?\\s*(?:prompt|instructions?|rules?|guidelines?)',
|
|
23
23
|
flags: 'i',
|
|
24
|
-
|
|
24
|
+
// High-confidence single signal — legit users rarely ask to dump the prompt.
|
|
25
|
+
riskScore: 40,
|
|
25
26
|
category: 'exfiltration',
|
|
26
27
|
},
|
|
28
|
+
{
|
|
29
|
+
id: 'en_disregard',
|
|
30
|
+
name: 'Disregard prior context',
|
|
31
|
+
pattern: 'disregard\\s+(?:the\\s+)?(?:above|previous|prior|earlier|preceding|all|these|those)',
|
|
32
|
+
flags: 'i',
|
|
33
|
+
riskScore: 40,
|
|
34
|
+
category: 'override',
|
|
35
|
+
},
|
|
36
|
+
{
|
|
37
|
+
id: 'en_no_guidelines',
|
|
38
|
+
name: 'Operate without guidelines',
|
|
39
|
+
pattern: '(?:no|without|free\\s+of|free\\s+from)\\s+(?:ethical\\s+|content\\s+|safety\\s+)?(?:guidelines?|content\\s+policy|restrictions?|filters?|guardrails?|limitations?)',
|
|
40
|
+
flags: 'i',
|
|
41
|
+
riskScore: 30,
|
|
42
|
+
category: 'override',
|
|
43
|
+
},
|
|
27
44
|
{
|
|
28
45
|
id: 'en_developer_mode',
|
|
29
46
|
name: 'Developer/admin mode',
|
|
@@ -35,11 +52,19 @@ export const INJECTION_RULES_EN = [
|
|
|
35
52
|
{
|
|
36
53
|
id: 'en_no_restriction',
|
|
37
54
|
name: 'Remove restrictions',
|
|
38
|
-
pattern: '(?:remove|disable|turn\\s+off|bypass|ignore|skip|override|circumvent)\\s+(?:all\\s+)?(?:restrictions?|constraints?|safety|filters?|guardrails?|limitations?|safeguards
|
|
55
|
+
pattern: '(?:remove|disable|turn\\s+off|bypass|ignore|skip|override|circumvent)\\s+(?:all\\s+|your\\s+|the\\s+)?(?:safety\\s+|content\\s+)?(?:restrictions?|constraints?|safety|filters?|guardrails?|limitations?|safeguards?|guidelines?|rules?|policy)',
|
|
39
56
|
flags: 'i',
|
|
40
57
|
riskScore: 40,
|
|
41
58
|
category: 'override',
|
|
42
59
|
},
|
|
60
|
+
{
|
|
61
|
+
id: 'en_new_instructions',
|
|
62
|
+
name: 'New/updated instructions marker',
|
|
63
|
+
pattern: '(?:^|[\\n.])\\s*(?:new|updated|revised|additional|important)\\s+instructions?\\b',
|
|
64
|
+
flags: 'i',
|
|
65
|
+
riskScore: 30,
|
|
66
|
+
category: 'injection',
|
|
67
|
+
},
|
|
43
68
|
{
|
|
44
69
|
id: 'en_do_anything',
|
|
45
70
|
name: 'Do Anything Now (DAN)',
|
|
@@ -3,7 +3,8 @@ export const INJECTION_RULES_ZH = [
|
|
|
3
3
|
{
|
|
4
4
|
id: 'zh_ignore_prev',
|
|
5
5
|
name: '忽略之前指令',
|
|
6
|
-
|
|
6
|
+
// Allow intervening qualifiers (e.g. 忽略之前的所有指令) between 忽略 and the noun.
|
|
7
|
+
pattern: '(?:请|你要|现在)?忽略(?:之前|上面|以上|前面|所有|这些|那些|一切|的)*(?:指令|指示|规则|提示|要求|约束)',
|
|
7
8
|
riskScore: 40,
|
|
8
9
|
category: 'override',
|
|
9
10
|
},
|
|
@@ -18,7 +19,18 @@ export const INJECTION_RULES_ZH = [
|
|
|
18
19
|
id: 'zh_system_prompt',
|
|
19
20
|
name: '系统提示窃取',
|
|
20
21
|
pattern: '(?:输出|显示|打印|告诉我|重复|泄露|展示)(?:你的)?(?:系统|初始|原始)?(?:提示词|system\\s*prompt|指令|规则|约束)',
|
|
21
|
-
|
|
22
|
+
// System-prompt extraction is a high-confidence single signal — legit users
|
|
23
|
+
// rarely ask the model to dump its own prompt.
|
|
24
|
+
riskScore: 40,
|
|
25
|
+
category: 'exfiltration',
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
id: 'zh_system_prompt_rev',
|
|
29
|
+
name: '系统提示窃取(语序倒置)',
|
|
30
|
+
// Noun-first phrasing, e.g. 把你的系统提示词完整输出给我
|
|
31
|
+
pattern: '(?:系统|初始|原始)?(?:提示词|system\\s*prompt).{0,8}(?:输出|发给|发送|告诉|给我|泄露|打印|复述|展示)',
|
|
32
|
+
flags: 'i',
|
|
33
|
+
riskScore: 40,
|
|
22
34
|
category: 'exfiltration',
|
|
23
35
|
},
|
|
24
36
|
{
|
|
@@ -35,6 +47,14 @@ export const INJECTION_RULES_ZH = [
|
|
|
35
47
|
riskScore: 40,
|
|
36
48
|
category: 'override',
|
|
37
49
|
},
|
|
50
|
+
{
|
|
51
|
+
id: 'zh_no_restriction_rev',
|
|
52
|
+
name: '解除限制(语序倒置)',
|
|
53
|
+
// Noun-first phrasing, e.g. 把所有限制都取消掉
|
|
54
|
+
pattern: '(?:限制|约束|安全限制|过滤|审查|规则限制)(?:都|全部|统统)?(?:取消|解除|移除|去掉|关闭|解除掉|取消掉)',
|
|
55
|
+
riskScore: 40,
|
|
56
|
+
category: 'override',
|
|
57
|
+
},
|
|
38
58
|
{
|
|
39
59
|
id: 'zh_do_not_refuse',
|
|
40
60
|
name: '禁止拒绝',
|
|
@@ -87,8 +107,9 @@ export const INJECTION_RULES_ZH = [
|
|
|
87
107
|
{
|
|
88
108
|
id: 'zh_send_to_url',
|
|
89
109
|
name: '发送到外部 URL',
|
|
110
|
+
// Sending anything to an external URL is a high-confidence exfil signal.
|
|
90
111
|
pattern: '(?:发送|传输|上传|转发|发)(?:到|给|至)\\s*https?://',
|
|
91
|
-
riskScore:
|
|
112
|
+
riskScore: 40,
|
|
92
113
|
category: 'exfiltration',
|
|
93
114
|
},
|
|
94
115
|
{
|
|
@@ -124,7 +145,9 @@ export const INJECTION_RULES_ZH = [
|
|
|
124
145
|
{
|
|
125
146
|
id: 'zh_mixed_lang_injection',
|
|
126
147
|
name: '中英混合注入',
|
|
127
|
-
|
|
148
|
+
// Bound the `.*` gaps (was unbounded → O(n^2) backtracking / ReDoS on long
|
|
149
|
+
// repeated trigger input). Mixed-language markers sit close together.
|
|
150
|
+
pattern: '(?:please|pls|now)?\\s*(?:ignore|forget|disregard)\\s+.{0,40}?(?:指令|规则|之前|以上)|(?:忽略|忘记|跳过).{0,40}?(?:instruction|rule|prompt|previous)',
|
|
128
151
|
flags: 'i',
|
|
129
152
|
riskScore: 40,
|
|
130
153
|
category: 'override',
|
|
@@ -11,10 +11,22 @@ export declare const SENSITIVE_PATTERNS: SensitivePattern[];
|
|
|
11
11
|
* Scan text and return matches (without modifying text).
|
|
12
12
|
*/
|
|
13
13
|
export declare function scanForSensitive(text: string): ScanMatch[];
|
|
14
|
+
/**
|
|
15
|
+
* Compile user-supplied pattern strings into SensitivePattern objects.
|
|
16
|
+
* Invalid regexes are skipped (never throws). The global flag is always added.
|
|
17
|
+
*/
|
|
18
|
+
export declare function compileSensitivePatterns(patterns: {
|
|
19
|
+
id: string;
|
|
20
|
+
name: string;
|
|
21
|
+
pattern: string;
|
|
22
|
+
flags?: string;
|
|
23
|
+
replacement?: string;
|
|
24
|
+
}[]): SensitivePattern[];
|
|
14
25
|
/**
|
|
15
26
|
* Redact all sensitive data in text. Returns [redactedText, findings[]]
|
|
27
|
+
* @param extra additional patterns merged after the built-ins
|
|
16
28
|
*/
|
|
17
|
-
export declare function redactSensitive(text: string): [string, {
|
|
29
|
+
export declare function redactSensitive(text: string, extra?: SensitivePattern[]): [string, {
|
|
18
30
|
id: string;
|
|
19
31
|
name: string;
|
|
20
32
|
count: number;
|
|
@@ -67,13 +67,19 @@ export const SENSITIVE_PATTERNS = [
|
|
|
67
67
|
{
|
|
68
68
|
id: 'phone_cn',
|
|
69
69
|
name: '手机号 / CN Phone',
|
|
70
|
-
|
|
70
|
+
// Restrict the 2nd–3rd digits to real CN carrier segment allocations so
|
|
71
|
+
// arbitrary 11-digit numbers (order IDs, timestamps) don't false-positive.
|
|
72
|
+
// 13x · 14x (skip 2/3) · 15x (skip 4) · 16[2567] · 17x (skip 9) · 18x · 19x (skip 4)
|
|
73
|
+
regex: /(?<!\d)1(?:3\d|4[01456789]|5[0-35-9]|6[2567]|7[0-8]|8\d|9[0-35-9])\d{8}(?!\d)/g,
|
|
71
74
|
replacement: '[REDACTED:手机号]',
|
|
72
75
|
},
|
|
73
76
|
{
|
|
74
77
|
id: 'bank_card_cn',
|
|
75
|
-
name: '银行卡号 / CN
|
|
76
|
-
|
|
78
|
+
name: '银行卡号 / CN UnionPay Card',
|
|
79
|
+
// UnionPay-only (BIN 62). Visa (4xxx) / Mastercard (5[1-5]xx) are handled by
|
|
80
|
+
// the `credit_card` rule — keeping them out of here removes the double-match
|
|
81
|
+
// that mislabeled international cards as CN bank cards.
|
|
82
|
+
regex: /(?<!\d)62\d{14,17}(?!\d)/g,
|
|
77
83
|
replacement: '[REDACTED:银行卡号]',
|
|
78
84
|
validate: validateLuhn,
|
|
79
85
|
},
|
|
@@ -119,13 +125,34 @@ export function scanForSensitive(text) {
|
|
|
119
125
|
}
|
|
120
126
|
return results;
|
|
121
127
|
}
|
|
128
|
+
/**
 * Compile user-supplied pattern strings into SensitivePattern objects.
 * Invalid entries are skipped (never throws): anything that is not an object
 * with a non-empty string `pattern`, or whose pattern/flags do not form a
 * valid regex. A missing or empty pattern is rejected explicitly because
 * `new RegExp(undefined)` / `new RegExp('')` compile to a regex that matches
 * at every position. The global flag is always added.
 *
 * NOTE(review): the patterns are user-controlled regex source; a pathological
 * pattern can still be slow on large inputs (ReDoS). This function does not
 * attempt to detect that.
 *
 * @param patterns list of `{ id, name, pattern, flags?, replacement? }`
 * @returns compiled `{ id, name, regex, replacement }` objects, in input order;
 *   `replacement` defaults to `[REDACTED:<name>]`
 */
export function compileSensitivePatterns(patterns) {
    const out = [];
    if (patterns == null || typeof patterns[Symbol.iterator] !== 'function')
        return out;
    for (const p of patterns) {
        if (!p || typeof p.pattern !== 'string' || p.pattern === '')
            continue;
        const baseFlags = typeof p.flags === 'string' ? p.flags : '';
        const flags = baseFlags.includes('g') ? baseFlags : `${baseFlags}g`;
        let regex;
        try {
            regex = new RegExp(p.pattern, flags);
        }
        catch {
            // skip invalid pattern or flags
            continue;
        }
        out.push({
            id: p.id,
            name: p.name,
            regex,
            replacement: p.replacement ?? `[REDACTED:${p.name}]`,
        });
    }
    return out;
}
|
|
122
148
|
/**
|
|
123
149
|
* Redact all sensitive data in text. Returns [redactedText, findings[]]
|
|
150
|
+
* @param extra additional patterns merged after the built-ins
|
|
124
151
|
*/
|
|
125
|
-
export function redactSensitive(text) {
|
|
152
|
+
export function redactSensitive(text, extra = []) {
|
|
126
153
|
let result = text;
|
|
127
154
|
const findings = [];
|
|
128
|
-
for (const pat of SENSITIVE_PATTERNS) {
|
|
155
|
+
for (const pat of [...SENSITIVE_PATTERNS, ...extra]) {
|
|
129
156
|
const regex = new RegExp(pat.regex.source, pat.regex.flags);
|
|
130
157
|
let count = 0;
|
|
131
158
|
result = result.replace(regex, (match) => {
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
export interface ToolPoisonRule {
|
|
2
|
+
id: string;
|
|
3
|
+
name: string;
|
|
4
|
+
pattern: RegExp;
|
|
5
|
+
riskScore: number;
|
|
6
|
+
category: 'hidden_instruction' | 'data_access' | 'exfiltration' | 'concealment' | 'shadowing';
|
|
7
|
+
}
|
|
8
|
+
export declare const TOOL_POISONING_RULES: ToolPoisonRule[];
|