decorated-pi 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -151
- package/extensions/extend-model.ts +1 -6
- package/extensions/lsp/client.ts +12 -1
- package/extensions/lsp/env.ts +6 -0
- package/extensions/lsp/format.ts +6 -0
- package/extensions/lsp/index.ts +6 -0
- package/extensions/lsp/prompt.ts +6 -0
- package/extensions/lsp/server-manager.ts +6 -0
- package/extensions/lsp/servers.ts +9 -1
- package/extensions/lsp/tools.ts +8 -0
- package/extensions/lsp/trust.ts +6 -0
- package/extensions/safety.ts +609 -104
- package/extensions/smart-at.ts +10 -3
- package/package.json +10 -7
package/extensions/safety.ts
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Safety — 安全防护模块
|
|
3
3
|
*
|
|
4
|
-
* - Command Guard: 拦截危险 bash
|
|
5
|
-
* -
|
|
6
|
-
* -
|
|
4
|
+
* - Command Guard: 拦截危险 bash 命令(rm, sudo, npm publish, git push 等)
|
|
5
|
+
* - Redirect Guard: bash 覆盖写入(>)提示确认,保护路径额外警告敏感信息
|
|
6
|
+
* - Protected Paths: write/edit 写入保护路径需确认,提示敏感信息
|
|
7
|
+
* - Read Guard: read/cat 等读取保护路径需确认,提示敏感信息
|
|
8
|
+
* - Write Guard: 覆盖非空文件禁止 write 工具,建议用 edit
|
|
7
9
|
* - Secret Redact: API Key / Token 自动掩码
|
|
8
10
|
*/
|
|
9
11
|
|
|
@@ -32,7 +34,44 @@ const SAFE_REDIRECT_TARGETS = new Set([
|
|
|
32
34
|
]);
|
|
33
35
|
|
|
34
36
|
/** Tokens that end one command segment and start the next. */
const SHELL_SEGMENT_BREAKS = new Set([
  "|",
  "&&",
  "||",
  ";",
]);

/** Redirect operators that truncate (overwrite) their target file. */
const SHELL_REDIRECT_OVERWRITE = new Set([
  ">",
  "1>",
  "2>",
  "&>",
]);
|
|
38
|
+
|
|
39
|
+
// ─── 保护路径 ────────────────────────────────────────────────────────────────
|
|
40
|
+
|
|
41
|
+
/** Substrings that mark a path as sensitive wherever they occur in it. */
const PROTECTED_PATH_SEGMENTS = [
  ".env",
  ".git/",
  ".ssh/",
  ".gnupg/",
  ".aws/",
  "secrets/",
  ".docker/",
];

/** File-name suffixes used by key and certificate material. */
const PROTECTED_EXTENSIONS = [
  ".pem",
  ".key",
  ".p12",
  ".pfx",
  ".keystore",
];

/** Exact file names (last path component) that are always treated as sensitive. */
const PROTECTED_FILENAMES = [
  "id_rsa",
  "id_ed25519",
  "id_ecdsa",
  "authorized_keys",
  "known_hosts",
  ".env.local",
  ".env.production",
];
|
|
51
|
+
|
|
52
|
+
/**
 * Executables whose purpose is to print or search file contents.
 * Invoking one of these on a protected path requires confirmation first.
 */
const READ_COMMANDS = new Set([
  // pagers and plain dumpers
  "cat", "head", "tail", "less", "more", "bat", "batcat",
  // reversed / numbered / binary dumpers
  "tac", "nl", "od", "xxd", "hexdump", "base64",
  // inspectors and searchers
  "file", "strings", "grep", "rg", "ag", "ack",
]);
|
|
58
|
+
|
|
59
|
+
/**
 * Decide whether a path refers to a location treated as sensitive.
 *
 * Checks run in a fixed order and the first hit wins:
 *   1. the path contains one of PROTECTED_PATH_SEGMENTS,
 *   2. the path ends with one of PROTECTED_EXTENSIONS,
 *   3. the last path component equals one of PROTECTED_FILENAMES.
 *
 * Matching is case-insensitive. Backslash separators are already accepted
 * here, and on case-insensitive file systems (Windows, default macOS)
 * `.SSH\ID_RSA` names the same file as `.ssh/id_rsa`, so a case-sensitive
 * comparison would let such spellings slip past the guard.
 *
 * @param filePath Path as written by the caller; it is not resolved or
 *                 checked for existence.
 * @returns A short human-readable reason quoting the matched rule, or
 *          `null` when no rule matches.
 */
function checkProtectedPath(filePath: string): string | null {
  // Unify separators and case once so every rule sees the same form.
  const normalized = filePath.replace(/\\/g, "/").toLowerCase();
  const filename = normalized.split("/").pop() ?? "";

  const segment = PROTECTED_PATH_SEGMENTS.find((seg) => normalized.includes(seg.toLowerCase()));
  if (segment !== undefined) return `path contains "${segment}"`;

  const extension = PROTECTED_EXTENSIONS.find((ext) => normalized.endsWith(ext.toLowerCase()));
  if (extension !== undefined) return `file extension "${extension}"`;

  const protectedName = PROTECTED_FILENAMES.find((name) => filename === name.toLowerCase());
  if (protectedName !== undefined) return `protected file "${protectedName}"`;

  return null;
}
|
|
73
|
+
|
|
74
|
+
// ─── Shell tokenizer ────────────────────────────────────────────────────────
|
|
36
75
|
|
|
37
76
|
function tokenizeShell(command: string): string[] {
|
|
38
77
|
const tokens: string[] = [];
|
|
@@ -123,145 +162,588 @@ function isExistingRegularFile(target: string, cwd: string): boolean {
|
|
|
123
162
|
}
|
|
124
163
|
}
|
|
125
164
|
|
|
126
|
-
|
|
165
|
+
// ─── Bash danger analysis ───────────────────────────────────────────────────
|
|
166
|
+
|
|
167
|
+
/** One finding produced while analysing a bash command line. */
interface BashDanger {
  /** Human-readable explanation shown to the user. */
  reason: string;
  /**
   * Set when the finding involves a protected (sensitive) path;
   * left undefined for findings that do not.
   */
  protectedPath?: string;
}
|
|
172
|
+
|
|
173
|
+
/**
 * Scan a bash command line and collect every operation that should be
 * confirmed by the user before it runs.
 *
 * Four checks are applied to each token produced by `tokenizeShell`:
 *   - the token names an entry of DANGEROUS_COMMANDS (optionally followed by
 *     one of its listed sub-commands);
 *   - the token is an overwrite redirect whose target passes
 *     `isExistingRegularFile`;
 *   - the token names a READ_COMMANDS entry and one of its path-like
 *     arguments is a protected path;
 *   - the token is `tee` and one of its arguments passes
 *     `isExistingRegularFile`.
 *
 * A trailing `.exe` on the command name is ignored for all command checks.
 * The previous read-command check appended `.exe` to the name instead of
 * stripping it, so `cat.exe .env` was never reported.
 *
 * @param command Raw command line.
 * @param cwd     Directory handed to `isExistingRegularFile` (presumably used
 *                to resolve relative targets; confirm against that helper).
 * @returns Findings in discovery order, de-duplicated by reason text.
 */
function collectBashDangers(command: string, cwd: string): BashDanger[] {
  const tokens = tokenizeShell(command);
  const dangers: BashDanger[] = [];
  const seen = new Set<string>();

  // Identical reasons are reported once, however often they occur.
  const addDanger = (reason: string, protectedPath?: string) => {
    if (seen.has(reason)) return;
    seen.add(reason);
    dangers.push({ reason, protectedPath });
  };

  // Report a file-related finding, adding the sensitivity note when the path
  // is protected. With `alwaysReport` false, unprotected paths are ignored.
  const addFileDanger = (reason: string, filePath: string, alwaysReport: boolean) => {
    const prot = checkProtectedPath(filePath);
    if (prot) {
      addDanger(`${reason}\n Sensitive: ${prot}, may contain sensitive information`, prot);
    } else if (alwaysReport) {
      addDanger(reason);
    }
  };

  for (let i = 0; i < tokens.length; i++) {
    const token = tokens[i]!;
    if (SHELL_SEGMENT_BREAKS.has(token)) continue;

    // Command name without its directory part, and the same without ".exe".
    const cmdName = token.split("/").pop() ?? token;
    const baseName = cmdName.endsWith(".exe") ? cmdName.slice(0, -".exe".length) : cmdName;

    // ── Dangerous commands ──
    for (const [cmd, subs] of DANGEROUS_COMMANDS) {
      if (cmdName !== cmd && cmdName !== `${cmd}.exe`) continue;
      if (subs.length === 0) {
        addDanger(`"${cmd}" is a dangerous command`);
        break;
      }
      const next = tokens[i + 1];
      if (next && subs.includes(next)) {
        addDanger(`"${cmd} ${next}" is a dangerous command`);
        break;
      }
    }

    // ── Overwrite redirect (>) ──
    if (SHELL_REDIRECT_OVERWRITE.has(token)) {
      const target = tokens[i + 1];
      if (target && isExistingRegularFile(target, cwd)) {
        addFileDanger(`shell redirection would overwrite existing file "${target}"`, target, true);
      }
      continue;
    }

    // ── Read commands on protected paths ──
    if (READ_COMMANDS.has(baseName)) {
      for (let j = i + 1; j < tokens.length; j++) {
        const arg = tokens[j]!;
        if (SHELL_SEGMENT_BREAKS.has(arg)) break;
        if (arg.startsWith("-")) continue; // option, not a file operand
        // Only path-looking arguments are checked, so a plain search pattern
        // is not mistaken for a file.
        if (arg.includes("/") || arg.startsWith(".") || isExistingRegularFile(arg, cwd)) {
          addFileDanger(`"${cmdName}" reads protected file "${arg}"`, arg, false);
        }
      }
    }

    // ── tee writes to existing files ──
    if (baseName === "tee") {
      for (let j = i + 1; j < tokens.length; j++) {
        const arg = tokens[j]!;
        if (SHELL_SEGMENT_BREAKS.has(arg)) break;
        if (arg.startsWith("-")) continue; // options, including -a / --append
        if (isExistingRegularFile(arg, cwd)) {
          addFileDanger(`"tee" would write to existing file "${arg}"`, arg, true);
        }
      }
    }
  }

  return dangers;
}
|
|
179
263
|
|
|
180
|
-
function
|
|
181
|
-
if (
|
|
182
|
-
if (
|
|
183
|
-
return `dangerous operations detected:\n- ${
|
|
264
|
+
/**
 * Render a list of findings as one message.
 *
 * @param dangers Findings to render.
 * @returns `null` for an empty list, the bare reason for a single finding,
 *          otherwise a headed bullet list with one reason per line.
 */
function formatBashDangers(dangers: BashDanger[]): string | null {
  const reasons = dangers.map((danger) => danger.reason);
  switch (reasons.length) {
    case 0:
      return null;
    case 1:
      return reasons[0]!;
    default:
      return `dangerous operations detected:\n- ${reasons.join("\n- ")}`;
  }
}
|
|
269
|
+
|
|
270
|
+
// ─── Secret Detection — Entropy + Pattern ────────────────────────────────────
|
|
271
|
+
//
|
|
272
|
+
// Based on opencode-secrets-protect by Jared Scheel
|
|
273
|
+
// https://github.com/jscheel/opencode-secrets-protect (MIT License)
|
|
274
|
+
//
|
|
275
|
+
// Detection pipeline: High-confidence patterns (40+ known formats)
|
|
276
|
+
// → Low-confidence patterns (generic assignments, context-checked)
|
|
277
|
+
// → Adjusted Shannon Entropy v3+Dict (unknown formats)
|
|
278
|
+
// → Safe pattern exclusion (reduce false positives)
|
|
279
|
+
//
|
|
280
|
+
// Entropy v3+Dict formula:
|
|
281
|
+
// adjusted = baseShannon + trigramDensity×W1 - wordRatio×W2 - dictRatio×W3 - hexPenalty
|
|
282
|
+
//
|
|
283
|
+
// - baseShannon: Claude E. Shannon's 1948 "A Mathematical Theory of Communication"
|
|
284
|
+
// - trigramDensity: 3-char sliding window scores class transitions:
|
|
285
|
+
// • Letter↔Digit (digit in first 2 positions) → 1.0
|
|
286
|
+
// • Contains '-' with ≥3 classes → 1.0
|
|
287
|
+
// • AbA pattern (≥2 uppercase + lowercase) → 0.8
|
|
288
|
+
// X-class chars (not letter/digit/dash) split segments independently
|
|
289
|
+
// - wordRatio: vowel-containing lowercase fragments penalize secret likelihood
|
|
290
|
+
// - dictRatio: dictionary word coverage penalizes identifiers/English text
|
|
291
|
+
// - hexPenalty: -2.5 only if >90% hex AND contains '-' (UUID-like format)
|
|
292
|
+
|
|
293
|
+
/** The `{ type: "text" }` member of a tool result's content element union. */
type ToolTextContent = Extract<
  NonNullable<ToolResultEvent["content"]>[number],
  { type: "text" }
>;
|
|
294
|
+
|
|
295
|
+
// ── Entropy Analysis v3+Dict ─────────────────────────────────────────────────
|
|
296
|
+
//
|
|
297
|
+
// Based on opencode-secrets-protect by Jared Scheel
|
|
298
|
+
// https://github.com/jscheel/opencode-secrets-protect (MIT License)
|
|
299
|
+
//
|
|
300
|
+
// Detection approach:
|
|
301
|
+
// 1. Split content by whitespace + code punctuation
|
|
302
|
+
// 2. For each token ≥ 16 chars, compute adjusted entropy:
|
|
303
|
+
// adjusted = baseShannon + trigramDensity×W1 - wordRatio×W2 - dictRatio×W3 - hexPenalty
|
|
304
|
+
// 3. Trigram density uses a 3-character sliding window:
|
|
305
|
+
// - AbA pattern (≥2 uppercase) → 0.8
|
|
306
|
+
// - Letter↔Digit (digit in first 2 positions) → 1.0
|
|
307
|
+
// - Contains '-' with ≥3 classes → 1.0
|
|
308
|
+
// X-class chars split the token into independent segments;
|
|
309
|
+
// the segment with the highest density is used.
|
|
310
|
+
// 4. wordRatio: ratio of vowel-containing lowercase fragments ≥3 chars
|
|
311
|
+
// 5. dictRatio: ratio of dictionary word coverage (2121 English + tech words)
|
|
312
|
+
// 6. hexPenalty: -2.5 only if >90% hex AND contains '-' (UUID-like format)
|
|
313
|
+
|
|
314
|
+
/**
 * Classify a character by its first UTF-16 code unit.
 *
 * @returns "U" for A-Z, "L" for a-z, "D" for 0-9, "S" when the input is
 *          exactly "-", and "X" for everything else (including "").
 */
function charClass(c: string): "U" | "L" | "D" | "S" | "X" {
  const unit = c.charCodeAt(0);
  const within = (lo: number, hi: number): boolean => unit >= lo && unit <= hi;

  if (within(0x41, 0x5a)) return "U"; // 'A'..'Z'
  if (within(0x61, 0x7a)) return "L"; // 'a'..'z'
  if (within(0x30, 0x39)) return "D"; // '0'..'9'
  return c === "-" ? "S" : "X";
}
|
|
185
323
|
|
|
186
|
-
|
|
187
|
-
|
|
324
|
+
/**
 * Shannon entropy: average information content per symbol, in bits.
 * H(X) = -Σ p(x) · log₂(p(x))
 *
 * Symbols are Unicode code points. The symbol total is counted during the
 * same iteration that builds the frequency table, so the probabilities always
 * sum to 1. Dividing by `data.length` (UTF-16 code units) would undercount
 * every probability whenever the input contains characters outside the BMP.
 *
 * @param data Text to measure.
 * @returns Entropy in bits per symbol; 0 for an empty string or a string
 *          made of a single repeated symbol.
 */
function shannonEntropy(data: string): number {
  if (data.length === 0) return 0;

  const freq = new Map<string, number>();
  let total = 0;
  for (const char of data) {
    freq.set(char, (freq.get(char) ?? 0) + 1);
    total++;
  }

  let entropy = 0;
  for (const count of freq.values()) {
    const p = count / total;
    entropy -= p * Math.log2(p);
  }
  return entropy;
}
|
|
189
342
|
|
|
190
|
-
|
|
343
|
+
/**
 * Score one 3-character window by how its character classes mix.
 *
 * Rules, evaluated in this order:
 *   - any X-class character                                  → 0
 *   - three digits                                           → 0
 *   - contains '-' and all three classes differ              → 1.0
 *   - digit and letter mixed, digit in position 1 or 2       → 1.0
 *   - at least two uppercase plus one lowercase (e.g. KeA)   → 0.8
 *   - anything else                                          → 0
 */
function trigramScore(c1: string, c2: string, c3: string): number {
  const first = charClass(c1);
  const second = charClass(c2);
  const third = charClass(c3);
  const window: string[] = [first, second, third];
  const tally = (kind: string): number => window.filter((k) => k === kind).length;

  if (tally("X") > 0) return 0;

  const digits = tally("D");
  if (digits === 3) return 0;

  const distinct = new Set(window).size;
  if (tally("S") > 0 && distinct >= 3) return 1.0;

  const uppers = tally("U");
  const lowers = tally("L");
  const digitLeads = first === "D" || second === "D";
  if (digits > 0 && uppers + lowers > 0 && digitLeads) return 1.0;

  if (uppers >= 2 && lowers >= 1) return 0.8;

  return 0;
}
|
|
378
|
+
|
|
379
|
+
/**
 * Cut a token at every X-class character and keep the pieces that are at
 * least 3 characters long, so separators such as `://`, `@` or `.` do not
 * dilute the trigram density of the remaining runs.
 */
function splitByXClass(token: string): string[] {
  const kept: string[] = [];
  let run = "";

  // Store the current run if it is long enough, then start a new one.
  const flush = (): void => {
    if (run.length >= 3) kept.push(run);
    run = "";
  };

  for (const ch of token) {
    if (charClass(ch) !== "X") {
      run += ch;
      continue;
    }
    flush();
  }
  flush();

  return kept;
}
|
|
397
|
+
|
|
398
|
+
/**
 * Mean trigram score over every 3-character window of a segment.
 * Segments shorter than 3 characters have no window and score 0.
 */
function segmentDensity(segment: string): number {
  const windows = segment.length - 2;
  if (windows < 1) return 0;

  let sum = 0;
  for (let start = 0; start < windows; start++) {
    sum += trigramScore(
      segment.charAt(start),
      segment.charAt(start + 1),
      segment.charAt(start + 2),
    );
  }
  return sum / windows;
}
|
|
409
|
+
|
|
410
|
+
/**
 * Highest trigram density among the X-split segments of a token.
 * Returns 0 when the token yields no segment.
 */
function maxSegmentDensity(token: string): number {
  return splitByXClass(token).reduce((best, piece) => {
    const density = segmentDensity(piece);
    return density > best ? density : best;
  }, 0);
}
|
|
217
424
|
|
|
218
|
-
|
|
425
|
+
/**
 * Share of a token covered by word-like fragments.
 *
 * The token is cut wherever the character class changes (X-class characters
 * are dropped and always cut). A fragment counts as word-like when it is at
 * least 3 characters long, consists only of a-z, and contains a vowel.
 *
 * @returns Total length of word-like fragments divided by `token.length`,
 *          or 0 for an empty token.
 */
function computeWordRatio(token: string): number {
  if (token.length === 0) return 0;

  let wordChars = 0;
  let run = "";
  let runClass = "";

  // Finish the current same-class run, counting it if it is word-like.
  const closeRun = (): void => {
    if (run.length >= 3 && /^[a-z]+$/.test(run) && /[aeiou]/.test(run)) {
      wordChars += run.length;
    }
    run = "";
  };

  for (const ch of token) {
    const cls = charClass(ch);
    if (cls === "X") {
      closeRun();
      runClass = "";
      continue;
    }
    if (cls !== runClass) closeRun();
    run += ch;
    runClass = cls;
  }
  closeRun();

  return wordChars / token.length;
}
|
|
221
459
|
|
|
222
|
-
|
|
223
|
-
|
|
460
|
+
/**
 * Hex ratio: share of a token's characters that are hexadecimal digits
 * (0-9, a-f, A-F) or '-'. Returns 0 for an empty token.
 */
function computeHexRatio(token: string): number {
  if (token.length === 0) return 0;
  const hexLike = token.match(/[0-9a-fA-F\-]/g);
  const hits = hexLike === null ? 0 : hexLike.length;
  return hits / token.length;
}
|
|
224
471
|
|
|
225
|
-
|
|
472
|
+
// ── Dictionary Words for Secret Detection ─────────────────────────────────────
|
|
473
|
+
//
|
|
474
|
+
// Based on Google 10K most common English words (len >= 4)
|
|
475
|
+
// + top 500 most common words (len >= 3)
|
|
476
|
+
// + ~80 common tech abbreviations that appear in code identifiers.
|
|
477
|
+
// Used by computeDictRatio() to penalize tokens containing known words.
|
|
478
|
+
//
|
|
479
|
+
// Word list source: https://github.com/first20hours/google-10000-english (public domain)
|
|
226
480
|
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
return text.slice(0, 4) + "********" + text.slice(-4);
|
|
481
|
+
/**
 * Lowercase dictionary used by computeDictRatio() to recognise ordinary words
 * inside a token.
 *
 * The list is stored as a JSON array inside a template literal and parsed once
 * at module load. Line breaks may only appear BETWEEN array elements (where
 * JSON treats them as whitespace); a break inside a quoted word would put a raw
 * newline in a JSON string and make JSON.parse throw.
 */
const DICT_WORDS: ReadonlySet<string> = new Set(
  // prettier-ignore
  JSON.parse(`[
"ability","able","about","above","abstract","abuse","academic","accept","acceptance","accepted","access","accessories","accommodation","according","account","accounting","accounts","across","action","actions","active","activities","activity","actual","actually","added","addition","additional","address","adm","admin","administration","administrative","adult","advance","advanced","adventure","advertise","advertisement","advertising","advice","aes","affairs","affiliate","affiliates","africa","african","after","again","against","agencies","agency","agent","agents","agree","agreement","airport","album","allow","allowed","allows","almost","alone","along","already","also","alternative","although","always","amateur","amazon","america","american","among","amount","analysis","angeles","animal","animals","announcements","annual","another","answer","answers","anti","anyone","anything","apartments","api","apparel","appear","apple","application","applications","applied","apply","approach","appropriate","approval","approved","approximately","april","architecture","archive","archives","area","areas","argument","arizona","army","around","article","articles","artist","artists","arts","asia","asian","asked","assessment","assistance","assistant","associated","associates","association","attack","attention","attorney","auction","auctions","audio","august","australia","australian","auth","author","authority","authors","auto","automatically","automotive","availability","available","avenue","average","avg","avoid","award","awards","away",
"baby","back","background","balance","ball","band","bank","base","baseball","based","basic","basis","basket","battery","beach","beautiful","beauty","became","because","become","been","before","began","begin","beginning","behind","being","believe","below","benefit","benefits","best","better","between","beyond","bible","bill","birth","black","block","blog","blogs","blood","blue","board","boards","body","book","books","born","boston","both","bottom","boys","branch","brand","brands","break","breakfast","breast","bridge","bring","british","brought","brown","browse","browser","btn","budget","buf","build","building","built","bush","business","businesses","button","buyer","buying",
"cable","calendar","california","call","called","calls","came","camera","cameras","camp","campaign","campus","canada","canadian","cancer","canon","capacity","capital","card","cards","care","career","careers","carolina","cars","cart","case","cases","cash","casino","catalog","categories","category","cause","cb","cell","cells","center","centers","central","centre","century","certain","certificate","certified","cfg","chain","chair","challenge","chance","change","changed","changes","channel","chapter","character","characters","charge","charges","charles","chart","chat","cheap","check","chemical","chicago","chief","child","children","china","chinese","choice","choose","chris","christian","christmas","church","cities","city","civil","claim","claims","class","classes","classic","classifieds","clean","clear","cli","click","client","clients","clinical","close","closed","clothing","club","clubs","cnet","cnt","coast","code","codes","coffee","col","cold","collection","college","color","colorado","columbia","column","come","comes","coming","command","comment","comments","commerce","commercial","commission","committee","common","communication","communications","communities","community","companies","company","compare","compared","comparison","competition","complete","completed","complex","compliance","component","components","comprehensive","computer","computers","computing","condition","conditions","conference","configuration","congress","connect","connection","consider","considered","construction","consumer","contact","contacts","contains","content","contents","context","continue","continued","contract","control","cool","copy","copyright","core","corner","corporate","corporation","correct","cost","costs","could","council","count","counter","countries","country","county","couple","course","courses","court","cover","coverage","covered","cpu","create","created","creating","creative","credit","creek","crime","critical","cross","crud","css","csv","cultural","culture","currency","current","currently","custom","customer","customers",
"daily","damage","dance","dark","data","database","date","dates","dating","david","days","db","dead","deal","deals","death","debt","december","decision","deep","default","defense","define","defined","definition","degree","delivery","demand","department","described","description","design","designated","designed","desktop","detail","detailed","details","determine","determined","dev","develop","developed","developer","developing","development","device","devices","diamond","dictionary","died","diet","difference","different","difficult","digital","dir","direct","directions","directly","director","directory","disclaimer","discount","discuss","discussion","disease","disp","display","distance","distribution","district","division","dlg","dns","doctor","document","documentation","documents","does","doing","dollar","dollars","domain","domestic","done","door","double","down","download","downloads","draft","drive","driver","driving","drop","drug","drugs","dst","during","dvds",
"each","early","earth","easily","east","eastern","easy","ebay","economic","economy","edge","edit","edition","editor","education","educational","effect","effective","effects","effort","efforts","either","election","electric","electronic","electronics","element","elements","else","email","emergency","emit","employee","employees","employment","enable","ending","energy","engine","engineering","england","english","enjoy","enough","ensure","enter","enterprise","entertainment","entire","entries","entry","env","environment","environmental","equal","equipment","err","error","errors","especially","essential","established","estate","europe","european","evaluation","even","event","events","ever","every","everyone","everything","evidence","evt","example","examples","excellent","except","exchange","executive","exercise","existing","expect","expected","experience","expert","express","ext","extended","extension","external","extra","eyes",
"face","facilities","facility","fact","factor","factors","facts","faculty","failure","fair","faith","fall","families","family","fantasy","farm","fashion","fast","father","favorite","feat","feature","featured","features","february","federal","feed","feedback","feel","fees","feet","female","fiction","field","fields","figure","file","files","fill","film","films","filter","final","finally","finance","financial","find","finding","fine","fire","firm","first","fish","fishing","fitness","five","fixed","fixme","flag","flash","flat","flight","floor","florida","flow","flowers","focus","follow","following","follows","font","food","foot","football","force","ford","foreign","forest","form","format","former","forms","forum","forums","forward","found","foundation","four","frame","france","francisco","free","freedom","french","fresh","friday","friend","friendly","friends","from","front","ftr","fuel","full","fully","function","functional","functions","fund","funding","funds","furniture","further","future",
"galleries","gallery","game","games","gamma","garden","gave","gear","general","generally","generated","generation","george","georgia","german","germany","gets","getting","gid","gift","gifts","girl","girls","git","give","given","gives","giving","glass","global","goal","goals","goes","going","gold","golden","golf","gone","good","goods","google","government","gpt","gpu","grade","graduate","grand","grant","graphics","great","greater","green","ground","group","groups","growing","growth","grp","guarantee","guest","gui","guide","guidelines","guides","guitar","guys",
"hack","hair","half","hall","hand","hands","happy","hard","hardware","have","having","hdr","head","headlines","health","hear","heard","hearing","heart","heat","heavy","held","help","helpful","here","high","higher","highest","highly","hill","himself","hire","historical","history","hits","hold","holiday","holidays","home","homepage","homes","hook","hope","horse","hospital","host","hosting","hotel","hotels","hour","hours","house","housing","houston","however","html","huge","human",
"icon","idea","ideas","identify","idx","illinois","image","images","img","immediately","impact","implementation","important","improve","improvement","inch","include","included","includes","including","income","increase","increased","independent","index","india","indian","individual","individuals","industrial","industry","info","information","informed","initial","input","inside","install","installation","instead","institute","institutions","instructions","instruments","insurance","int","integrated","intended","interactive","interest","interested","interesting","interests","interface","internal","international","internet","into","introduction","investment","involved","ipod","iraq","ireland","isbn","island","islands","israel","issue","issues","italian","italy","item","items","itself",
"jack","jackson","james","january","japan","japanese","java","jersey","jesus","jewelry","jobs","john","johnson","join","joined","joint","jones","journal","json","july","jump","june","just","justice",
"kansas","keep","key","keyword","keywords","kids","kind","kinds","king","kingdom","kitchen","know","knowledge","known","kong",
"label","labor","lake","lan","land","language","languages","large","larger","largest","last","late","later","latest","latin","laws","lead","leader","leaders","leadership","leading","league","learn","learning","least","leather","leave","left","legal","len","length","lesbian","less","letter","letters","level","levels","lib","library","license","life","light","like","likely","limit","limited","line","lines","link","links","linux","list","listed","listen","listing","listings","lists","literature","little","live","lives","living","llm","load","loan","loans","local","located","location","locations","login","logo","london","long","longer","look","looking","looks","lord","loss","lost","lots","louis","love","lower","lowest","lyrics",
"mac","machine","machines","made","magazine","magazines","magic","mail","mailing","main","maintenance","major","make","makes","making","male","manage","management","manager","manual","manufacturer","manufacturing","many","maps","march","marine","mark","market","marketing","markets","martin","mary","mass","master","match","matching","material","materials","matter","mature","max","maximum","maybe","mean","means","measures","media","medical","medicine","medium","meet","meeting","meetings","mega","member","members","membership","memory","mental","menu","merchant","message","messages","metal","method","methods","mexico","michael","michigan","micro","microsoft","middle","might","mike","miles","military","million","min","mind","mini","minimum","minister","minnesota","minute","minutes","miss","missing","mission","mobile","mock","mod","mode","model","models","modern","modified","module","moment","monday","money","monitor","monitoring","month","monthly","months","more","morning","mortgage","most","mother","motion","motor","motorola","mount","mountain","move","moved","movement","movie","movies","moving","msg","much","multi","multimedia","multiple","museum","music","musical","must","myself",
"naked","name","names","nano","nation","national","native","natural","nature","nav","navigation","near","necessary","need","needed","needs","net","network","networking","networks","never","news","newsletter","next","nice","night","nlp","nokia","none","normal","north","northern","note","notes","nothing","notice","november","npm","num","number","numbers","nursing",
"oauth","object","october","offer","offered","offering","offers","office","officer","official","often","ohio","older","once","ones","online","only","ontario","open","opening","operating","operation","operations","opinion","opportunities","opportunity","ops","option","optional","options","oral","orange","order","orders","oregon","organization","organizations","original","orm","oss","other","others","otherwise","outdoor","output","outside","over","overall","overview","owned","owner","owners",
"pacific","pack","package","packages","page","pages","paid","pain","palm","panel","paper","paperback","papers","parent","parents","paris","park","parking","part","particular","particularly","parties","partner","partners","parts","party","pass","password","past","patch","path","patient","patients","paul","payment","paypal","peace","pennsylvania","people","percent","perfect","performance","perhaps","period","perm","permission","person","personal","persons","peter","phase","phentermine","phone","phones","photo","photography","photos","physical","pick","pics","picture","pictures","pid","piece","pink","pip","pipe","pkg","place","placed","places","plan","planning","plans","plant","plants","plastic","platform","play","played","player","players","playing","please","plus","pocket","point","points","poker","pol","police","policies","polic
y","political","politics","pool","poor","pop","popular","population","port","pos","position","positive","possible","post","posted","poster","posters","posts","potential","power","powered","practice","practices","premium","present","presentation","presented","president","press","pressure","pretty","prev","prevent","previous","price","prices","pricing","primary","prime","print","printer","printing","prior","privacy","private","pro","probably","problem","problems","procedure","procedures","process","processes","processing","prod","produce","produced","product","production","products","professional","professor","profile","profit","program","programme","programming","programs","progress","project","projects","properties","property","proposed","protect","protection","protein","provide","provided","provider","providers","provides","providing","ptr","public","publication","publications","published","publisher","publishing","purchase","purpose","purposes",
"quality","quantity","quarter","question","questions","quick","quickly","quite","quote","quotes",
"race","racing","radio","ram","random","range","rank","rate","rated","rates","rather","rating","ratings","reach","read","reader","readers","reading","ready","real","really","reason","reasons","receive","received","recent","recently","recipes","recommend","recommendations","recommended","record","records","recovery","reduce","ref","reference","references","regarding","region","regional","register","registered","registration","regular","regulations","related","relations","relationship","release","released","releases","relevant","religion","religious","remember","remote","remove","rent","rental","rentals","repair","replies","reply","report","reported","reporting","reports","republic","req","request","requests","require","required","requirements","requires","res","research","reserve","reserved","resolution","resort","resource","resources","respect","respective","response","responsibility","responsible","rest","restaurant","restaurants","result","results","retail","return","returns","rev","review","reviews","rich","richard","right","rights","ring","ringtones","risk","river","road","robert","rock","rol","role","room","rooms","root","rose","round","row","royal","rsa","rule","rules","running","russia","russian",
"safe","safety","said","saint","sale","sales","same","sample","samsung","santa","satellite","saturday","save","saying","says","scale","schedule","school","schools","science","sciences","scientific","score","scott","screen","sdk","search","searches","season","seattle","second","seconds","secretary","section","sections","sector","secure","security","seem","seems","seen","select","selected","selection","self","sell","seller","sellers","selling","send","senior","sense","sent","separate","september","sequence","series","serious","serve","server","servers","service","services","session","sets","setting","settings","seven","several","sha","shall","share","sheet","ship","shipping","ships","shirt","shirts","shoes","shop","shopping","shops","short","shot","should","show","showing","shown","shows","sid","side","sign","signed","significant","silver","similar","simple","simply","since","single","site","sitemap","sites","situation","size","skills","skin","skip","small","smart","smith","snow","social","society","soft","software","sold","solid","solution","solutions","some","someone","something","sometimes","song","songs","sony","soon","sorry","sort","sorted","sound","source","sources","south","southern","space","spain","spanish","special","species","specific","specified","speed","spirit","sponsored","sport","sports","spring","sql","square","src","sre","ssd","ssh","ssl","staff","stage","stand","standard","standards","star","stars","start","started","starting","state","statement","statements","states","station","statistics","status","stay","steel","step","steps","steve","still","stock","stone","stop","storage","store","stores","stories","story","str","strategies","strategy","stream","street","string","strong","structure","stub","student","students","studies","studio","study","stuff","style","subject","subjects","submit","submitted","subs","subscribe","success","successful","such","suggest","suite","sum","summary","summer","sunday","super","supplies","supply","support","supported","sure","surface","surgery","survey","switch","system","systems",
"tab","table","tables","tag","tags","take","taken","takes","taking","talk","talking","target","task","tcp","teacher","teachers","teaching","team","tech","technical","techniques","technologies","technology","teen","teens","telephone","television","tell","temp","temperature","term","terms","test","testing","tests","texas","text","than","thank","thanks","that","their","them","theme","themselves","then","theory","therapy","there","therefore","these","they","thing","things","think","thinking","third","this","thomas","those","though","thought","thoughts","thousands","thread","three","through","throughout","thursday","thus","tickets","tid","time","times","tip","tips","title","titles","tls","tmp","today","todo","together","told","took","tool","tools","topic","topics","total","touch","tour","tours","towards","town","toys","track","trade","trademarks","trading","traditional","traffic","training","transfer","transport","transportation","travel","treatment","tree","trial","trip","true","trust","truth","trying","tuesday","turn","type","types",
"udp","uid","under","understand","understanding","union","unique","unit","united","units","universal","university","unknown","unless","until","update","updated","updates","upgrade","upon","upper","urban","url","used","useful","user","username","users","uses","using","usr","usually",
"vacation","val","valid","valley","value","values","variable","variety","various","vegas","vehicle","vehicles","ver","version","very","video","videos","view","viewed","views","village","virginia","virtual","virus","vision","visit","visitors","visual","voice","volume","vote","vpn",
"wait","walk","wall","wan","want","wanted","warning","washington","waste","watch","watches","water","ways","weather","website","websites","wedding","wednesday","week","weekend","weekly","weeks","weight","welcome","well","went","were","west","western","what","when","where","whether","which","while","white","whole","wholesale","whose","wide","wife","wild","will","william","williams","wind","window","windows","wine","winter","wireless","wish","with","within","without","woman","women","wood","word","words","work","worked","workers","working","works","workshop","world","worldwide","worth","would","write","writing","written","wrong","wrote",
"xbox","xml","yahoo","yaml","year","years","yellow","yesterday","york","young","your","yourself","youth","zealand","zone"
]`)
);
|
|
485
|
+
|
|
486
|
+
/**
 * Length of the longest entry in DICT_WORDS.
 *
 * No dictionary hit can be longer than this, so computeDictRatio() uses it to
 * cap how far ahead it looks at each position instead of probing every suffix
 * of the current run.
 */
const MAX_DICT_WORD_LENGTH: number = (() => {
  let longest = 0;
  for (const word of DICT_WORDS) {
    if (word.length > longest) longest = word.length;
  }
  return longest;
})();

/**
 * Dictionary word ratio: fraction of token characters covered by dictionary words.
 *
 * Extracts every maximal run of lowercase ASCII letters that is at least three
 * characters long, then walks each run left to right, greedily taking the
 * longest DICT_WORDS entry that starts at the current position (or advancing
 * one character when nothing matches). Returns matched character count divided
 * by the full token length.
 *
 *   "devstral-small-2" → finds "dev", "str", "small" → covers 11/16 chars
 *   "aB3xK9mPqR7wN"    → no words found → dictRatio = 0
 *
 * @param token - Candidate token to score.
 * @returns A ratio in the range [0, 1]; 0 for the empty string or when the
 *          token has no lowercase run of three or more letters.
 */
function computeDictRatio(token: string): number {
  if (token.length === 0) return 0;

  // Maximal lowercase runs of >= 3 letters; shorter runs are ignored entirely.
  const lowerSeqs = token.match(/[a-z]{3,}/g);
  if (lowerSeqs === null) return 0;

  let matchedChars = 0;
  for (const seq of lowerSeqs) {
    let pos = 0;
    while (pos < seq.length) {
      // Candidates longer than the longest dictionary word cannot match, so
      // start the longest-first probe there rather than at the end of the run.
      const furthestEnd = Math.min(seq.length, pos + MAX_DICT_WORD_LENGTH);
      let longestMatch = 0;
      for (let end = furthestEnd; end > pos; end--) {
        if (DICT_WORDS.has(seq.slice(pos, end))) {
          longestMatch = end - pos;
          break;
        }
      }
      matchedChars += longestMatch;
      // Skip past the matched word, or slide one character when nothing matched.
      pos += longestMatch > 0 ? longestMatch : 1;
    }
  }

  return matchedChars / token.length;
}
|
|
231
535
|
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
536
|
+
// ── Entropy Constants ────────────────────────────────────────────────────────
|
|
537
|
+
|
|
538
|
+
/** A token is flagged when its adjusted entropy is strictly greater than this. */
const ENTROPY_THRESHOLD = 5.5;

/** Tokens shorter than this are never entropy-scored. */
const MIN_ENTROPY_TOKEN_LENGTH = 16;

/** Weight applied to the trigram density term (added to the score). */
const W1_DENSITY = 3.0;

/** Weight applied to the vowel-word ratio (subtracted from the score). */
const W2_WORD = 3.0;

/** Weight applied to the dictionary-word ratio (subtracted from the score). */
const W3_DICT = 4.0;

/**
 * Flat amount subtracted when the hex ratio exceeds HEX_RATIO_THRESHOLD and the
 * token contains a hyphen.
 */
const HEX_PENALTY = 2.5;

/** Hex ratio above which a hyphenated token receives HEX_PENALTY. */
const HEX_RATIO_THRESHOLD = 0.9;
|
|
545
|
+
|
|
546
|
+
/**
 * Adjusted entropy (v3 + dictionary):
 *
 *   adjusted = shannon + trigramDensity×W1 − wordRatio×W2 − dictRatio×W3 − hexPenalty
 *
 * The hex penalty applies only when more than HEX_RATIO_THRESHOLD of the
 * characters are hex/hyphen AND the token actually contains a hyphen, i.e. it
 * is shaped like a UUID. Un-hyphenated hex strings are not discounted because
 * they may be real secrets.
 *
 * @param data - Token to score.
 * @returns The adjusted entropy score.
 */
function calculateAdjustedEntropy(data: string): number {
  let score = shannonEntropy(data);
  score += maxSegmentDensity(data) * W1_DENSITY;
  score -= computeWordRatio(data) * W2_WORD;
  score -= computeDictRatio(data) * W3_DICT;

  const uuidShaped = computeHexRatio(data) > HEX_RATIO_THRESHOLD && data.includes("-");
  if (uuidShaped) {
    score -= HEX_PENALTY;
  }
  return score;
}
|
|
565
|
+
|
|
566
|
+
/**
 * Decide whether a token looks random enough to be treated as a secret.
 *
 * A token qualifies only if it is at least MIN_ENTROPY_TOKEN_LENGTH characters
 * long, is not matched by isSafeContent(), and has an adjusted entropy strictly
 * above ENTROPY_THRESHOLD.
 *
 * @param data - Token to classify.
 * @returns `true` when all three conditions hold.
 */
function isHighEntropy(data: string): boolean {
  const longEnough = data.length >= MIN_ENTROPY_TOKEN_LENGTH;
  return longEnough
    && !isSafeContent(data)
    && calculateAdjustedEntropy(data) > ENTROPY_THRESHOLD;
}
|
|
571
|
+
|
|
572
|
+
/**
 * Return the tokens of `content` that isHighEntropy() classifies as secrets.
 *
 * Tokenization splits on runs of whitespace AND common delimiters: square
 * brackets, braces, double and single quotes, commas, forward and back slashes,
 * `|`, parentheses, `&`, `#`, `@`, `!`, `<`, `>` and `?`. URLs, file paths and
 * JSON values are therefore broken into their components, and each component
 * is scored on its own.
 *
 * @param content - Text to scan.
 * @returns The qualifying tokens, in order of appearance (duplicates kept).
 */
function findHighEntropyTokens(content: string): string[] {
  const tokens = content.split(/[\s\[\]{}"',\/\\|()&#@!<>?]+/);
  // The length test is a cheap pre-filter so short tokens skip the scoring call.
  return tokens.filter(t => t.length >= MIN_ENTROPY_TOKEN_LENGTH && isHighEntropy(t));
}
|
|
580
|
+
|
|
581
|
+
// ── Known Secret Patterns ────────────────────────────────────────────────────
|
|
582
|
+
|
|
583
|
+
/** One entry in the table of known secret formats. */
interface SecretPattern {
  /** Human-readable label recorded on every match produced by this pattern. */
  name: string;
  /** Regular expression that recognises the secret; the detector re-creates it with the `g` flag. */
  pattern: RegExp;
  /** The pattern is skipped when the whole scanned content is shorter than this. */
  minLength: number;
  /** When false, a match whose text contains a space character is discarded. */
  allowsSpaces: boolean;
  /** If true, skip safe-pattern exclusion (unambiguous prefix) */
  highConfidence: boolean;
}
|
|
591
|
+
|
|
592
|
+
/**
 * Table of known secret formats, consulted in array order.
 *
 * Order matters: the detector de-duplicates matches by their start/end span,
 * so when two patterns match exactly the same text the EARLIER entry supplies
 * the reported name. More specific patterns must therefore precede broader
 * ones that can match the same span.
 */
const SECRET_PATTERNS: SecretPattern[] = [
  // AWS
  { name: "AWS Access Key ID", pattern: /AKIA[0-9A-Z]{16}/, minLength: 16, allowsSpaces: false, highConfidence: true },
  // allowsSpaces is true because the separator may contain spaces: the standard
  // credentials-file form is `aws_secret_access_key = <value>`. The value
  // character class itself never matches a space.
  { name: "AWS Secret Access Key", pattern: /(?:aws)?_?(?:secret)?_?(?:access)?_?key['"\s:=]+['"]?[0-9a-zA-Z/+]{40}['"]?/i, minLength: 30, allowsSpaces: true, highConfidence: true },
  // GitHub
  { name: "GitHub OAuth Token", pattern: /gho_[0-9a-zA-Z]{36}/, minLength: 36, allowsSpaces: false, highConfidence: true },
  { name: "GitHub App Token", pattern: /(?:ghu|ghs)_[0-9a-zA-Z]{36}/, minLength: 36, allowsSpaces: false, highConfidence: true },
  { name: "GitHub PAT", pattern: /ghp_[0-9a-zA-Z]{36}/, minLength: 36, allowsSpaces: false, highConfidence: true },
  { name: "GitHub Fine-Grained Token", pattern: /github_pat_[0-9a-zA-Z_]{22,}/, minLength: 26, allowsSpaces: false, highConfidence: true },
  // GitLab
  { name: "GitLab PAT", pattern: /glpat-[0-9a-zA-Z\-_]{20,}/, minLength: 20, allowsSpaces: false, highConfidence: true },
  { name: "GitLab Runner Token", pattern: /glrt-[0-9a-zA-Z_\-]{20,}/, minLength: 20, allowsSpaces: false, highConfidence: true },
  // Slack
  { name: "Slack Token", pattern: /xox[baprs]-[0-9a-zA-Z\-]{10,48}/, minLength: 15, allowsSpaces: false, highConfidence: true },
  { name: "Slack Webhook URL", pattern: /https:\/\/hooks\.slack\.com\/services\/T[a-zA-Z0-9_]{8,}\/B[a-zA-Z0-9_]{8,}\/[a-zA-Z0-9_]{24}/, minLength: 60, allowsSpaces: false, highConfidence: true },
  // JWT
  { name: "JSON Web Token", pattern: /eyJ[a-zA-Z0-9_-]{10,}\.eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}/, minLength: 36, allowsSpaces: false, highConfidence: true },
  // Google
  { name: "Google API Key", pattern: /AIza[0-9A-Za-z\-_]{35}/, minLength: 35, allowsSpaces: false, highConfidence: true },
  { name: "Google OAuth Token", pattern: /ya29\.[0-9A-Za-z\-_]+/, minLength: 10, allowsSpaces: false, highConfidence: true },
  // Stripe
  { name: "Stripe Secret Key", pattern: /sk_live_[0-9a-zA-Z]{24,}/, minLength: 24, allowsSpaces: false, highConfidence: true },
  { name: "Stripe Restricted Key", pattern: /rk_live_[0-9a-zA-Z]{24,}/, minLength: 24, allowsSpaces: false, highConfidence: true },
  // Twilio / SendGrid / Discord
  { name: "Twilio API Key", pattern: /SK[a-z0-9]{32}/, minLength: 30, allowsSpaces: false, highConfidence: true },
  { name: "SendGrid API Key", pattern: /SG\.[a-zA-Z0-9_-]{22,}\.[a-zA-Z0-9_-]{40,}/, minLength: 40, allowsSpaces: false, highConfidence: true },
  { name: "Discord Bot Token", pattern: /[MN][A-Za-z\d]{23,}\.[\w-]{6}\.[\w-]{27,}/, minLength: 40, allowsSpaces: false, highConfidence: true },
  // OpenAI / Anthropic / Volcengine Ark
  { name: "OpenAI API Key", pattern: /sk-[a-zA-Z0-9]{20,}T3BlbkFJ[a-zA-Z0-9]{20,}/, minLength: 40, allowsSpaces: false, highConfidence: true },
  // Anthropic must come before "OpenAI API Key (New)": the broader `sk-…`
  // pattern matches the identical span of an `sk-ant-…` key, and the first
  // pattern to claim a span names it.
  { name: "Anthropic API Key", pattern: /sk-ant-api[0-9]{2}-[a-zA-Z0-9\-_]{80,}/, minLength: 80, allowsSpaces: false, highConfidence: true },
  { name: "OpenAI API Key (New)", pattern: /sk-(?:proj-)?[a-zA-Z0-9\-_]{40,}/, minLength: 40, allowsSpaces: false, highConfidence: true },
  { name: "Volcengine Ark API Key", pattern: /ark-[a-zA-Z0-9\-_]{20,}/, minLength: 20, allowsSpaces: false, highConfidence: true },
  // NPM / PyPI
  { name: "NPM Token", pattern: /npm_[a-zA-Z0-9]{36}/, minLength: 36, allowsSpaces: false, highConfidence: true },
  { name: "PyPI Token", pattern: /pypi-[a-zA-Z0-9_\-]{50,}/, minLength: 50, allowsSpaces: false, highConfidence: true },
  // Private Keys
  { name: "RSA Private Key", pattern: /-----BEGIN RSA PRIVATE KEY-----/, minLength: 20, allowsSpaces: true, highConfidence: true },
  { name: "OpenSSH Private Key", pattern: /-----BEGIN OPENSSH PRIVATE KEY-----/, minLength: 20, allowsSpaces: true, highConfidence: true },
  { name: "EC Private Key", pattern: /-----BEGIN EC PRIVATE KEY-----/, minLength: 20, allowsSpaces: true, highConfidence: true },
  { name: "PGP Private Key", pattern: /-----BEGIN PGP PRIVATE KEY BLOCK-----/, minLength: 20, allowsSpaces: true, highConfidence: true },
  { name: "Generic Private Key", pattern: /-----BEGIN (?:ENCRYPTED )?PRIVATE KEY-----/, minLength: 20, allowsSpaces: true, highConfidence: true },
  // Database URIs
  { name: "MongoDB Connection String", pattern: /mongodb(?:\+srv)?:\/\/[^\s'"]+:[^\s'"]+@[^\s'"]+/, minLength: 20, allowsSpaces: false, highConfidence: true },
  { name: "PostgreSQL Connection String", pattern: /postgres(?:ql)?:\/\/[^\s'"]+:[^\s'"]+@[^\s'"]+/, minLength: 20, allowsSpaces: false, highConfidence: true },
  { name: "MySQL Connection String", pattern: /mysql:\/\/[^\s'"]+:[^\s'"]+@[^\s'"]+/, minLength: 20, allowsSpaces: false, highConfidence: true },
  { name: "Redis Connection String", pattern: /redis:\/\/[^\s'"]*:[^\s'"]+@[^\s'"]+/, minLength: 15, allowsSpaces: false, highConfidence: true },
  // URL-embedded passwords
  { name: "Password in URL", pattern: /[a-zA-Z]{3,10}:\/\/[^/\s:@]{3,20}:[^/\s:@]{3,20}@[^\s'"]+/, minLength: 15, allowsSpaces: false, highConfidence: true },
  // Generic assignments (lower confidence — checked against SAFE_PATTERNS)
  // NOTE(review): the two header patterns below require whitespace (`\s+`) yet
  // declare allowsSpaces: false. If the low-confidence pass applies the same
  // space filter as the high-confidence pass, "Bearer <token>" separated by an
  // ordinary space can never match — confirm whether that is intended.
  { name: "Bearer Token", pattern: /[Bb]earer\s+[a-zA-Z0-9\-._~+/]+=*/, minLength: 15, allowsSpaces: false, highConfidence: false },
  { name: "Basic Auth Header", pattern: /[Bb]asic\s+[a-zA-Z0-9+/]{20,}={0,2}/, minLength: 20, allowsSpaces: false, highConfidence: false },
  { name: "API Key Assignment", pattern: /(?:api[_-]?key|apikey|api[_-]?secret)['"\s:=]+['"]?[a-zA-Z0-9\-._]{20,}['"]?/i, minLength: 20, allowsSpaces: false, highConfidence: false },
  { name: "Secret Assignment", pattern: /(?:secret|token|password|passwd|pwd)['"\s:=]+['"]?[a-zA-Z0-9\-._!@#$%^&*]{8,}['"]?/i, minLength: 12, allowsSpaces: false, highConfidence: false },
];
|
|
646
|
+
|
|
647
|
+
// ── Safe Patterns (exclude from detection to reduce false positives) ─────────
|
|
648
|
+
|
|
649
|
+
/**
 * Shapes that are treated as harmless. A string matching any of these is
 * reported as safe by isSafeContent().
 */
const SAFE_PATTERNS: RegExp[] = [
  // http(s) URLs that carry no user:password@ credentials
  /^https?:\/\/[a-zA-Z0-9.-]+(?:\/[a-zA-Z0-9.\/_\-?&=#%]*)?$/,
  // Relative file paths starting with ./ or ../
  /^\.\.?\/[a-zA-Z0-9_\-./]+$/,
  // Absolute Unix paths
  /^\/[a-zA-Z0-9_\-./]+$/,
  // Windows drive-letter paths
  /^[a-zA-Z]:\\[a-zA-Z0-9_\-\\./]+$/,
  // Email addresses
  /^[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}$/,
  // UUIDs (8-4-4-4-12 hex)
  /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/,
  // Semantic versions, optionally prefixed with "v"
  /^v?\d+\.\d+\.\d+(?:-[a-zA-Z0-9.]+)?(?:\+[a-zA-Z0-9.]+)?$/,
  // Placeholder values (prefix match only — anything may follow)
  /^(?:xxx+|your[_-]?(?:api[_-]?)?key|placeholder|example|test|demo|sample)/i,
  // Git SHA-1 (40 hex characters)
  /^[0-9a-f]{40}$/i,
  // SHA-256 (64 hex characters)
  /^[0-9a-f]{64}$/i,
  // npm scoped package names
  /^@[a-z0-9-]+\/[a-z0-9-]+$/,
];
|
|
662
|
+
|
|
663
|
+
/**
 * Reports whether `content` matches at least one entry of SAFE_PATTERNS.
 *
 * @param content - Candidate text (a matched token or its surrounding context).
 * @returns `true` if any safe pattern matches, otherwise `false`.
 */
function isSafeContent(content: string): boolean {
  return SAFE_PATTERNS.some((safePattern) => safePattern.test(content));
}
|
|
249
669
|
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
670
|
+
// ── Detector ─────────────────────────────────────────────────────────────────
|
|
671
|
+
|
|
672
|
+
/** One detected secret, expressed as a half-open range `[start, end)` of the scanned text. */
interface SecretMatch {
  /** Label of the detector that produced the match. */
  name: string;
  /** Index of the first character of the match. */
  start: number;
  /** Index one past the last character of the match. */
  end: number;
  /** The matched text as it appeared in the scanned content. */
  original: string;
}
|
|
678
|
+
|
|
679
|
+
/** Content shorter than this is never scanned. */
const MIN_SCAN_LENGTH = 10;

/**
 * Collapse overlapping matches into single ranges.
 *
 * Callers redact right-to-left using the original offsets; that is only safe
 * when the ranges are disjoint, because masking changes the string length.
 * When ranges overlap, the merged range keeps the name of the match that
 * starts first and spans the union of both.
 */
function mergeOverlappingMatches(matches: SecretMatch[], content: string): SecretMatch[] {
  // Ascending by start; for equal starts the longer range comes first.
  const ordered = [...matches].sort((a, b) => a.start - b.start || b.end - a.end);
  const merged: SecretMatch[] = [];
  for (const current of ordered) {
    const last = merged[merged.length - 1];
    if (last !== undefined && current.start < last.end) {
      if (current.end > last.end) {
        last.end = current.end;
        last.original = content.slice(last.start, last.end);
      }
      continue;
    }
    merged.push({ ...current });
  }
  return merged;
}

/**
 * Scan `content` for likely secrets.
 *
 * Three passes run in order:
 *  1. high-confidence entries of SECRET_PATTERNS,
 *  2. low-confidence entries, filtered through isSafeContent (on the match and
 *     on the match plus up to 10 preceding characters) and skipped when already
 *     covered by an earlier match,
 *  3. tokens reported by findHighEntropyTokens, filtered the same way.
 *
 * NOTE(review): the `allowsSpaces` filter rejects any match containing a
 * literal space, so a pattern whose own syntax requires whitespace (such as a
 * `Bearer\s+` prefix) only survives when the separator is not a space —
 * confirm this is intended.
 *
 * @param content - Text to scan.
 * @returns Non-overlapping matches sorted by `start` descending, so the caller
 *          can replace them right-to-left without invalidating offsets.
 */
function detectSecrets(content: string): SecretMatch[] {
  if (content.length < MIN_SCAN_LENGTH) return [];

  const matches: SecretMatch[] = [];
  const seen = new Set<string>(); // deduplicate by position

  const record = (name: string, start: number, end: number): void => {
    const key = `${start}-${end}`;
    if (seen.has(key)) return;
    seen.add(key);
    matches.push({ name, start, end, original: content.slice(start, end) });
  };

  const isCovered = (start: number, end: number): boolean =>
    matches.some((m) => m.start <= start && m.end >= end);

  // matchAll requires a global regex; appending "g" to flags that already
  // contain it would throw a SyntaxError, so add it only when missing.
  const toGlobal = (re: RegExp): RegExp =>
    new RegExp(re.source, re.flags.includes("g") ? re.flags : re.flags + "g");

  // Pass 1: High-confidence pattern matching (specific prefixes like ghp_, AKIA)
  for (const sp of SECRET_PATTERNS) {
    if (!sp.highConfidence) continue;
    if (content.length < sp.minLength) continue;
    for (const m of content.matchAll(toGlobal(sp.pattern))) {
      const text = m[0];
      const start = m.index;
      if (!text || start === undefined) continue;
      if (!sp.allowsSpaces && text.includes(" ")) continue;
      record(sp.name, start, start + text.length);
    }
  }

  // Pass 2: Low-confidence pattern matching (generic assignments like secret=xxx)
  for (const sp of SECRET_PATTERNS) {
    if (sp.highConfidence) continue;
    if (content.length < sp.minLength) continue;
    for (const m of content.matchAll(toGlobal(sp.pattern))) {
      const text = m[0];
      const start = m.index;
      if (!text || start === undefined) continue;
      if (!sp.allowsSpaces && text.includes(" ")) continue;
      // Check against safe patterns to reduce false positives
      if (isSafeContent(text)) continue;
      // Also check surrounding context (e.g. "your_api_key=xxx" is a placeholder)
      const end = start + text.length;
      const context = content.slice(Math.max(0, start - 10), end);
      if (isSafeContent(context)) continue;
      // Skip if the range is already covered by an earlier match
      if (isCovered(start, end)) continue;
      record(sp.name, start, end);
    }
  }

  // Pass 3: Entropy analysis (catches unknown formats like third-party sk- keys).
  // Every occurrence of each token is examined, not just the first one, so a
  // secret that appears several times is redacted everywhere.
  for (const token of new Set(findHighEntropyTokens(content))) {
    if (!token || isSafeContent(token)) continue;
    for (
      let idx = content.indexOf(token);
      idx !== -1;
      idx = content.indexOf(token, idx + token.length)
    ) {
      const end = idx + token.length;
      // Skip if already covered by a pattern match
      if (isCovered(idx, end)) continue;
      record("High Entropy String", idx, end);
    }
  }

  // Merge overlaps, then order by start descending for safe right-to-left replacement
  return mergeOverlappingMatches(matches, content).reverse();
}
|
|
743
|
+
|
|
744
|
+
/**
 * Produce the redacted form of a detected secret.
 *
 * Values of 8 characters or fewer are replaced entirely by the mask; longer
 * values keep their first and last 4 characters around the mask.
 *
 * @param text - The secret text to mask.
 * @returns The masked replacement string.
 */
function maskSecret(text: string): string {
  const mask = "********";
  const visibleEdge = 4;
  if (text.length <= visibleEdge * 2) {
    return mask;
  }
  const head = text.substring(0, visibleEdge);
  const tail = text.substring(text.length - visibleEdge);
  return `${head}${mask}${tail}`;
}
|
|
266
748
|
|
|
267
749
|
// ─── Setup ──────────────────────────────────────────────────────────────────
|
|
@@ -270,42 +752,43 @@ export function setupSafety(pi: ExtensionAPI) {
|
|
|
270
752
|
// ── Command Guard + Protected Paths + Write Guard (tool_call) ─────────
|
|
271
753
|
|
|
272
754
|
pi.on("tool_call", async (event, ctx) => {
|
|
273
|
-
|
|
274
|
-
// Gate 1: 危险命令
|
|
755
|
+
|
|
756
|
+
// Gate 1: 危险命令 + 覆盖写入 + 读取保护路径
|
|
275
757
|
if (event.toolName === "bash") {
|
|
276
758
|
const command = (event.input as { command?: string }).command;
|
|
277
759
|
if (command) {
|
|
278
|
-
const
|
|
279
|
-
if (
|
|
760
|
+
const dangers = collectBashDangers(command, ctx.cwd);
|
|
761
|
+
if (dangers.length > 0) {
|
|
762
|
+
const message = formatBashDangers(dangers)!;
|
|
280
763
|
if (!ctx.hasUI) {
|
|
281
|
-
return { block: true, reason:
|
|
764
|
+
return { block: true, reason: `\u26D4 ${message} (non-interactive)` };
|
|
282
765
|
}
|
|
283
766
|
const choice = await ctx.ui.select(
|
|
284
|
-
|
|
767
|
+
`\u26A0\uFE0F ${message}\n\nAllow execution?`,
|
|
285
768
|
["Block", "Allow once"],
|
|
286
769
|
);
|
|
287
770
|
if (!choice || choice === "Block") {
|
|
288
|
-
return { block: true, reason:
|
|
771
|
+
return { block: true, reason: `\u26D4 ${message}` };
|
|
289
772
|
}
|
|
290
773
|
}
|
|
291
774
|
}
|
|
292
775
|
}
|
|
293
776
|
|
|
294
|
-
// Gate 2:
|
|
777
|
+
// Gate 2: write/edit 写入保护路径
|
|
295
778
|
if (event.toolName === "write" || event.toolName === "edit") {
|
|
296
779
|
const filePath = (event.input as any).path ?? (event.input as any).file ?? (event.input as any).file_path;
|
|
297
780
|
if (filePath) {
|
|
298
781
|
const danger = checkProtectedPath(filePath);
|
|
299
782
|
if (danger) {
|
|
300
783
|
if (!ctx.hasUI) {
|
|
301
|
-
return { block: true, reason:
|
|
784
|
+
return { block: true, reason: `\uD83D\uDD10 ${danger}\nmay contain sensitive information` };
|
|
302
785
|
}
|
|
303
786
|
const choice = await ctx.ui.select(
|
|
304
|
-
|
|
787
|
+
`\uD83D\uDD10 ${danger}\nmay contain sensitive information\n\nProceed?`,
|
|
305
788
|
["Block", "Allow once"],
|
|
306
789
|
);
|
|
307
790
|
if (!choice || choice === "Block") {
|
|
308
|
-
return { block: true, reason:
|
|
791
|
+
return { block: true, reason: `\uD83D\uDD10 ${danger}\nmay contain sensitive information` };
|
|
309
792
|
}
|
|
310
793
|
}
|
|
311
794
|
}
|
|
@@ -323,6 +806,26 @@ export function setupSafety(pi: ExtensionAPI) {
|
|
|
323
806
|
} catch { /* file doesn't exist */ }
|
|
324
807
|
}
|
|
325
808
|
}
|
|
809
|
+
|
|
810
|
+
// Gate 4: read 工具读取保护路径(bash 读取已在 Gate 1 处理)
|
|
811
|
+
if (event.toolName === "read") {
|
|
812
|
+
const filePath = (event.input as any).path ?? (event.input as any).file ?? (event.input as any).file_path;
|
|
813
|
+
if (filePath) {
|
|
814
|
+
const danger = checkProtectedPath(filePath);
|
|
815
|
+
if (danger) {
|
|
816
|
+
if (!ctx.hasUI) {
|
|
817
|
+
return { block: true, reason: `\uD83D\uDD10 Reading protected file: ${danger}\nmay contain sensitive information` };
|
|
818
|
+
}
|
|
819
|
+
const choice = await ctx.ui.select(
|
|
820
|
+
`\uD83D\uDD10 Reading protected file: ${danger}\nmay contain sensitive information\n\nProceed?`,
|
|
821
|
+
["Block", "Allow once"],
|
|
822
|
+
);
|
|
823
|
+
if (!choice || choice === "Block") {
|
|
824
|
+
return { block: true, reason: `\uD83D\uDD10 Reading protected file: ${danger}\nmay contain sensitive information` };
|
|
825
|
+
}
|
|
826
|
+
}
|
|
827
|
+
}
|
|
828
|
+
}
|
|
326
829
|
});
|
|
327
830
|
|
|
328
831
|
// ── Secret Redact (tool_result) ────────────────────────────────────────
|
|
@@ -333,6 +836,10 @@ export function setupSafety(pi: ExtensionAPI) {
|
|
|
333
836
|
): Promise<{ content?: NonNullable<ToolResultEvent["content"]> } | void> => {
|
|
334
837
|
if (!event.content || !Array.isArray(event.content)) return;
|
|
335
838
|
|
|
839
|
+
// Only scan read tool output — other tools (bash, write, edit) are either
|
|
840
|
+
// covered by path guards or produce git/diff noise that causes false positives.
|
|
841
|
+
if (event.toolName !== "read") return;
|
|
842
|
+
|
|
336
843
|
const textParts: Array<{ index: number; text: string; item: ToolTextContent }> = [];
|
|
337
844
|
for (let i = 0; i < event.content.length; i++) {
|
|
338
845
|
const item = event.content[i];
|
|
@@ -342,18 +849,16 @@ export function setupSafety(pi: ExtensionAPI) {
|
|
|
342
849
|
}
|
|
343
850
|
if (textParts.length === 0) return;
|
|
344
851
|
|
|
345
|
-
const eng = await ensureEngine();
|
|
346
852
|
let totalCount = 0;
|
|
347
853
|
const newContent = [...event.content];
|
|
348
854
|
|
|
349
855
|
for (const { index, text, item } of textParts) {
|
|
350
|
-
const
|
|
351
|
-
|
|
352
|
-
if (ranges.length === 0) continue;
|
|
856
|
+
const matches = detectSecrets(text);
|
|
857
|
+
if (matches.length === 0) continue;
|
|
353
858
|
|
|
354
|
-
totalCount +=
|
|
859
|
+
totalCount += matches.length;
|
|
355
860
|
let redacted = text;
|
|
356
|
-
for (const { start, end } of
|
|
861
|
+
for (const { start, end } of matches) {
|
|
357
862
|
const original = redacted.slice(start, end);
|
|
358
863
|
redacted = redacted.slice(0, start) + maskSecret(original) + redacted.slice(end);
|
|
359
864
|
}
|
|
@@ -363,7 +868,7 @@ export function setupSafety(pi: ExtensionAPI) {
|
|
|
363
868
|
|
|
364
869
|
if (totalCount === 0) return;
|
|
365
870
|
const label = totalCount === 1 ? "1 secret" : `${totalCount} secrets`;
|
|
366
|
-
ctx.ui.notify(
|
|
871
|
+
ctx.ui.notify(`\uD83D\uDD10 Redacted ${label} in ${event.toolName} output`, "warning");
|
|
367
872
|
return { content: newContent };
|
|
368
873
|
};
|
|
369
874
|
|