decorated-pi 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +58 -34
- package/extensions/file-times.ts +60 -2
- package/extensions/guidance.ts +5 -3
- package/extensions/index.ts +2 -0
- package/extensions/io.ts +210 -29
- package/extensions/lsp/client.ts +181 -428
- package/extensions/lsp/env.ts +45 -12
- package/extensions/lsp/format.ts +102 -237
- package/extensions/lsp/index.ts +8 -11
- package/extensions/lsp/manager.ts +249 -0
- package/extensions/lsp/prompt.ts +3 -42
- package/extensions/lsp/protocol.ts +219 -0
- package/extensions/lsp/servers.ts +80 -160
- package/extensions/lsp/tools.ts +160 -553
- package/extensions/lsp/types.ts +42 -0
- package/extensions/mcp/builtin.ts +126 -0
- package/extensions/mcp/client.ts +106 -0
- package/extensions/mcp/index.ts +123 -0
- package/extensions/patch.ts +291 -73
- package/extensions/providers/ark-coding.ts +2 -0
- package/extensions/safety/detect.ts +20 -744
- package/extensions/safety/entropy.ts +226 -0
- package/extensions/safety/index.ts +1 -93
- package/extensions/safety/patterns.ts +155 -0
- package/extensions/safety/types.ts +50 -0
- package/extensions/settings.ts +8 -0
- package/extensions/slash.ts +161 -7
- package/extensions/smart-at.ts +5 -5
- package/extensions/subdir-agents.ts +43 -13
- package/package.json +2 -3
- package/tsconfig.json +16 -0
- package/extensions/lsp/server-manager.ts +0 -309
- package/extensions/lsp/trust.ts +0 -45
|
@@ -1,750 +1,23 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Safety Detection —
|
|
2
|
+
* Safety Detection — main detection pipeline
|
|
3
3
|
*
|
|
4
|
-
* -
|
|
5
|
-
* -
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*/
|
|
9
|
-
|
|
10
|
-
import * as fs from "node:fs";
|
|
11
|
-
import { basename, extname, resolve } from "node:path";
|
|
12
|
-
|
|
13
|
-
const DANGEROUS_COMMANDS: [string, string[]][] = [
|
|
14
|
-
["rm", []],
|
|
15
|
-
["sudo", []],
|
|
16
|
-
["npm", ["publish"]],
|
|
17
|
-
["svn", ["commit", "revert"]],
|
|
18
|
-
["git", ["reset", "restore", "clean", "push", "revert"]],
|
|
19
|
-
];
|
|
20
|
-
|
|
21
|
-
const SAFE_REDIRECT_TARGETS = new Set([
|
|
22
|
-
"/dev/null",
|
|
23
|
-
"/dev/stdout",
|
|
24
|
-
"/dev/stderr",
|
|
25
|
-
]);
|
|
26
|
-
|
|
27
|
-
const SHELL_SEGMENT_BREAKS = new Set(["|", "&&", "||", ";"]);
|
|
28
|
-
const SHELL_REDIRECT_OVERWRITE = new Set([">", "1>", "2>", "&>"]);
|
|
29
|
-
|
|
30
|
-
// ─── Protected paths ─────────────────────────────────────────────────────────
|
|
31
|
-
|
|
32
|
-
const PROTECTED_PATH_SEGMENTS = [
|
|
33
|
-
".env", ".git/", ".ssh/",
|
|
34
|
-
".gnupg/", ".aws/", "secrets/", ".docker/",
|
|
35
|
-
];
|
|
36
|
-
const PROTECTED_EXTENSIONS = [".pem", ".key", ".p12", ".pfx", ".keystore"];
|
|
37
|
-
const PROTECTED_FILENAMES = [
|
|
38
|
-
"id_rsa", "id_ed25519", "id_ecdsa",
|
|
39
|
-
"authorized_keys", "known_hosts",
|
|
40
|
-
".env.local", ".env.production",
|
|
41
|
-
];
|
|
42
|
-
|
|
43
|
-
/** Commands that read file contents (should confirm before reading protected paths) */
|
|
44
|
-
const READ_COMMANDS = new Set([
|
|
45
|
-
"cat", "head", "tail", "less", "more", "bat", "batcat",
|
|
46
|
-
"tac", "nl", "od", "xxd", "hexdump", "base64",
|
|
47
|
-
"file", "strings", "grep", "rg", "ag", "ack",
|
|
48
|
-
]);
|
|
49
|
-
|
|
50
|
-
export function checkProtectedPath(filePath: string): string | null {
|
|
51
|
-
const normalized = filePath.replace(/\\/g, "/");
|
|
52
|
-
const filename = normalized.split("/").pop() ?? "";
|
|
53
|
-
for (const seg of PROTECTED_PATH_SEGMENTS) {
|
|
54
|
-
if (normalized.includes(seg)) return `path contains "${seg}"`;
|
|
55
|
-
}
|
|
56
|
-
for (const ext of PROTECTED_EXTENSIONS) {
|
|
57
|
-
if (normalized.endsWith(ext)) return `file extension "${ext}"`;
|
|
58
|
-
}
|
|
59
|
-
for (const name of PROTECTED_FILENAMES) {
|
|
60
|
-
if (filename === name) return `protected file "${name}"`;
|
|
61
|
-
}
|
|
62
|
-
return null;
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
// ─── Shell tokenizer ────────────────────────────────────────────────────────
|
|
66
|
-
|
|
67
|
-
export function tokenizeShell(command: string): string[] {
|
|
68
|
-
const tokens: string[] = [];
|
|
69
|
-
let current = "";
|
|
70
|
-
let quote: "'" | '"' | null = null;
|
|
71
|
-
|
|
72
|
-
const pushCurrent = () => {
|
|
73
|
-
if (current.length > 0) {
|
|
74
|
-
tokens.push(current);
|
|
75
|
-
current = "";
|
|
76
|
-
}
|
|
77
|
-
};
|
|
78
|
-
|
|
79
|
-
for (let i = 0; i < command.length; i++) {
|
|
80
|
-
const ch = command[i]!;
|
|
81
|
-
|
|
82
|
-
if (quote) {
|
|
83
|
-
if (ch === quote) {
|
|
84
|
-
quote = null;
|
|
85
|
-
} else if (ch === "\\" && quote === '"' && i + 1 < command.length) {
|
|
86
|
-
current += command[i + 1]!;
|
|
87
|
-
i += 1;
|
|
88
|
-
} else {
|
|
89
|
-
current += ch;
|
|
90
|
-
}
|
|
91
|
-
continue;
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
if (ch === "'" || ch === '"') {
|
|
95
|
-
quote = ch;
|
|
96
|
-
continue;
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
if (/\s/.test(ch)) {
|
|
100
|
-
pushCurrent();
|
|
101
|
-
continue;
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
if (ch === ";") {
|
|
105
|
-
pushCurrent();
|
|
106
|
-
tokens.push(";");
|
|
107
|
-
continue;
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
if (ch === "|" || ch === "&") {
|
|
111
|
-
if (i + 1 < command.length && command[i + 1] === ch) {
|
|
112
|
-
pushCurrent();
|
|
113
|
-
tokens.push(ch + ch);
|
|
114
|
-
i += 1;
|
|
115
|
-
continue;
|
|
116
|
-
}
|
|
117
|
-
if (ch === "|") {
|
|
118
|
-
pushCurrent();
|
|
119
|
-
tokens.push("|");
|
|
120
|
-
continue;
|
|
121
|
-
}
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
if (ch === ">") {
|
|
125
|
-
let op = ">";
|
|
126
|
-
if (i + 1 < command.length && command[i + 1] === ">") {
|
|
127
|
-
op = ">>";
|
|
128
|
-
i += 1;
|
|
129
|
-
}
|
|
130
|
-
if (current === "&" || /^\d+$/.test(current)) {
|
|
131
|
-
op = current + op;
|
|
132
|
-
current = "";
|
|
133
|
-
} else {
|
|
134
|
-
pushCurrent();
|
|
135
|
-
}
|
|
136
|
-
tokens.push(op);
|
|
137
|
-
continue;
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
current += ch;
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
pushCurrent();
|
|
144
|
-
return tokens;
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
function isExistingRegularFile(target: string, cwd: string): boolean {
|
|
148
|
-
if (!target || SAFE_REDIRECT_TARGETS.has(target)) return false;
|
|
149
|
-
try {
|
|
150
|
-
return fs.statSync(resolve(cwd, target)).isFile();
|
|
151
|
-
} catch {
|
|
152
|
-
return false;
|
|
153
|
-
}
|
|
154
|
-
}
|
|
155
|
-
|
|
156
|
-
// ─── Bash danger analysis ───────────────────────────────────────────────────
|
|
157
|
-
|
|
158
|
-
export interface BashDanger {
|
|
159
|
-
reason: string;
|
|
160
|
-
/** Whether the danger involves a protected (sensitive) path */
|
|
161
|
-
protectedPath?: string;
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
export function collectBashDangers(command: string, cwd: string): BashDanger[] {
|
|
165
|
-
const tokens = tokenizeShell(command);
|
|
166
|
-
const dangers: BashDanger[] = [];
|
|
167
|
-
const seen = new Set<string>();
|
|
168
|
-
|
|
169
|
-
const addDanger = (reason: string, protectedPath?: string) => {
|
|
170
|
-
if (seen.has(reason)) return;
|
|
171
|
-
seen.add(reason);
|
|
172
|
-
dangers.push({ reason, protectedPath });
|
|
173
|
-
};
|
|
174
|
-
|
|
175
|
-
for (let i = 0; i < tokens.length; i++) {
|
|
176
|
-
const token = tokens[i]!;
|
|
177
|
-
if (SHELL_SEGMENT_BREAKS.has(token)) continue;
|
|
178
|
-
|
|
179
|
-
// ── Dangerous commands ──
|
|
180
|
-
for (const [cmd, subs] of DANGEROUS_COMMANDS) {
|
|
181
|
-
const name = token.split("/").pop() ?? token;
|
|
182
|
-
if (name !== cmd && name !== `${cmd}.exe`) continue;
|
|
183
|
-
if (subs.length === 0) {
|
|
184
|
-
addDanger(`"${cmd}" is a dangerous command`);
|
|
185
|
-
break;
|
|
186
|
-
}
|
|
187
|
-
const next = tokens[i + 1];
|
|
188
|
-
if (next && subs.includes(next)) {
|
|
189
|
-
addDanger(`"${cmd} ${next}" is a dangerous command`);
|
|
190
|
-
break;
|
|
191
|
-
}
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
// ── Overwrite redirect (>) ──
|
|
195
|
-
if (SHELL_REDIRECT_OVERWRITE.has(token)) {
|
|
196
|
-
const target = tokens[i + 1];
|
|
197
|
-
if (target && isExistingRegularFile(target, cwd)) {
|
|
198
|
-
const prot = checkProtectedPath(target);
|
|
199
|
-
if (prot) {
|
|
200
|
-
addDanger(
|
|
201
|
-
`shell redirection would overwrite existing file "${target}"\n Sensitive: ${prot}, may contain sensitive information`,
|
|
202
|
-
prot,
|
|
203
|
-
);
|
|
204
|
-
} else {
|
|
205
|
-
addDanger(`shell redirection would overwrite existing file "${target}"`);
|
|
206
|
-
}
|
|
207
|
-
}
|
|
208
|
-
continue;
|
|
209
|
-
}
|
|
210
|
-
|
|
211
|
-
// ── Read commands on protected paths ──
|
|
212
|
-
const cmdName = token.split("/").pop() ?? token;
|
|
213
|
-
if (READ_COMMANDS.has(cmdName) || READ_COMMANDS.has(`${cmdName}.exe`)) {
|
|
214
|
-
for (let j = i + 1; j < tokens.length; j++) {
|
|
215
|
-
const next = tokens[j]!;
|
|
216
|
-
if (SHELL_SEGMENT_BREAKS.has(next)) break;
|
|
217
|
-
if (next.startsWith("-")) continue;
|
|
218
|
-
if (next.includes("/") || next.startsWith(".") || isExistingRegularFile(next, cwd)) {
|
|
219
|
-
const prot = checkProtectedPath(next);
|
|
220
|
-
if (prot) {
|
|
221
|
-
addDanger(
|
|
222
|
-
`"${cmdName}" reads protected file "${next}"\n Sensitive: ${prot}, may contain sensitive information`,
|
|
223
|
-
prot,
|
|
224
|
-
);
|
|
225
|
-
}
|
|
226
|
-
}
|
|
227
|
-
}
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
// ── tee writes to existing files ──
|
|
231
|
-
if (cmdName === "tee" || cmdName === "tee.exe") {
|
|
232
|
-
for (let j = i + 1; j < tokens.length; j++) {
|
|
233
|
-
const next = tokens[j]!;
|
|
234
|
-
if (SHELL_SEGMENT_BREAKS.has(next)) break;
|
|
235
|
-
if (next === "-a" || next === "--append") continue;
|
|
236
|
-
if (next.startsWith("-")) continue;
|
|
237
|
-
if (isExistingRegularFile(next, cwd)) {
|
|
238
|
-
const prot = checkProtectedPath(next);
|
|
239
|
-
if (prot) {
|
|
240
|
-
addDanger(
|
|
241
|
-
`"tee" would write to existing file "${next}"\n Sensitive: ${prot}, may contain sensitive information`,
|
|
242
|
-
prot,
|
|
243
|
-
);
|
|
244
|
-
} else {
|
|
245
|
-
addDanger(`"tee" would write to existing file "${next}"`);
|
|
246
|
-
}
|
|
247
|
-
}
|
|
248
|
-
}
|
|
249
|
-
}
|
|
250
|
-
}
|
|
251
|
-
|
|
252
|
-
return dangers;
|
|
253
|
-
}
|
|
254
|
-
|
|
255
|
-
export function formatBashDangers(dangers: BashDanger[]): string | null {
|
|
256
|
-
if (dangers.length === 0) return null;
|
|
257
|
-
if (dangers.length === 1) return dangers[0]!.reason;
|
|
258
|
-
return `dangerous operations detected:\n- ${dangers.map(d => d.reason).join("\n- ")}`;
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
// ─── Secret Detection — Entropy + Pattern ────────────────────────────────────
|
|
262
|
-
//
|
|
263
|
-
// Based on opencode-secrets-protect by Jared Scheel
|
|
264
|
-
// https://github.com/jscheel/opencode-secrets-protect (MIT License)
|
|
265
|
-
//
|
|
266
|
-
// Detection pipeline: High-confidence patterns (40+ known formats)
|
|
267
|
-
// → Low-confidence patterns (generic assignments, context-checked)
|
|
268
|
-
// → Adjusted Shannon Entropy v3+Dict (unknown formats)
|
|
269
|
-
// → Safe pattern exclusion (reduce false positives)
|
|
270
|
-
//
|
|
271
|
-
// Entropy v3+Dict formula:
|
|
272
|
-
// adjusted = baseShannon + trigramDensity×W1 - wordRatio×W2 - dictRatio×W3 - hexPenalty
|
|
273
|
-
//
|
|
274
|
-
// - baseShannon: Claude E. Shannon's 1948 "A Mathematical Theory of Communication"
|
|
275
|
-
// - trigramDensity: 3-char sliding window scores class transitions:
|
|
276
|
-
// • Letter↔Digit (digit in first 2 positions) → 1.0
|
|
277
|
-
// • Contains '-' with ≥3 classes → 1.0
|
|
278
|
-
// • AbA pattern (≥2 uppercase + lowercase) → 0.8
|
|
279
|
-
// X-class chars (not letter/digit/dash) split segments independently
|
|
280
|
-
// - wordRatio: vowel-containing lowercase fragments penalize secret likelihood
|
|
281
|
-
// - dictRatio: dictionary word coverage penalizes identifiers/English text
|
|
282
|
-
// - hexPenalty: -2.5 only if >90% hex AND contains '-' (UUID-like format)
|
|
283
|
-
|
|
284
|
-
//
|
|
285
|
-
// Based on opencode-secrets-protect by Jared Scheel
|
|
286
|
-
// https://github.com/jscheel/opencode-secrets-protect (MIT License)
|
|
287
|
-
//
|
|
288
|
-
// Detection approach:
|
|
289
|
-
// 1. Split content by whitespace + code punctuation
|
|
290
|
-
// 2. For each token ≥ 16 chars, compute adjusted entropy:
|
|
291
|
-
// adjusted = baseShannon + trigramDensity×W1 - wordRatio×W2 - dictRatio×W3 - hexPenalty
|
|
292
|
-
// 3. Trigram density uses a 3-character sliding window:
|
|
293
|
-
// - AbA pattern (≥2 uppercase) → 0.8
|
|
294
|
-
// - Letter↔Digit (digit in first 2 positions) → 1.0
|
|
295
|
-
// - Contains '-' with ≥3 classes → 1.0
|
|
296
|
-
// X-class chars split the token into independent segments;
|
|
297
|
-
// the segment with the highest density is used.
|
|
298
|
-
// 4. wordRatio: ratio of vowel-containing lowercase fragments ≥3 chars
|
|
299
|
-
// 5. dictRatio: ratio of dictionary word coverage (2121 English + tech words)
|
|
300
|
-
// 6. hexPenalty: -2.5 only if >90% hex AND contains '-' (UUID-like format)
|
|
301
|
-
|
|
302
|
-
/** Character class: U=uppercase, L=lowercase, D=digit, S=dash, X=other */
|
|
303
|
-
export function charClass(c: string): "U" | "L" | "D" | "S" | "X" {
|
|
304
|
-
const code = c.charCodeAt(0);
|
|
305
|
-
if (code >= 65 && code <= 90) return "U";
|
|
306
|
-
if (code >= 97 && code <= 122) return "L";
|
|
307
|
-
if (code >= 48 && code <= 57) return "D";
|
|
308
|
-
if (c === "-") return "S";
|
|
309
|
-
return "X";
|
|
310
|
-
}
|
|
311
|
-
|
|
312
|
-
/**
|
|
313
|
-
* Shannon entropy: measures average information content per character.
|
|
314
|
-
* H(X) = -Σ p(x) · log₂(p(x))
|
|
315
|
-
*/
|
|
316
|
-
export function shannonEntropy(data: string): number {
|
|
317
|
-
if (data.length === 0) return 0;
|
|
318
|
-
const freq = new Map<string, number>();
|
|
319
|
-
for (const char of data) {
|
|
320
|
-
freq.set(char, (freq.get(char) ?? 0) + 1);
|
|
321
|
-
}
|
|
322
|
-
let entropy = 0;
|
|
323
|
-
const len = data.length;
|
|
324
|
-
for (const count of freq.values()) {
|
|
325
|
-
const p = count / len;
|
|
326
|
-
entropy -= p * Math.log2(p);
|
|
327
|
-
}
|
|
328
|
-
return entropy;
|
|
329
|
-
}
|
|
330
|
-
|
|
331
|
-
/**
|
|
332
|
-
* Trigram (3-character sliding window) scoring.
|
|
333
|
-
* Rules (user-specified):
|
|
334
|
-
* - Pure digits → 0
|
|
335
|
-
* - Letter↔Digit switch (digit in first position, e.g. 4Vi) → 1.0
|
|
336
|
-
* - Contains '-' with ≥3 distinct classes → 1.0
|
|
337
|
-
* - Case switch AbA pattern (≥2 uppercase + ≥1 lowercase) → 0.8
|
|
338
|
-
* - Otherwise → 0
|
|
339
|
-
*/
|
|
340
|
-
export function trigramScore(c1: string, c2: string, c3: string): number {
|
|
341
|
-
const cls: string[] = [charClass(c1), charClass(c2), charClass(c3)];
|
|
342
|
-
|
|
343
|
-
// Any X-class character → skip
|
|
344
|
-
if (cls.includes("X")) return 0;
|
|
345
|
-
|
|
346
|
-
const unique = new Set(cls);
|
|
347
|
-
|
|
348
|
-
// Pure digits → 0
|
|
349
|
-
if (unique.size === 1 && cls[0] === "D") return 0;
|
|
350
|
-
|
|
351
|
-
// Contains '-' (S-class) with ≥3 distinct classes → 1.0
|
|
352
|
-
if (cls.includes("S") && unique.size >= 3) return 1.0;
|
|
353
|
-
|
|
354
|
-
// Letter↔Digit: digit must be in first position
|
|
355
|
-
const hasDigit = cls.includes("D");
|
|
356
|
-
const hasLetter = cls.includes("L") || cls.includes("U");
|
|
357
|
-
if (hasDigit && hasLetter && cls[0] === "D") return 1.0;
|
|
358
|
-
|
|
359
|
-
// AbA pattern: ≥2 uppercase + ≥1 lowercase (e.g. KeA, but not API)
|
|
360
|
-
const uCount = cls.filter(c => c === "U").length;
|
|
361
|
-
const lCount = cls.filter(c => c === "L").length;
|
|
362
|
-
if (uCount >= 2 && lCount >= 1) return 0.8;
|
|
363
|
-
|
|
364
|
-
return 0;
|
|
365
|
-
}
|
|
366
|
-
|
|
367
|
-
/**
|
|
368
|
-
* Split a token by X-class characters into independent segments.
|
|
369
|
-
* This prevents `://`, `@`, `.` etc. from diluting trigram density.
|
|
370
|
-
*/
|
|
371
|
-
export function splitByXClass(token: string): string[] {
|
|
372
|
-
const segments: string[] = [];
|
|
373
|
-
let current = "";
|
|
374
|
-
for (const c of token) {
|
|
375
|
-
if (charClass(c) === "X") {
|
|
376
|
-
if (current.length >= 3) segments.push(current);
|
|
377
|
-
current = "";
|
|
378
|
-
} else {
|
|
379
|
-
current += c;
|
|
380
|
-
}
|
|
381
|
-
}
|
|
382
|
-
if (current.length >= 3) segments.push(current);
|
|
383
|
-
return segments;
|
|
384
|
-
}
|
|
385
|
-
|
|
386
|
-
/**
|
|
387
|
-
* Compute average trigram density for a single segment.
|
|
388
|
-
*/
|
|
389
|
-
export function segmentDensity(segment: string): number {
|
|
390
|
-
if (segment.length < 3) return 0;
|
|
391
|
-
let totalScore = 0;
|
|
392
|
-
for (let i = 0; i <= segment.length - 3; i++) {
|
|
393
|
-
totalScore += trigramScore(segment[i]!, segment[i + 1]!, segment[i + 2]!);
|
|
394
|
-
}
|
|
395
|
-
return totalScore / (segment.length - 2);
|
|
396
|
-
}
|
|
397
|
-
|
|
398
|
-
/**
|
|
399
|
-
* Compute the maximum segment density across all X-split segments.
|
|
400
|
-
* The segment with the highest density is the most likely secret region.
|
|
401
|
-
*/
|
|
402
|
-
export function maxSegmentDensity(token: string): number {
|
|
403
|
-
const segments = splitByXClass(token);
|
|
404
|
-
if (segments.length === 0) return 0;
|
|
405
|
-
let maxD = 0;
|
|
406
|
-
for (const seg of segments) {
|
|
407
|
-
const d = segmentDensity(seg);
|
|
408
|
-
if (d > maxD) maxD = d;
|
|
409
|
-
}
|
|
410
|
-
return maxD;
|
|
411
|
-
}
|
|
412
|
-
|
|
413
|
-
/**
|
|
414
|
-
* Word ratio: fraction of token that consists of vowel-containing
|
|
415
|
-
* alphabetic fragments ≥3 characters, case-insensitive. Natural language
|
|
416
|
-
* words reduce the likelihood of being a secret.
|
|
417
|
-
*/
|
|
418
|
-
export function computeWordRatio(token: string): number {
|
|
419
|
-
const letterSeqs: string[] = [];
|
|
420
|
-
let current = "";
|
|
421
|
-
for (const c of token) {
|
|
422
|
-
const cls = charClass(c);
|
|
423
|
-
if (cls === "L" || cls === "U") {
|
|
424
|
-
current += c.toLowerCase();
|
|
425
|
-
} else {
|
|
426
|
-
if (current.length >= 3) letterSeqs.push(current);
|
|
427
|
-
current = "";
|
|
428
|
-
}
|
|
429
|
-
}
|
|
430
|
-
if (current.length >= 3) letterSeqs.push(current);
|
|
431
|
-
|
|
432
|
-
let wordLen = 0;
|
|
433
|
-
for (const seq of letterSeqs) {
|
|
434
|
-
if (/[aeiou]/.test(seq)) wordLen += seq.length;
|
|
435
|
-
}
|
|
436
|
-
return token.length > 0 ? wordLen / token.length : 0;
|
|
437
|
-
}
|
|
438
|
-
|
|
439
|
-
/**
|
|
440
|
-
* Hex ratio: fraction of characters that are hex characters (0-9, a-f, A-F, -).
|
|
441
|
-
* Values >0.9 indicate UUIDs or hex hashes which are safe.
|
|
442
|
-
*/
|
|
443
|
-
export function computeHexRatio(token: string): number {
|
|
444
|
-
let hexChars = 0;
|
|
445
|
-
for (const c of token) {
|
|
446
|
-
if (/[0-9a-fA-F\-]/.test(c)) hexChars++;
|
|
447
|
-
}
|
|
448
|
-
return token.length > 0 ? hexChars / token.length : 0;
|
|
449
|
-
}
|
|
450
|
-
|
|
451
|
-
// ── Dictionary Words for Secret Detection ─────────────────────────────────────
|
|
452
|
-
//
|
|
453
|
-
// Based on Google 10K most common English words (len >= 4)
|
|
454
|
-
// + top 500 most common words (len >= 3)
|
|
455
|
-
// + ~80 common tech abbreviations that appear in code identifiers.
|
|
456
|
-
// Used by computeDictRatio() to penalize tokens containing known words.
|
|
457
|
-
//
|
|
458
|
-
// Word list source: https://github.com/first20hours/google-10000-english (public domain)
|
|
459
|
-
|
|
460
|
-
const DICT_WORDS: ReadonlySet<string> = new Set(
|
|
461
|
-
// prettier-ignore
|
|
462
|
-
JSON.parse(`["ability","able","about","above","abstract","abuse","academic","accept","acceptance","accepted","access","accessories","accommodation","according","account","accounting","accounts","across","action","actions","active","activities","activity","actual","actually","added","addition","additional","address","adm","admin","administration","administrative","adult","advance","advanced","adventure","advertise","advertisement","advertising","advice","aes","affairs","affiliate","affiliates","africa","african","after","again","against","agencies","agency","agent","agents","agree","agreement","airport","album","allow","allowed","allows","almost","alone","along","already","also","alternative","although","always","amateur","amazon","america","american","among","amount","analysis","angeles","animal","animals","announcements","annual","another","answer","answers","anti","anyone","anything","apartments","api","apparel","appear","apple","application","applications","applied","apply","approach","appropriate","approval","approved","approximately","april","architecture","archive","archives","area","areas","argument","arizona","army","around","article","articles","artist","artists","arts","asia","asian","asked","assessment","assistance","assistant","associated","associates","association","attack","attention","attorney","auction","auctions","audio","august","australia","australian","auth","author","authority","authors","auto","automatically","automotive","availability","available","avenue","average","avg","avoid","award","awards","away","baby","back","background","balance","ball","band","bank","base","baseball","based","basic","basis","basket","battery","beach","beautiful","beauty","became","because","become","been","before","began","begin","beginning","behind","being","believe","below","benefit","benefits","best","better","between","beyond","bible","bill","birth","black","block","blog","blogs","blood","blue","board","boards","body","book","books","born","boston","both","botto
m","boys","branch","brand","brands","break","breakfast","breast","bridge","bring","british","brought","brown","browse","browser","btn","budget","buf","build","building","built","bush","business","businesses","button","buyer","buying","cable","calendar","california","call","called","calls","came","camera","cameras","camp","campaign","campus","canada","canadian","cancer","canon","capacity","capital","card","cards","care","career","careers","carolina","cars","cart","case","cases","cash","casino","catalog","categories","category","cause","cb","cell","cells","center","centers","central","centre","century","certain","certificate","certified","cfg","chain","chair","challenge","chance","change","changed","changes","channel","chapter","character","characters","charge","charges","charles","chart","chat","cheap","check","chemical","chicago","chief","child","children","china","chinese","choice","choose","chris","christian","christmas","church","cities","city","civil","claim","claims","class","classes","classic","classifieds","clean","clear","cli","click","client","clients","clinical","close","closed","clothing","club","clubs","cnet","cnt","coast","code","codes","coffee","col","cold","collection","college","color","colorado","columbia","column","come","comes","coming","command","comment","comments","commerce","commercial","commission","committee","common","communication","communications","communities","community","companies","company","compare","compared","comparison","competition","complete","completed","complex","compliance","component","components","comprehensive","computer","computers","computing","condition","conditions","conference","configuration","congress","connect","connection","consider","considered","construction","consumer","contact","contacts","contains","content","contents","context","continue","continued","contract","control","cool","copy","copyright","core","corner","corporate","corporation","correct","cost","costs","could","council","count","counter","countries
","country","county","couple","course","courses","court","cover","coverage","covered","cpu","create","created","creating","creative","credit","creek","crime","critical","cross","crud","css","csv","cultural","culture","currency","current","currently","custom","customer","customers","daily","damage","dance","dark","data","database","date","dates","dating","david","days","db","dead","deal","deals","death","debt","december","decision","deep","default","defense","define","defined","definition","degree","delivery","demand","department","described","description","design","designated","designed","desktop","detail","detailed","details","determine","determined","dev","develop","developed","developer","developing","development","device","devices","diamond","dictionary","died","diet","difference","different","difficult","digital","dir","direct","directions","directly","director","directory","disclaimer","discount","discuss","discussion","disease","disp","display","distance","distribution","district","division","dlg","dns","doctor","document","documentation","documents","does","doing","dollar","dollars","domain","domestic","done","door","double","down","download","downloads","draft","drive","driver","driving","drop","drug","drugs","dst","during","dvds","each","early","earth","easily","east","eastern","easy","ebay","economic","economy","edge","edit","edition","editor","education","educational","effect","effective","effects","effort","efforts","either","election","electric","electronic","electronics","element","elements","else","email","emergency","emit","employee","employees","employment","enable","ending","energy","engine","engineering","england","english","enjoy","enough","ensure","enter","enterprise","entertainment","entire","entries","entry","env","environment","environmental","equal","equipment","err","error","errors","especially","essential","established","estate","europe","european","evaluation","even","event","events","ever","every","everyone","everything","evidence","evt
","example","examples","excellent","except","exchange","executive","exercise","existing","expect","expected","experience","expert","express","ext","extended","extension","external","extra","eyes","face","facilities","facility","fact","factor","factors","facts","faculty","failure","fair","faith","fall","families","family","fantasy","farm","fashion","fast","father","favorite","feat","feature","featured","features","february","federal","feed","feedback","feel","fees","feet","female","fiction","field","fields","figure","file","files","fill","film","films","filter","final","finally","finance","financial","find","finding","fine","fire","firm","first","fish","fishing","fitness","five","fixed","fixme","flag","flash","flat","flight","floor","florida","flow","flowers","focus","follow","following","follows","font","food","foot","football","force","ford","foreign","forest","form","format","former","forms","forum","forums","forward","found","foundation","four","frame","france","francisco","free","freedom","french","fresh","friday","friend","friendly","friends","from","front","ftr","fuel","full","fully","function","functional","functions","fund","funding","funds","furniture","further","future","galleries","gallery","game","games","gamma","garden","gave","gear","general","generally","generated","generation","george","georgia","german","germany","gets","getting","gid","gift","gifts","girl","girls","git","give","given","gives","giving","glass","global","goal","goals","goes","going","gold","golden","golf","gone","good","goods","google","government","gpt","gpu","grade","graduate","grand","grant","graphics","great","greater","green","ground","group","groups","growing","growth","grp","guarantee","guest","gui","guide","guidelines","guides","guitar","guys","hack","hair","half","hall","hand","hands","happy","hard","hardware","have","having","hdr","head","headlines","health","hear","heard","hearing","heart","heat","heavy","held","help","helpful","here","high","higher","highest","highly","hi
ll","himself","hire","historical","history","hits","hold","holiday","holidays","home","homepage","homes","hook","hope","horse","hospital","host","hosting","hotel","hotels","hour","hours","house","housing","houston","however","html","huge","human","icon","idea","ideas","identify","idx","illinois","image","images","img","immediately","impact","implementation","important","improve","improvement","inch","include","included","includes","including","income","increase","increased","independent","index","india","indian","individual","individuals","industrial","industry","info","information","informed","initial","input","inside","install","installation","instead","institute","institutions","instructions","instruments","insurance","int","integrated","intended","interactive","interest","interested","interesting","interests","interface","internal","international","internet","into","introduction","investment","involved","ipod","iraq","ireland","isbn","island","islands","israel","issue","issues","italian","italy","item","items","itself","jack","jackson","james","january","japan","japanese","java","jersey","jesus","jewelry","jobs","john","johnson","join","joined","joint","jones","journal","json","july","jump","june","just","justice","kansas","keep","key","keyword","keywords","kids","kind","kinds","king","kingdom","kitchen","know","knowledge","known","kong","label","labor","lake","lan","land","language","languages","large","larger","largest","last","late","later","latest","latin","laws","lead","leader","leaders","leadership","leading","league","learn","learning","least","leather","leave","left","legal","len","length","lesbian","less","letter","letters","level","levels","lib","library","license","life","light","like","likely","limit","limited","line","lines","link","links","linux","list","listed","listen","listing","listings","lists","literature","little","live","lives","living","llm","load","loan","loans","local","located","location","locations","login","logo","london","long","long
er","look","looking","looks","lord","loss","lost","lots","louis","love","lower","lowest","lyrics","mac","machine","machines","made","magazine","magazines","magic","mail","mailing","main","maintenance","major","make","makes","making","male","manage","management","manager","manual","manufacturer","manufacturing","many","maps","march","marine","mark","market","marketing","markets","martin","mary","mass","master","match","matching","material","materials","matter","mature","max","maximum","maybe","mean","means","measures","media","medical","medicine","medium","meet","meeting","meetings","mega","member","members","membership","memory","mental","menu","merchant","message","messages","metal","method","methods","mexico","michael","michigan","micro","microsoft","middle","might","mike","miles","military","million","min","mind","mini","minimum","minister","minnesota","minute","minutes","miss","missing","mission","mobile","mock","mod","mode","model","models","modern","modified","module","moment","monday","money","monitor","monitoring","month","monthly","months","more","morning","mortgage","most","mother","motion","motor","motorola","mount","mountain","move","moved","movement","movie","movies","moving","msg","much","multi","multimedia","multiple","museum","music","musical","must","myself","naked","name","names","nano","nation","national","native","natural","nature","nav","navigation","near","necessary","need","needed","needs","net","network","networking","networks","never","news","newsletter","next","nice","night","nlp","nokia","none","normal","north","northern","note","notes","nothing","notice","november","npm","num","number","numbers","nursing","oauth","object","october","offer","offered","offering","offers","office","officer","official","often","ohio","older","once","ones","online","only","ontario","open","opening","operating","operation","operations","opinion","opportunities","opportunity","ops","option","optional","options","oral","orange","order","orders","oregon","organiza
tion","organizations","original","orm","oss","other","others","otherwise","outdoor","output","outside","over","overall","overview","owned","owner","owners","pacific","pack","package","packages","page","pages","paid","pain","palm","panel","paper","paperback","papers","parent","parents","paris","park","parking","part","particular","particularly","parties","partner","partners","parts","party","pass","password","past","patch","path","patient","patients","paul","payment","paypal","peace","pennsylvania","people","percent","perfect","performance","perhaps","period","perm","permission","person","personal","persons","peter","phase","phentermine","phone","phones","photo","photography","photos","physical","pick","pics","picture","pictures","pid","piece","pink","pip","pipe","pkg","place","placed","places","plan","planning","plans","plant","plants","plastic","platform","play","played","player","players","playing","please","plus","pocket","point","points","poker","pol","police","policies","policy","political","politics","pool","poor","pop","popular","population","port","pos","position","positive","possible","post","posted","poster","posters","posts","potential","power","powered","practice","practices","premium","present","presentation","presented","president","press","pressure","pretty","prev","prevent","previous","price","prices","pricing","primary","prime","print","printer","printing","prior","privacy","private","pro","probably","problem","problems","procedure","procedures","process","processes","processing","prod","produce","produced","product","production","products","professional","professor","profile","profit","program","programme","programming","programs","progress","project","projects","properties","property","proposed","protect","protection","protein","provide","provided","provider","providers","provides","providing","ptr","public","publication","publications","published","publisher","publishing","purchase","purpose","purposes","quality","quantity","quarter","question","
questions","quick","quickly","quite","quote","quotes","race","racing","radio","ram","random","range","rank","rate","rated","rates","rather","rating","ratings","reach","read","reader","readers","reading","ready","real","really","reason","reasons","receive","received","recent","recently","recipes","recommend","recommendations","recommended","record","records","recovery","reduce","ref","reference","references","regarding","region","regional","register","registered","registration","regular","regulations","related","relations","relationship","release","released","releases","relevant","religion","religious","remember","remote","remove","rent","rental","rentals","repair","replies","reply","report","reported","reporting","reports","republic","req","request","requests","require","required","requirements","requires","res","research","reserve","reserved","resolution","resort","resource","resources","respect","respective","response","responsibility","responsible","rest","restaurant","restaurants","result","results","retail","return","returns","rev","review","reviews","rich","richard","right","rights","ring","ringtones","risk","river","road","robert","rock","rol","role","room","rooms","root","rose","round","row","royal","rsa","rule","rules","running","russia","russian","safe","safety","said","saint","sale","sales","same","sample","samsung","santa","satellite","saturday","save","saying","says","scale","schedule","school","schools","science","sciences","scientific","score","scott","screen","sdk","search","searches","season","seattle","second","seconds","secretary","section","sections","sector","secure","security","seem","seems","seen","select","selected","selection","self","sell","seller","sellers","selling","send","senior","sense","sent","separate","september","sequence","series","serious","serve","server","servers","service","services","session","sets","setting","settings","seven","several","sha","shall","share","sheet","ship","shipping","ships","shirt","shirts","shoes","shop","
shopping","shops","short","shot","should","show","showing","shown","shows","sid","side","sign","signed","significant","silver","similar","simple","simply","since","single","site","sitemap","sites","situation","size","skills","skin","skip","small","smart","smith","snow","social","society","soft","software","sold","solid","solution","solutions","some","someone","something","sometimes","song","songs","sony","soon","sorry","sort","sorted","sound","source","sources","south","southern","space","spain","spanish","special","species","specific","specified","speed","spirit","sponsored","sport","sports","spring","sql","square","src","sre","ssd","ssh","ssl","staff","stage","stand","standard","standards","star","stars","start","started","starting","state","statement","statements","states","station","statistics","status","stay","steel","step","steps","steve","still","stock","stone","stop","storage","store","stores","stories","story","str","strategies","strategy","stream","street","string","strong","structure","stub","student","students","studies","studio","study","stuff","style","subject","subjects","submit","submitted","subs","subscribe","success","successful","such","suggest","suite","sum","summary","summer","sunday","super","supplies","supply","support","supported","sure","surface","surgery","survey","switch","system","systems","tab","table","tables","tag","tags","take","taken","takes","taking","talk","talking","target","task","tcp","teacher","teachers","teaching","team","tech","technical","techniques","technologies","technology","teen","teens","telephone","television","tell","temp","temperature","term","terms","test","testing","tests","texas","text","than","thank","thanks","that","their","them","theme","themselves","then","theory","therapy","there","therefore","these","they","thing","things","think","thinking","third","this","thomas","those","though","thought","thoughts","thousands","thread","three","through","throughout","thursday","thus","tickets","tid","time","times","tip"
,"tips","title","titles","tls","tmp","today","todo","together","told","took","tool","tools","topic","topics","total","touch","tour","tours","towards","town","toys","track","trade","trademarks","trading","traditional","traffic","training","transfer","transport","transportation","travel","treatment","tree","trial","trip","true","trust","truth","trying","tuesday","turn","type","types","udp","uid","under","understand","understanding","union","unique","unit","united","units","universal","university","unknown","unless","until","update","updated","updates","upgrade","upon","upper","urban","url","used","useful","user","username","users","uses","using","usr","usually","vacation","val","valid","valley","value","values","variable","variety","various","vegas","vehicle","vehicles","ver","version","very","video","videos","view","viewed","views","village","virginia","virtual","virus","vision","visit","visitors","visual","voice","volume","vote","vpn","wait","walk","wall","wan","want","wanted","warning","washington","waste","watch","watches","water","ways","weather","website","websites","wedding","wednesday","week","weekend","weekly","weeks","weight","welcome","well","went","were","west","western","what","when","where","whether","which","while","white","whole","wholesale","whose","wide","wife","wild","will","william","williams","wind","window","windows","wine","winter","wireless","wish","with","within","without","woman","women","wood","word","words","work","worked","workers","working","works","workshop","world","worldwide","worth","would","write","writing","written","wrong","wrote","xbox","xml","yahoo","yaml","year","years","yellow","yesterday","york","young","your","yourself","youth","zealand","zone"]`)
|
|
463
|
-
);
|
|
464
|
-
|
|
465
|
-
/**
 * Dictionary word ratio: fraction of token characters covered by dictionary words.
 *
 * The token is cut into lower-cased runs of letters; runs shorter than 3
 * letters are dropped. Each run is scanned left to right: at every position
 * the longest DICT_WORDS entry starting there is consumed, otherwise a single
 * character is skipped. The result is matched characters / token length.
 *
 *   "devstral-small-2" → finds "dev", "str", "small" → covers 11/16 chars
 *   "NET_CHANNEL_INFO_REPORT_V20" → finds "net", "channel", "info", "report"
 *   "aB3xK9mPqR7wN" → no words found → dictRatio = 0
 *
 * @param token Text to measure.
 * @returns Covered fraction; 0 when the token has no letter run of 3+ characters.
 */
export function computeDictRatio(token: string): number {
  // Pass 1: collect lower-cased letter runs of at least 3 characters.
  const runs: string[] = [];
  let pending = "";
  for (const ch of token) {
    const kind = charClass(ch);
    const isLetter = kind === "L" || kind === "U";
    if (isLetter) {
      pending += ch.toLowerCase();
      continue;
    }
    if (pending.length >= 3) runs.push(pending);
    pending = "";
  }
  if (pending.length >= 3) runs.push(pending);

  if (runs.length === 0) return 0;

  // Pass 2: greedy longest-match against the dictionary.
  let covered = 0;
  for (const run of runs) {
    let start = 0;
    while (start < run.length) {
      // Shrink the candidate slice from the full remainder down to nothing.
      let stop = run.length;
      while (stop > start && !DICT_WORDS.has(run.slice(start, stop))) {
        stop--;
      }
      const hit = stop - start; // 0 when no dictionary word starts here
      if (hit > 0) {
        covered += hit;
        start = stop;
      } else {
        start++;
      }
    }
  }

  // At least one run exists, so the token is non-empty here.
  return covered / token.length;
}
|
|
516
|
-
|
|
517
|
-
// ── Entropy Constants ────────────────────────────────────────────────────────
|
|
518
|
-
|
|
519
|
-
/** Adjusted-entropy score a token must strictly exceed for isHighEntropy() to flag it. */
export const ENTROPY_THRESHOLD = 5.5;
|
|
520
|
-
/** Tokens shorter than this many characters are never reported as high entropy. */
export const MIN_ENTROPY_TOKEN_LENGTH = 32;
|
|
521
|
-
/** Weight of maxSegmentDensity(); the product is added to the base Shannon entropy. */
export const W1_DENSITY = 3.0;
|
|
522
|
-
/** Weight of computeWordRatio(); the product is subtracted from the adjusted entropy. */
export const W2_WORD = 3.0;
|
|
523
|
-
/** Weight of computeDictRatio(); the product is subtracted from the adjusted entropy. */
export const W3_DICT = 4.0;
|
|
524
|
-
/** Flat amount subtracted when a token contains "-" and its hex ratio exceeds HEX_RATIO_THRESHOLD. */
export const HEX_PENALTY = 2.5;
|
|
525
|
-
/** computeHexRatio() value a hyphenated token must strictly exceed for HEX_PENALTY to apply. */
export const HEX_RATIO_THRESHOLD = 0.9;
|
|
526
|
-
|
|
527
|
-
/**
|
|
528
|
-
* Adjusted entropy v3+Dict:
|
|
529
|
-
* adjusted = baseShannon + trigramDensity×W1 - wordRatio×W2 - dictRatio×W3 - hexPenalty
|
|
4
|
+
* Three-pass detection:
|
|
5
|
+
* 1. High-confidence pattern matching (unambiguous prefixes)
|
|
6
|
+
* 2. Config-key regex matching (config-like files only)
|
|
7
|
+
* 3. Adjusted Shannon entropy analysis (config-like files only)
|
|
530
8
|
*/
|
|
531
|
-
export function calculateAdjustedEntropy(data: string): number {
  // adjusted = baseShannon + trigramDensity×W1 - wordRatio×W2 - dictRatio×W3 - hexPenalty
  const base = shannonEntropy(data);
  const densityBoost = maxSegmentDensity(data) * W1_DENSITY;
  const wordPenalty = computeWordRatio(data) * W2_WORD;
  const dictPenalty = computeDictRatio(data) * W3_DICT;

  // Hex penalty: only for hyphenated UUID-like tokens (>90% hex AND contains -).
  // Pure hex strings without hyphens might be real secrets (not UUIDs/SHAs).
  let hexPenalty = 0;
  if (computeHexRatio(data) > HEX_RATIO_THRESHOLD && data.includes("-")) {
    hexPenalty = HEX_PENALTY;
  }

  return base + densityBoost - wordPenalty - dictPenalty - hexPenalty;
}
|
|
546
|
-
|
|
547
|
-
/**
 * Decide whether a token's adjusted entropy marks it as a likely secret.
 *
 * A token is flagged only when all three hold, checked in this order:
 * it has at least MIN_ENTROPY_TOKEN_LENGTH characters, isSafeContent() does
 * not accept it, and its adjusted entropy is above ENTROPY_THRESHOLD.
 */
export function isHighEntropy(data: string): boolean {
  const longEnough = data.length >= MIN_ENTROPY_TOKEN_LENGTH;
  return (
    longEnough &&
    !isSafeContent(data) &&
    calculateAdjustedEntropy(data) > ENTROPY_THRESHOLD
  );
}
|
|
552
|
-
|
|
553
|
-
/**
 * Extract the tokens of `content` that look like high-entropy secrets.
 *
 * Content is split on runs of whitespace AND of the structural characters
 * [ ] { } " ' , / \ | ( ) & # @ ! < > ?
 * so JSON punctuation, URL path separators and the "@" of a connection
 * string all end a token. Characters such as = : . - _ + are not delimiters
 * and stay inside a token.
 *
 * @param content Raw text to scan.
 * @returns Tokens in order of appearance (duplicates kept) that are at least
 *          MIN_ENTROPY_TOKEN_LENGTH long and pass isHighEntropy().
 */
export function findHighEntropyTokens(content: string): string[] {
  const tokens = content.split(/[\s\[\]{}"',\/\\|()&#@!<>?]+/);
  // Cheap length pre-check; isHighEntropy() enforces the same minimum itself.
  return tokens.filter(t => t.length >= MIN_ENTROPY_TOKEN_LENGTH && isHighEntropy(t));
}
|
|
561
|
-
|
|
562
|
-
// ── Known Secret Patterns ────────────────────────────────────────────────────
|
|
563
|
-
|
|
564
|
-
/** Shape of one entry in the known-secret pattern table (SECRET_PATTERNS). */
export interface SecretPattern {
  /** Human-readable label of the secret kind, e.g. "GitHub PAT". */
  name: string;
  /** Regular expression that recognises the secret in text. */
  pattern: RegExp;
  /** Minimum length tied to a match. NOTE(review): the consumer is not visible here — confirm what is measured. */
  minLength: number;
  /** Set to true only by the PEM private-key header entries of SECRET_PATTERNS; presumably lets a match contain spaces — confirm. */
  allowsSpaces: boolean;
  /** If true, skip safe-pattern exclusion (unambiguous prefix) */
  highConfidence: boolean;
}
|
|
572
|
-
|
|
573
|
-
export const SECRET_PATTERNS: SecretPattern[] = [
|
|
574
|
-
// AWS
|
|
575
|
-
{ name: "AWS Access Key ID", pattern: /AKIA[0-9A-Z]{16}/, minLength: 16, allowsSpaces: false, highConfidence: true },
|
|
576
|
-
{ name: "AWS Secret Access Key", pattern: /(?:aws)?_?(?:secret)?_?(?:access)?_?key['"\s:=]+['"]?[0-9a-zA-Z/+]{40}['"]?/i, minLength: 30, allowsSpaces: false, highConfidence: true },
|
|
577
|
-
// GitHub
|
|
578
|
-
{ name: "GitHub OAuth Token", pattern: /gho_[0-9a-zA-Z]{36}/, minLength: 36, allowsSpaces: false, highConfidence: true },
|
|
579
|
-
{ name: "GitHub App Token", pattern: /(?:ghu|ghs)_[0-9a-zA-Z]{36}/, minLength: 36, allowsSpaces: false, highConfidence: true },
|
|
580
|
-
{ name: "GitHub PAT", pattern: /ghp_[0-9a-zA-Z]{36}/, minLength: 36, allowsSpaces: false, highConfidence: true },
|
|
581
|
-
{ name: "GitHub Fine-Grained Token", pattern: /github_pat_[0-9a-zA-Z_]{22,}/, minLength: 26, allowsSpaces: false, highConfidence: true },
|
|
582
|
-
// GitLab
|
|
583
|
-
{ name: "GitLab PAT", pattern: /glpat-[0-9a-zA-Z\-_]{20,}/, minLength: 20, allowsSpaces: false, highConfidence: true },
|
|
584
|
-
{ name: "GitLab Runner Token", pattern: /glrt-[0-9a-zA-Z_\-]{20,}/, minLength: 20, allowsSpaces: false, highConfidence: true },
|
|
585
|
-
// Slack
|
|
586
|
-
{ name: "Slack Token", pattern: /xox[baprs]-[0-9a-zA-Z\-]{10,48}/, minLength: 15, allowsSpaces: false, highConfidence: true },
|
|
587
|
-
{ name: "Slack Webhook URL", pattern: /https:\/\/hooks\.slack\.com\/services\/T[a-zA-Z0-9_]{8,}\/B[a-zA-Z0-9_]{8,}\/[a-zA-Z0-9_]{24}/, minLength: 60, allowsSpaces: false, highConfidence: true },
|
|
588
|
-
// JWT
|
|
589
|
-
{ name: "JSON Web Token", pattern: /eyJ[a-zA-Z0-9_-]{10,}\.eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}/, minLength: 36, allowsSpaces: false, highConfidence: true },
|
|
590
|
-
// Google
|
|
591
|
-
{ name: "Google API Key", pattern: /AIza[0-9A-Za-z\-_]{35}/, minLength: 35, allowsSpaces: false, highConfidence: true },
|
|
592
|
-
{ name: "Google OAuth Token", pattern: /ya29\.[0-9A-Za-z\-_]+/, minLength: 10, allowsSpaces: false, highConfidence: true },
|
|
593
|
-
// Stripe
|
|
594
|
-
{ name: "Stripe Secret Key", pattern: /sk_live_[0-9a-zA-Z]{24,}/, minLength: 24, allowsSpaces: false, highConfidence: true },
|
|
595
|
-
{ name: "Stripe Restricted Key", pattern: /rk_live_[0-9a-zA-Z]{24,}/, minLength: 24, allowsSpaces: false, highConfidence: true },
|
|
596
|
-
// Twilio / SendGrid / Discord
|
|
597
|
-
{ name: "Twilio API Key", pattern: /SK[a-z0-9]{32}/, minLength: 30, allowsSpaces: false, highConfidence: true },
|
|
598
|
-
{ name: "SendGrid API Key", pattern: /SG\.[a-zA-Z0-9_-]{22,}\.[a-zA-Z0-9_-]{40,}/, minLength: 40, allowsSpaces: false, highConfidence: true },
|
|
599
|
-
{ name: "Discord Bot Token", pattern: /[MN][A-Za-z\d]{23,}\.[\w-]{6}\.[\w-]{27,}/, minLength: 40, allowsSpaces: false, highConfidence: true },
|
|
600
|
-
// OpenAI / Anthropic / Volcengine Ark
|
|
601
|
-
{ name: "OpenAI API Key", pattern: /sk-[a-zA-Z0-9]{20,}T3BlbkFJ[a-zA-Z0-9]{20,}/, minLength: 40, allowsSpaces: false, highConfidence: true },
|
|
602
|
-
{ name: "OpenAI API Key (New)", pattern: /sk-(?:proj-)?[a-zA-Z0-9\-_]{40,}/, minLength: 40, allowsSpaces: false, highConfidence: true },
|
|
603
|
-
{ name: "Anthropic API Key", pattern: /sk-ant-api[0-9]{2}-[a-zA-Z0-9\-_]{80,}/, minLength: 80, allowsSpaces: false, highConfidence: true },
|
|
604
|
-
{ name: "Volcengine Ark API Key", pattern: /ark-[a-zA-Z0-9\-_]{20,}/, minLength: 20, allowsSpaces: false, highConfidence: true },
|
|
605
|
-
// NPM / PyPI
|
|
606
|
-
{ name: "NPM Token", pattern: /npm_[a-zA-Z0-9]{36}/, minLength: 36, allowsSpaces: false, highConfidence: true },
|
|
607
|
-
{ name: "PyPI Token", pattern: /pypi-[a-zA-Z0-9_\-]{50,}/, minLength: 50, allowsSpaces: false, highConfidence: true },
|
|
608
|
-
// Private Keys
|
|
609
|
-
{ name: "RSA Private Key", pattern: /-----BEGIN RSA PRIVATE KEY-----/, minLength: 20, allowsSpaces: true, highConfidence: true },
|
|
610
|
-
{ name: "OpenSSH Private Key", pattern: /-----BEGIN OPENSSH PRIVATE KEY-----/, minLength: 20, allowsSpaces: true, highConfidence: true },
|
|
611
|
-
{ name: "EC Private Key", pattern: /-----BEGIN EC PRIVATE KEY-----/, minLength: 20, allowsSpaces: true, highConfidence: true },
|
|
612
|
-
{ name: "PGP Private Key", pattern: /-----BEGIN PGP PRIVATE KEY BLOCK-----/, minLength: 20, allowsSpaces: true, highConfidence: true },
|
|
613
|
-
{ name: "Generic Private Key", pattern: /-----BEGIN (?:ENCRYPTED )?PRIVATE KEY-----/, minLength: 20, allowsSpaces: true, highConfidence: true },
|
|
614
|
-
// Database URIs
|
|
615
|
-
{ name: "MongoDB Connection String", pattern: /mongodb(?:\+srv)?:\/\/[^\s'"]+:[^\s'"]+@[^\s'"]+/, minLength: 20, allowsSpaces: false, highConfidence: true },
|
|
616
|
-
{ name: "PostgreSQL Connection String", pattern: /postgres(?:ql)?:\/\/[^\s'"]+:[^\s'"]+@[^\s'"]+/, minLength: 20, allowsSpaces: false, highConfidence: true },
|
|
617
|
-
{ name: "MySQL Connection String", pattern: /mysql:\/\/[^\s'"]+:[^\s'"]+@[^\s'"]+/, minLength: 20, allowsSpaces: false, highConfidence: true },
|
|
618
|
-
{ name: "Redis Connection String", pattern: /redis:\/\/[^\s'"]*:[^\s'"]+@[^\s'"]+/, minLength: 15, allowsSpaces: false, highConfidence: true },
|
|
619
|
-
// URL-embedded passwords
|
|
620
|
-
{ name: "Password in URL", pattern: /[a-zA-Z]{3,10}:\/\/[^/\s:@]{3,20}:[^/\s:@]{3,20}@[^\s'"]+/, minLength: 15, allowsSpaces: false, highConfidence: true },
|
|
621
|
-
// Generic assignments (lower confidence — checked against SAFE_PATTERNS)
|
|
622
|
-
{ name: "Bearer Token", pattern: /[Bb]earer\s+[a-zA-Z0-9\-._~+/]+=*/, minLength: 15, allowsSpaces: false, highConfidence: false },
|
|
623
|
-
{ name: "Basic Auth Header", pattern: /[Bb]asic\s+[a-zA-Z0-9+/]{20,}={0,2}/, minLength: 20, allowsSpaces: false, highConfidence: false },
|
|
624
|
-
{ name: "API Key Assignment", pattern: /(?:api[_-]?key|apikey|api[_-]?secret)['"\s:=]+['"]?[a-zA-Z0-9\-._]{20,}['"]?/i, minLength: 20, allowsSpaces: false, highConfidence: false },
|
|
625
|
-
{ name: "Secret Assignment", pattern: /(?:secret|token|password|passwd|pwd)['"\s:=]+['"]?[a-zA-Z0-9\-._!@#$%^&*]{8,}['"]?/i, minLength: 12, allowsSpaces: false, highConfidence: false },
|
|
626
|
-
];
|
|
627
|
-
|
|
628
|
-
// ── Safe Patterns (exclude from detection to reduce false positives) ─────────
|
|
629
|
-
|
|
630
|
-
/**
 * Allow-list of value shapes that are treated as harmless.
 *
 * Every entry is anchored at the start of the string. All but the placeholder
 * entry are also anchored at the end, so the placeholder entry accepts any
 * string that merely *begins* with one of its words.
 */
export const SAFE_PATTERNS: RegExp[] = [
  // URLs without credentials
  /^https?:\/\/[a-zA-Z0-9.-]+(?:\/[a-zA-Z0-9.\/_\-?&=#%]*)?$/,
  // Relative file paths
  /^\.\.?\/[a-zA-Z0-9_\-./]+$/,
  // Absolute Unix paths
  /^\/[a-zA-Z0-9_\-./]+$/,
  // Windows paths
  /^[a-zA-Z]:\\[a-zA-Z0-9_\-\\./]+$/,
  // Email addresses
  /^[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}$/,
  // UUIDs
  /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/,
  // Semver
  /^v?\d+\.\d+\.\d+(?:-[a-zA-Z0-9.]+)?(?:\+[a-zA-Z0-9.]+)?$/,
  // Placeholders (prefix match only)
  /^(?:xxx+|your[_-]?(?:api[_-]?)?key|placeholder|example|test|demo|sample)/i,
  // Git SHA-1
  /^[0-9a-f]{40}$/i,
  // SHA-256
  /^[0-9a-f]{64}$/i,
  // npm scoped packages
  /^@[a-z0-9-]+\/[a-z0-9-]+$/,
];

/**
 * Tells whether `content` matches at least one entry of SAFE_PATTERNS.
 *
 * @param content Candidate string, tested as given (no trimming is applied here).
 * @returns `true` as soon as any safe pattern matches, otherwise `false`.
 */
export function isSafeContent(content: string): boolean {
  // None of the patterns carry the `g`/`y` flag, so `test` keeps no state between calls.
  return SAFE_PATTERNS.some((safePattern) => safePattern.test(content));
}
|
|
650
|
-
|
|
651
|
-
// ── Detector ─────────────────────────────────────────────────────────────────
|
|
652
|
-
|
|
653
|
-
/** Origin tag carried by every match; the masking helper selects a different mask character for "regex" and for "entropy". */
export type SecretMatchSource = "pattern" | "regex" | "entropy";
|
|
654
|
-
|
|
655
|
-
/**
 * One detected secret candidate.
 *
 * `start`/`end` are compared as a half-open range by the overlap check, and
 * the pair is also used as the de-duplication key when matches are collected.
 */
export interface SecretMatch {
|
|
656
|
-
// Label of the rule that fired — presumably copied from the pattern's `name`; verify against detectSecrets.
name: string;
|
|
657
|
-
// Offset where the match begins (inclusive).
start: number;
|
|
658
|
-
// Offset where the match stops (exclusive, per the overlap check).
end: number;
|
|
659
|
-
// NOTE(review): looks like the matched text before masking — confirm against the producer.
original: string;
|
|
660
|
-
// Which kind of detection produced this match.
source: SecretMatchSource;
|
|
661
|
-
}
|
|
662
|
-
|
|
663
|
-
/** Optional settings accepted by detectSecrets. */
export interface DetectSecretsOptions {
|
|
664
|
-
// Path of the scanned file; detectSecrets passes it to isConfigLikeFile to decide whether the content is config-like.
filePath?: string;
|
|
665
|
-
}
|
|
666
9
|
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
}
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
// Minimum trimmed length a config value needs before looksLikeSensitiveConfigValue reports it.
const CONFIG_VALUE_MIN_LENGTH = 32;
|
|
677
|
-
// Lower-cased file extensions that isConfigLikeFile accepts as configuration files.
const CONFIG_FILE_EXTENSIONS = new Set([
|
|
678
|
-
".json", ".jsonc", ".env", ".toml", ".yaml", ".yml",
|
|
679
|
-
".ini", ".cfg", ".conf", ".properties",
|
|
680
|
-
]);
|
|
681
|
-
// Matches a basename of `.env` or `.env.<suffix>` (case-insensitive).
const CONFIG_BASENAME_REGEX = /^\.env(?:\..+)?$/i;
|
|
682
|
-
// Matches secret-related key words delimited by `_` or the string edges — presumably applied to normalized (snake_case) keys; verify against the caller.
const SENSITIVE_CONFIG_KEY_REGEX = /(?:^|_)(?:apikey|api_(?:key|secret|token)|access_(?:key|token)|refresh_token|client_secret|secret(?:_key)?|private_key|bearer_token|auth(?:orization|_token)?|pass(?:word|wd)?|pwd|token|webhook_secret)(?:_|$)/i;
|
|
683
|
-
// Whole-value placeholders: `${...}`, `{{...}}`, `<...>`, runs of x, and common dummy words. Anchored at both ends.
const PLACEHOLDER_VALUE_REGEX = /^(?:\$\{[^}]+\}|\{\{[^}]+\}\}|<[^>]+>|xxx+|placeholder|example|sample|demo|test|changeme|your[_-]?(?:api[_-]?)?key(?:[_-]?here)?)$/i;
|
|
684
|
-
// Global regexes exposing named groups `key` and `value`, in order: double-quoted value, single-quoted value, unquoted `key = value`.
const CONFIG_STRING_PATTERNS: RegExp[] = [
|
|
685
|
-
// key (bare or quoted), `:` or `=`, then a double-quoted value; backslash escapes are allowed inside the value.
/(?<key>"[^"\r\n]+"|'[^'\r\n]+'|[A-Za-z0-9_.-]+)\s*[:=]\s*"(?<value>(?:\\.|[^"\\])*)"/g,
|
|
686
|
-
// Same as above with a single-quoted value.
/(?<key>"[^"\r\n]+"|'[^'\r\n]+'|[A-Za-z0-9_.-]+)\s*[:=]\s*'(?<value>(?:\\.|[^'\\])*)'/g,
|
|
687
|
-
// Bare key, `=`, then everything up to a line break, `#` or `;`.
/(?<key>[A-Za-z0-9_.-]+)\s*=\s*(?<value>[^\r\n#;]+)/g,
|
|
688
|
-
];
|
|
10
|
+
import {
|
|
11
|
+
type SecretMatch,
|
|
12
|
+
type SecretMatchSource,
|
|
13
|
+
type DetectSecretsOptions,
|
|
14
|
+
MIN_SCAN_LENGTH,
|
|
15
|
+
SENSITIVE_CONFIG_KEY_REGEX,
|
|
16
|
+
} from "./types.js";
|
|
17
|
+
import { SECRET_PATTERNS, isConfigLikeFile, extractConfigStringEntries, looksLikeSensitiveConfigValue } from "./patterns.js";
|
|
18
|
+
import { isHighEntropy } from "./entropy.js";
|
|
689
19
|
|
|
690
|
-
|
|
691
|
-
return key
|
|
692
|
-
.trim()
|
|
693
|
-
.replace(/^['"]|['"]$/g, "")
|
|
694
|
-
.replace(/([A-Z]+)([A-Z][a-z])/g, "$1_$2")
|
|
695
|
-
.replace(/([a-z0-9])([A-Z])/g, "$1_$2")
|
|
696
|
-
.toLowerCase()
|
|
697
|
-
.replace(/[.\-\s]+/g, "_")
|
|
698
|
-
.replace(/_+/g, "_")
|
|
699
|
-
.replace(/^_+|_+$/g, "");
|
|
700
|
-
}
|
|
701
|
-
|
|
702
|
-
/**
 * Decides whether a path names a configuration-style file.
 *
 * A file qualifies when its basename matches CONFIG_BASENAME_REGEX or when its
 * lower-cased extension is a member of CONFIG_FILE_EXTENSIONS.
 *
 * @param filePath Path to classify; an absent or empty path is never config-like.
 * @returns `true` when either check succeeds.
 */
function isConfigLikeFile(filePath?: string): boolean {
  if (!filePath) return false;

  const fileName = basename(filePath);
  return (
    CONFIG_BASENAME_REGEX.test(fileName) ||
    CONFIG_FILE_EXTENSIONS.has(extname(fileName).toLowerCase())
  );
}
|
|
708
|
-
|
|
709
|
-
/**
 * Heuristic filter for config values that are worth reporting.
 *
 * The value is trimmed first. It is rejected when it is empty, a placeholder,
 * recognised by isSafeContent, a boolean/null literal, or a plain decimal
 * number. Anything left is accepted only if it is at least
 * CONFIG_VALUE_MIN_LENGTH characters long.
 *
 * @param value Raw value text as captured from the config file.
 * @returns `true` when the trimmed value passes every exclusion and is long enough.
 */
function looksLikeSensitiveConfigValue(value: string): boolean {
  const candidate = value.trim();

  // Checks run in the same order as a chain of early returns would.
  const isBenign =
    candidate.length === 0 ||
    PLACEHOLDER_VALUE_REGEX.test(candidate) ||
    isSafeContent(candidate) ||
    /^(?:true|false|null)$/i.test(candidate) ||
    /^[+-]?\d+(?:\.\d+)?$/.test(candidate);

  return !isBenign && candidate.length >= CONFIG_VALUE_MIN_LENGTH;
}
|
|
718
|
-
|
|
719
|
-
/**
 * Collects `key: "value"` / `key = value` style assignments from `content`.
 *
 * Each regex in CONFIG_STRING_PATTERNS is run over the whole text. Every match
 * that exposes both named groups (`key`, `value`) yields one entry, unless an
 * earlier match already produced an entry for the same value span.
 *
 * @param content Raw text to scan.
 * @returns Entries holding the raw key, its normalized form, the captured
 *   value, and the `[start, end)` offsets of the value inside `content`.
 */
function extractConfigStringEntries(content: string): ConfigStringEntry[] {
  const entries: ConfigStringEntry[] = [];
  const seenSpans = new Set<string>();

  for (const pattern of CONFIG_STRING_PATTERNS) {
    for (const match of content.matchAll(pattern)) {
      const key = match.groups?.key;
      const value = match.groups?.value;
      if (!key || value === undefined || match.index === undefined) continue;

      const full = match[0] ?? "";
      // The value capture is the last thing in every pattern, followed at most
      // by a single closing quote, so it is located from the END of the match.
      // Searching from the front would land inside the key whenever the value
      // text also occurs there (e.g. `token = "token"`), and would always
      // report offset 0 for an empty value.
      const rel = value === "" ? full.length - 1 : full.lastIndexOf(value);
      if (rel < 0) continue;

      const start = match.index + rel;
      const end = start + value.length;

      // The same value span can be captured by more than one pattern; keep the first.
      const spanKey = `${start}-${end}`;
      if (seenSpans.has(spanKey)) continue;
      seenSpans.add(spanKey);

      entries.push({
        key,
        normalizedKey: normalizeConfigKey(key),
        value,
        start,
        end,
      });
    }
  }

  return entries;
}
|
|
20
|
+
// ─── Internal helpers ──────────────────────────────────────────────────────
|
|
748
21
|
|
|
749
22
|
function addMatch(matches: SecretMatch[], seen: Set<string>, match: SecretMatch): void {
|
|
750
23
|
const key = `${match.start}-${match.end}`;
|
|
@@ -757,13 +30,15 @@ function isCoveredByExistingMatch(matches: SecretMatch[], start: number, end: nu
|
|
|
757
30
|
return matches.some((existing) => !(end <= existing.start || start >= existing.end));
|
|
758
31
|
}
|
|
759
32
|
|
|
33
|
+
// ─── Main API ──────────────────────────────────────────────────────────────
|
|
34
|
+
|
|
760
35
|
export function detectSecrets(content: string, options: DetectSecretsOptions = {}): SecretMatch[] {
|
|
761
36
|
if (content.length < MIN_SCAN_LENGTH) return [];
|
|
762
37
|
const matches: SecretMatch[] = [];
|
|
763
38
|
const seen = new Set<string>();
|
|
764
39
|
const configLike = isConfigLikeFile(options.filePath);
|
|
765
40
|
|
|
766
|
-
// Pass 1: High-confidence pattern matching (
|
|
41
|
+
// Pass 1: High-confidence pattern matching (unambiguous prefixes like ghp_, AKIA)
|
|
767
42
|
for (const sp of SECRET_PATTERNS) {
|
|
768
43
|
if (!sp.highConfidence) continue;
|
|
769
44
|
if (content.length < sp.minLength) continue;
|
|
@@ -817,6 +92,8 @@ export function detectSecrets(content: string, options: DetectSecretsOptions = {
|
|
|
817
92
|
return matches.sort((a, b) => b.start - a.start);
|
|
818
93
|
}
|
|
819
94
|
|
|
95
|
+
// ─── Masking ────────────────────────────────────────────────────────────────
|
|
96
|
+
|
|
820
97
|
function getMaskChar(source?: SecretMatchSource): string {
|
|
821
98
|
if (source === "regex") return "#";
|
|
822
99
|
if (source === "entropy") return "?";
|
|
@@ -828,4 +105,3 @@ export function maskSecret(text: string, source?: SecretMatchSource): string {
|
|
|
828
105
|
if (text.length <= 6) return maskChar.repeat(text.length);
|
|
829
106
|
return text.slice(0, 3) + maskChar.repeat(text.length - 6) + text.slice(-3);
|
|
830
107
|
}
|
|
831
|
-
|