decorated-pi 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,11 @@
1
1
  /**
2
2
  * Safety — 安全防护模块
3
3
  *
4
- * - Command Guard: 拦截危险 bash 命令与 shell 覆盖写入(枚举式)
5
- * - Protected Paths: 禁止写入敏感路径
6
- * - Write Guard: 覆盖非空文件前确认
4
+ * - Command Guard: 拦截危险 bash 命令(rm, sudo, npm publish, git push 等)
5
+ * - Redirect Guard: bash 覆盖写入(>)提示确认,保护路径额外警告敏感信息
6
+ * - Protected Paths: write/edit 写入保护路径需确认,提示敏感信息
7
+ * - Read Guard: read/cat 等读取保护路径需确认,提示敏感信息
8
+ * - Write Guard: 覆盖非空文件禁止 write 工具,建议用 edit
7
9
  * - Secret Redact: API Key / Token 自动掩码
8
10
  */
9
11
 
@@ -20,6 +22,7 @@ import { resolve } from "node:path";
20
22
  const DANGEROUS_COMMANDS: [string, string[]][] = [
21
23
  ["rm", []],
22
24
  ["sudo", []],
25
+ ["npm", ["publish"]],
23
26
  ["svn", ["commit", "revert"]],
24
27
  ["git", ["reset", "restore", "clean", "push", "revert"]],
25
28
  ];
@@ -31,7 +34,44 @@ const SAFE_REDIRECT_TARGETS = new Set([
31
34
  ]);
32
35
 
33
36
  const SHELL_SEGMENT_BREAKS = new Set(["|", "&&", "||", ";"]);
34
- const SHELL_REDIRECT_OPERATORS = new Set([">", ">>", "1>", "1>>", "2>", "2>>", "&>", "&>>"]);
37
+ const SHELL_REDIRECT_OVERWRITE = new Set([">", "1>", "2>", "&>"]);
38
+
39
+ // ─── 保护路径 ────────────────────────────────────────────────────────────────
40
+
41
+ const PROTECTED_PATH_SEGMENTS = [
42
+ ".env", ".git/", ".ssh/",
43
+ ".gnupg/", ".aws/", "secrets/", ".docker/",
44
+ ];
45
+ const PROTECTED_EXTENSIONS = [".pem", ".key", ".p12", ".pfx", ".keystore"];
46
+ const PROTECTED_FILENAMES = [
47
+ "id_rsa", "id_ed25519", "id_ecdsa",
48
+ "authorized_keys", "known_hosts",
49
+ ".env.local", ".env.production",
50
+ ];
51
+
52
+ /** Commands that read file contents (should confirm before reading protected paths) */
53
+ const READ_COMMANDS = new Set([
54
+ "cat", "head", "tail", "less", "more", "bat", "batcat",
55
+ "tac", "nl", "od", "xxd", "hexdump", "base64",
56
+ "file", "strings", "grep", "rg", "ag", "ack",
57
+ ]);
58
+
59
+ function checkProtectedPath(filePath: string): string | null {
60
+ const normalized = filePath.replace(/\\/g, "/");
61
+ const filename = normalized.split("/").pop() ?? "";
62
+ for (const seg of PROTECTED_PATH_SEGMENTS) {
63
+ if (normalized.includes(seg)) return `path contains "${seg}"`;
64
+ }
65
+ for (const ext of PROTECTED_EXTENSIONS) {
66
+ if (normalized.endsWith(ext)) return `file extension "${ext}"`;
67
+ }
68
+ for (const name of PROTECTED_FILENAMES) {
69
+ if (filename === name) return `protected file "${name}"`;
70
+ }
71
+ return null;
72
+ }
73
+
74
+ // ─── Shell tokenizer ────────────────────────────────────────────────────────
35
75
 
36
76
  function tokenizeShell(command: string): string[] {
37
77
  const tokens: string[] = [];
@@ -122,145 +162,588 @@ function isExistingRegularFile(target: string, cwd: string): boolean {
122
162
  }
123
163
  }
124
164
 
125
- function collectDangerousReasons(command: string, cwd: string): string[] {
165
+ // ─── Bash danger analysis ───────────────────────────────────────────────────
166
+
167
+ interface BashDanger {
168
+ reason: string;
169
+ /** Whether the danger involves a protected (sensitive) path */
170
+ protectedPath?: string;
171
+ }
172
+
173
+ function collectBashDangers(command: string, cwd: string): BashDanger[] {
126
174
  const tokens = tokenizeShell(command);
127
- const reasons: string[] = [];
175
+ const dangers: BashDanger[] = [];
128
176
  const seen = new Set<string>();
129
177
 
130
- const addReason = (reason: string) => {
178
+ const addDanger = (reason: string, protectedPath?: string) => {
131
179
  if (seen.has(reason)) return;
132
180
  seen.add(reason);
133
- reasons.push(reason);
181
+ dangers.push({ reason, protectedPath });
134
182
  };
135
183
 
136
184
  for (let i = 0; i < tokens.length; i++) {
137
185
  const token = tokens[i]!;
138
186
  if (SHELL_SEGMENT_BREAKS.has(token)) continue;
139
187
 
188
+ // ── Dangerous commands ──
140
189
  for (const [cmd, subs] of DANGEROUS_COMMANDS) {
141
190
  const name = token.split("/").pop() ?? token;
142
191
  if (name !== cmd && name !== `${cmd}.exe`) continue;
143
192
  if (subs.length === 0) {
144
- addReason(`"${cmd}" is a dangerous command`);
193
+ addDanger(`"${cmd}" is a dangerous command`);
145
194
  break;
146
195
  }
147
196
  const next = tokens[i + 1];
148
197
  if (next && subs.includes(next)) {
149
- addReason(`"${cmd} ${next}" is a dangerous command`);
198
+ addDanger(`"${cmd} ${next}" is a dangerous command`);
150
199
  break;
151
200
  }
152
201
  }
153
202
 
154
- if (SHELL_REDIRECT_OPERATORS.has(token)) {
203
+ // ── Overwrite redirect (>) ──
204
+ if (SHELL_REDIRECT_OVERWRITE.has(token)) {
155
205
  const target = tokens[i + 1];
156
206
  if (target && isExistingRegularFile(target, cwd)) {
157
- addReason(`shell redirection would write to existing file "${target}"`);
207
+ const prot = checkProtectedPath(target);
208
+ if (prot) {
209
+ addDanger(
210
+ `shell redirection would overwrite existing file "${target}"\n Sensitive: ${prot}, may contain sensitive information`,
211
+ prot,
212
+ );
213
+ } else {
214
+ addDanger(`shell redirection would overwrite existing file "${target}"`);
215
+ }
158
216
  }
159
217
  continue;
160
218
  }
161
219
 
162
- const name = token.split("/").pop() ?? token;
163
- if (name !== "tee" && name !== "tee.exe") continue;
220
+ // ── Read commands on protected paths ──
221
+ const cmdName = token.split("/").pop() ?? token;
222
+ if (READ_COMMANDS.has(cmdName) || READ_COMMANDS.has(`${cmdName}.exe`)) {
223
+ for (let j = i + 1; j < tokens.length; j++) {
224
+ const next = tokens[j]!;
225
+ if (SHELL_SEGMENT_BREAKS.has(next)) break;
226
+ if (next.startsWith("-")) continue;
227
+ if (next.includes("/") || next.startsWith(".") || isExistingRegularFile(next, cwd)) {
228
+ const prot = checkProtectedPath(next);
229
+ if (prot) {
230
+ addDanger(
231
+ `"${cmdName}" reads protected file "${next}"\n Sensitive: ${prot}, may contain sensitive information`,
232
+ prot,
233
+ );
234
+ }
235
+ }
236
+ }
237
+ }
164
238
 
165
- for (let j = i + 1; j < tokens.length; j++) {
166
- const next = tokens[j]!;
167
- if (SHELL_SEGMENT_BREAKS.has(next)) break;
168
- if (next === "-a" || next === "--append") continue;
169
- if (next.startsWith("-")) continue;
170
- if (isExistingRegularFile(next, cwd)) {
171
- addReason(`"tee" would write to existing file "${next}"`);
239
+ // ── tee writes to existing files ──
240
+ if (cmdName === "tee" || cmdName === "tee.exe") {
241
+ for (let j = i + 1; j < tokens.length; j++) {
242
+ const next = tokens[j]!;
243
+ if (SHELL_SEGMENT_BREAKS.has(next)) break;
244
+ if (next === "-a" || next === "--append") continue;
245
+ if (next.startsWith("-")) continue;
246
+ if (isExistingRegularFile(next, cwd)) {
247
+ const prot = checkProtectedPath(next);
248
+ if (prot) {
249
+ addDanger(
250
+ `"tee" would write to existing file "${next}"\n Sensitive: ${prot}, may contain sensitive information`,
251
+ prot,
252
+ );
253
+ } else {
254
+ addDanger(`"tee" would write to existing file "${next}"`);
255
+ }
256
+ }
172
257
  }
173
258
  }
174
259
  }
175
260
 
176
- return reasons;
261
+ return dangers;
177
262
  }
178
263
 
179
- function formatDangerousReasons(reasons: string[]): string | null {
180
- if (reasons.length === 0) return null;
181
- if (reasons.length === 1) return reasons[0]!;
182
- return `dangerous operations detected:\n- ${reasons.join("\n- ")}`;
264
+ function formatBashDangers(dangers: BashDanger[]): string | null {
265
+ if (dangers.length === 0) return null;
266
+ if (dangers.length === 1) return dangers[0]!.reason;
267
+ return `dangerous operations detected:\n- ${dangers.map(d => d.reason).join("\n- ")}`;
268
+ }
269
+
270
+ // ─── Secret Detection — Entropy + Pattern ────────────────────────────────────
271
+ //
272
+ // Based on opencode-secrets-protect by Jared Scheel
273
+ // https://github.com/jscheel/opencode-secrets-protect (MIT License)
274
+ //
275
+ // Detection pipeline: High-confidence patterns (40+ known formats)
276
+ // → Low-confidence patterns (generic assignments, context-checked)
277
+ // → Adjusted Shannon Entropy v3+Dict (unknown formats)
278
+ // → Safe pattern exclusion (reduce false positives)
279
+ //
280
+ // Entropy v3+Dict formula:
281
+ // adjusted = baseShannon + trigramDensity×W1 - wordRatio×W2 - dictRatio×W3 - hexPenalty
282
+ //
283
+ // - baseShannon: Claude E. Shannon's 1948 "A Mathematical Theory of Communication"
284
+ // - trigramDensity: 3-char sliding window scores class transitions:
285
+ // • Letter↔Digit (digit in first 2 positions) → 1.0
286
+ // • Contains '-' with ≥3 classes → 1.0
287
+ // • AbA pattern (≥2 uppercase + lowercase) → 0.8
288
+ // X-class chars (not letter/digit/dash) split segments independently
289
+ // - wordRatio: vowel-containing lowercase fragments penalize secret likelihood
290
+ // - dictRatio: dictionary word coverage penalizes identifiers/English text
291
+ // - hexPenalty: 2.5, subtracted only if >90% hex AND contains '-' (UUID-like format)
292
+
293
+ type ToolTextContent = Extract<NonNullable<ToolResultEvent["content"]>[number], { type: "text" }>;
294
+
295
+ // ── Entropy Analysis v3+Dict ─────────────────────────────────────────────────
296
+ //
297
+ // Based on opencode-secrets-protect by Jared Scheel
298
+ // https://github.com/jscheel/opencode-secrets-protect (MIT License)
299
+ //
300
+ // Detection approach:
301
+ // 1. Split content by whitespace + code punctuation
302
+ // 2. For each token ≥ 16 chars, compute adjusted entropy:
303
+ // adjusted = baseShannon + trigramDensity×W1 - wordRatio×W2 - dictRatio×W3 - hexPenalty
304
+ // 3. Trigram density uses a 3-character sliding window:
305
+ // - AbA pattern (≥2 uppercase) → 0.8
306
+ // - Letter↔Digit (digit in first 2 positions) → 1.0
307
+ // - Contains '-' with ≥3 classes → 1.0
308
+ // X-class chars split the token into independent segments;
309
+ // the segment with the highest density is used.
310
+ // 4. wordRatio: ratio of vowel-containing lowercase fragments ≥3 chars
311
+ // 5. dictRatio: ratio of dictionary word coverage (2121 English + tech words)
312
+ // 6. hexPenalty: 2.5, subtracted only if >90% hex AND contains '-' (UUID-like format)
313
+
314
+ /** Character class: U=uppercase, L=lowercase, D=digit, S=dash, X=other */
315
+ function charClass(c: string): "U" | "L" | "D" | "S" | "X" {
316
+ const code = c.charCodeAt(0);
317
+ if (code >= 65 && code <= 90) return "U";
318
+ if (code >= 97 && code <= 122) return "L";
319
+ if (code >= 48 && code <= 57) return "D";
320
+ if (c === "-") return "S";
321
+ return "X";
183
322
  }
184
323
 
185
- function checkDangerous(command: string, cwd: string): string | null {
186
- return formatDangerousReasons(collectDangerousReasons(command, cwd));
324
+ /**
325
+ * Shannon entropy: measures average information content per character.
326
+ * H(X) = -Σ p(x) · log₂(p(x))
327
+ */
328
+ function shannonEntropy(data: string): number {
329
+ if (data.length === 0) return 0;
330
+ const freq = new Map<string, number>();
331
+ for (const char of data) {
332
+ freq.set(char, (freq.get(char) ?? 0) + 1);
333
+ }
334
+ let entropy = 0;
335
+ const len = data.length;
336
+ for (const count of freq.values()) {
337
+ const p = count / len;
338
+ entropy -= p * Math.log2(p);
339
+ }
340
+ return entropy;
187
341
  }
188
342
 
189
- // ─── Protected Paths ────────────────────────────────────────────────────────
343
+ /**
344
+ * Trigram (3-character sliding window) scoring.
345
+ * Rules (user-specified):
346
+ * - Pure digits → 0
347
+ * - Letter↔Digit switch (digit in first 2 positions, e.g. 4Vi, K9m, a9t) → 1.0
348
+ * - Contains '-' with ≥3 distinct classes → 1.0
349
+ * - Case switch AbA pattern (≥2 uppercase + ≥1 lowercase) → 0.8
350
+ * - Otherwise → 0
351
+ */
352
+ function trigramScore(c1: string, c2: string, c3: string): number {
353
+ const cls: string[] = [charClass(c1), charClass(c2), charClass(c3)];
190
354
 
191
- const PROTECTED_PATH_SEGMENTS = [
192
- ".env", ".git/", "node_modules/", ".ssh/",
193
- ".gnupg/", ".aws/", "secrets/", ".docker/",
194
- ];
195
- const PROTECTED_EXTENSIONS = [".pem", ".key", ".p12", ".pfx", ".keystore"];
196
- const PROTECTED_FILENAMES = [
197
- "id_rsa", "id_ed25519", "id_ecdsa",
198
- "authorized_keys", "known_hosts",
199
- ".env.local", ".env.production",
200
- ];
355
+ // Any X-class character → skip
356
+ if (cls.includes("X")) return 0;
201
357
 
202
- function checkProtectedPath(filePath: string): string | null {
203
- const normalized = filePath.replace(/\\/g, "/");
204
- const filename = normalized.split("/").pop() ?? "";
205
- for (const seg of PROTECTED_PATH_SEGMENTS) {
206
- if (normalized.includes(seg)) return `path contains "${seg}"`;
358
+ const unique = new Set(cls);
359
+
360
+ // Pure digits → 0
361
+ if (unique.size === 1 && cls[0] === "D") return 0;
362
+
363
+ // Contains '-' (S-class) with ≥3 distinct classes → 1.0
364
+ if (cls.includes("S") && unique.size >= 3) return 1.0;
365
+
366
+ // Letter↔Digit: digit must be in first 2 positions
367
+ const hasDigit = cls.includes("D");
368
+ const hasLetter = cls.includes("L") || cls.includes("U");
369
+ if (hasDigit && hasLetter && (cls[0] === "D" || cls[1] === "D")) return 1.0;
370
+
371
+ // Mixed-case trigram (the "AbA" rule): exactly 2 uppercase + 1 lowercase, in any order (e.g. KeA, KEa; not API)
372
+ const uCount = cls.filter(c => c === "U").length;
373
+ const lCount = cls.filter(c => c === "L").length;
374
+ if (uCount >= 2 && lCount >= 1) return 0.8;
375
+
376
+ return 0;
377
+ }
378
+
379
+ /**
380
+ * Split a token by X-class characters into independent segments, discarding segments shorter than 3 characters.
381
+ * This prevents `://`, `@`, `.` etc. from diluting trigram density.
382
+ */
383
+ function splitByXClass(token: string): string[] {
384
+ const segments: string[] = [];
385
+ let current = "";
386
+ for (const c of token) {
387
+ if (charClass(c) === "X") {
388
+ if (current.length >= 3) segments.push(current);
389
+ current = "";
390
+ } else {
391
+ current += c;
392
+ }
207
393
  }
208
- for (const ext of PROTECTED_EXTENSIONS) {
209
- if (normalized.endsWith(ext)) return `file extension "${ext}"`;
394
+ if (current.length >= 3) segments.push(current);
395
+ return segments;
396
+ }
397
+
398
+ /**
399
+ * Compute average trigram density for a single segment.
400
+ */
401
+ function segmentDensity(segment: string): number {
402
+ if (segment.length < 3) return 0;
403
+ let totalScore = 0;
404
+ for (let i = 0; i <= segment.length - 3; i++) {
405
+ totalScore += trigramScore(segment[i]!, segment[i + 1]!, segment[i + 2]!);
210
406
  }
211
- for (const name of PROTECTED_FILENAMES) {
212
- if (filename === name) return `protected file "${name}"`;
407
+ return totalScore / (segment.length - 2);
408
+ }
409
+
410
+ /**
411
+ * Compute the maximum segment density across all X-split segments.
412
+ * The segment with the highest density is the most likely secret region.
413
+ */
414
+ function maxSegmentDensity(token: string): number {
415
+ const segments = splitByXClass(token);
416
+ if (segments.length === 0) return 0;
417
+ let maxD = 0;
418
+ for (const seg of segments) {
419
+ const d = segmentDensity(seg);
420
+ if (d > maxD) maxD = d;
213
421
  }
214
- return null;
422
+ return maxD;
215
423
  }
216
424
 
217
- // ─── Secret Redact ──────────────────────────────────────────────────────────
425
+ /**
426
+ * Word ratio: fraction of token that consists of vowel-containing
427
+ * lowercase fragments ≥3 characters. Natural language words reduce
428
+ * the likelihood of being a secret.
429
+ */
430
+ function computeWordRatio(token: string): number {
431
+ // Split by class boundaries
432
+ const segments: string[] = [];
433
+ let current = "";
434
+ let prevClass = "";
435
+ for (const c of token) {
436
+ const cls = charClass(c);
437
+ if (cls === "X") {
438
+ if (current.length > 0) { segments.push(current); current = ""; }
439
+ prevClass = "";
440
+ continue;
441
+ }
442
+ if (cls !== prevClass && current.length > 0) {
443
+ segments.push(current);
444
+ current = "";
445
+ }
446
+ current += c;
447
+ prevClass = cls;
448
+ }
449
+ if (current.length > 0) segments.push(current);
218
450
 
219
- import { createEngine } from "@secretlint/node";
451
+ let wordLen = 0;
452
+ for (const seg of segments) {
453
+ if (seg.length >= 3 && /^[a-z]+$/.test(seg)) {
454
+ if (/[aeiou]/.test(seg)) wordLen += seg.length;
455
+ }
456
+ }
457
+ return token.length > 0 ? wordLen / token.length : 0;
458
+ }
220
459
 
221
- type SecretLintEngine = Awaited<ReturnType<typeof createEngine>>;
222
- type ToolTextContent = Extract<NonNullable<ToolResultEvent["content"]>[number], { type: "text" }>;
460
+ /**
461
+ * Hex ratio: fraction of characters that are hex characters (0-9, a-f, A-F, -).
462
+ * Values >0.9 indicate UUIDs or hex hashes which are safe.
463
+ */
464
+ function computeHexRatio(token: string): number {
465
+ let hexChars = 0;
466
+ for (const c of token) {
467
+ if (/[0-9a-fA-F\-]/.test(c)) hexChars++;
468
+ }
469
+ return token.length > 0 ? hexChars / token.length : 0;
470
+ }
223
471
 
224
- let engine: SecretLintEngine | null = null;
472
+ // ── Dictionary Words for Secret Detection ─────────────────────────────────────
473
+ //
474
+ // Based on Google 10K most common English words (len >= 4)
475
+ // + top 500 most common words (len >= 3)
476
+ // + ~80 common tech abbreviations that appear in code identifiers.
477
+ // Used by computeDictRatio() to penalize tokens containing known words.
478
+ //
479
+ // Word list source: https://github.com/first20hours/google-10000-english (public domain)
225
480
 
226
- function maskSecret(text: string): string {
227
- if (text.length <= 8) return "********";
228
- return text.slice(0, 4) + "********" + text.slice(-4);
481
+ const DICT_WORDS: ReadonlySet<string> = new Set(
482
+ // prettier-ignore
483
+ JSON.parse(`["ability","able","about","above","abstract","abuse","academic","accept","acceptance","accepted","access","accessories","accommodation","according","account","accounting","accounts","across","action","actions","active","activities","activity","actual","actually","added","addition","additional","address","adm","admin","administration","administrative","adult","advance","advanced","adventure","advertise","advertisement","advertising","advice","aes","affairs","affiliate","affiliates","africa","african","after","again","against","agencies","agency","agent","agents","agree","agreement","airport","album","allow","allowed","allows","almost","alone","along","already","also","alternative","although","always","amateur","amazon","america","american","among","amount","analysis","angeles","animal","animals","announcements","annual","another","answer","answers","anti","anyone","anything","apartments","api","apparel","appear","apple","application","applications","applied","apply","approach","appropriate","approval","approved","approximately","april","architecture","archive","archives","area","areas","argument","arizona","army","around","article","articles","artist","artists","arts","asia","asian","asked","assessment","assistance","assistant","associated","associates","association","attack","attention","attorney","auction","auctions","audio","august","australia","australian","auth","author","authority","authors","auto","automatically","automotive","availability","available","avenue","average","avg","avoid","award","awards","away","baby","back","background","balance","ball","band","bank","base","baseball","based","basic","basis","basket","battery","beach","beautiful","beauty","became","because","become","been","before","began","begin","beginning","behind","being","believe","below","benefit","benefits","best","better","between","beyond","bible","bill","birth","black","block","blog","blogs","blood","blue","board","boards","body","book","books","born","boston","both","bot
tom","boys","branch","brand","brands","break","breakfast","breast","bridge","bring","british","brought","brown","browse","browser","btn","budget","buf","build","building","built","bush","business","businesses","button","buyer","buying","cable","calendar","california","call","called","calls","came","camera","cameras","camp","campaign","campus","canada","canadian","cancer","canon","capacity","capital","card","cards","care","career","careers","carolina","cars","cart","case","cases","cash","casino","catalog","categories","category","cause","cb","cell","cells","center","centers","central","centre","century","certain","certificate","certified","cfg","chain","chair","challenge","chance","change","changed","changes","channel","chapter","character","characters","charge","charges","charles","chart","chat","cheap","check","chemical","chicago","chief","child","children","china","chinese","choice","choose","chris","christian","christmas","church","cities","city","civil","claim","claims","class","classes","classic","classifieds","clean","clear","cli","click","client","clients","clinical","close","closed","clothing","club","clubs","cnet","cnt","coast","code","codes","coffee","col","cold","collection","college","color","colorado","columbia","column","come","comes","coming","command","comment","comments","commerce","commercial","commission","committee","common","communication","communications","communities","community","companies","company","compare","compared","comparison","competition","complete","completed","complex","compliance","component","components","comprehensive","computer","computers","computing","condition","conditions","conference","configuration","congress","connect","connection","consider","considered","construction","consumer","contact","contacts","contains","content","contents","context","continue","continued","contract","control","cool","copy","copyright","core","corner","corporate","corporation","correct","cost","costs","could","council","count","counter","countri
es","country","county","couple","course","courses","court","cover","coverage","covered","cpu","create","created","creating","creative","credit","creek","crime","critical","cross","crud","css","csv","cultural","culture","currency","current","currently","custom","customer","customers","daily","damage","dance","dark","data","database","date","dates","dating","david","days","db","dead","deal","deals","death","debt","december","decision","deep","default","defense","define","defined","definition","degree","delivery","demand","department","described","description","design","designated","designed","desktop","detail","detailed","details","determine","determined","dev","develop","developed","developer","developing","development","device","devices","diamond","dictionary","died","diet","difference","different","difficult","digital","dir","direct","directions","directly","director","directory","disclaimer","discount","discuss","discussion","disease","disp","display","distance","distribution","district","division","dlg","dns","doctor","document","documentation","documents","does","doing","dollar","dollars","domain","domestic","done","door","double","down","download","downloads","draft","drive","driver","driving","drop","drug","drugs","dst","during","dvds","each","early","earth","easily","east","eastern","easy","ebay","economic","economy","edge","edit","edition","editor","education","educational","effect","effective","effects","effort","efforts","either","election","electric","electronic","electronics","element","elements","else","email","emergency","emit","employee","employees","employment","enable","ending","energy","engine","engineering","england","english","enjoy","enough","ensure","enter","enterprise","entertainment","entire","entries","entry","env","environment","environmental","equal","equipment","err","error","errors","especially","essential","established","estate","europe","european","evaluation","even","event","events","ever","every","everyone","everything","evidence","e
vt","example","examples","excellent","except","exchange","executive","exercise","existing","expect","expected","experience","expert","express","ext","extended","extension","external","extra","eyes","face","facilities","facility","fact","factor","factors","facts","faculty","failure","fair","faith","fall","families","family","fantasy","farm","fashion","fast","father","favorite","feat","feature","featured","features","february","federal","feed","feedback","feel","fees","feet","female","fiction","field","fields","figure","file","files","fill","film","films","filter","final","finally","finance","financial","find","finding","fine","fire","firm","first","fish","fishing","fitness","five","fixed","fixme","flag","flash","flat","flight","floor","florida","flow","flowers","focus","follow","following","follows","font","food","foot","football","force","ford","foreign","forest","form","format","former","forms","forum","forums","forward","found","foundation","four","frame","france","francisco","free","freedom","french","fresh","friday","friend","friendly","friends","from","front","ftr","fuel","full","fully","function","functional","functions","fund","funding","funds","furniture","further","future","galleries","gallery","game","games","gamma","garden","gave","gear","general","generally","generated","generation","george","georgia","german","germany","gets","getting","gid","gift","gifts","girl","girls","git","give","given","gives","giving","glass","global","goal","goals","goes","going","gold","golden","golf","gone","good","goods","google","government","gpt","gpu","grade","graduate","grand","grant","graphics","great","greater","green","ground","group","groups","growing","growth","grp","guarantee","guest","gui","guide","guidelines","guides","guitar","guys","hack","hair","half","hall","hand","hands","happy","hard","hardware","have","having","hdr","head","headlines","health","hear","heard","hearing","heart","heat","heavy","held","help","helpful","here","high","higher","highest","highly","
hill","himself","hire","historical","history","hits","hold","holiday","holidays","home","homepage","homes","hook","hope","horse","hospital","host","hosting","hotel","hotels","hour","hours","house","housing","houston","however","html","huge","human","icon","idea","ideas","identify","idx","illinois","image","images","img","immediately","impact","implementation","important","improve","improvement","inch","include","included","includes","including","income","increase","increased","independent","index","india","indian","individual","individuals","industrial","industry","info","information","informed","initial","input","inside","install","installation","instead","institute","institutions","instructions","instruments","insurance","int","integrated","intended","interactive","interest","interested","interesting","interests","interface","internal","international","internet","into","introduction","investment","involved","ipod","iraq","ireland","isbn","island","islands","israel","issue","issues","italian","italy","item","items","itself","jack","jackson","james","january","japan","japanese","java","jersey","jesus","jewelry","jobs","john","johnson","join","joined","joint","jones","journal","json","july","jump","june","just","justice","kansas","keep","key","keyword","keywords","kids","kind","kinds","king","kingdom","kitchen","know","knowledge","known","kong","label","labor","lake","lan","land","language","languages","large","larger","largest","last","late","later","latest","latin","laws","lead","leader","leaders","leadership","leading","league","learn","learning","least","leather","leave","left","legal","len","length","lesbian","less","letter","letters","level","levels","lib","library","license","life","light","like","likely","limit","limited","line","lines","link","links","linux","list","listed","listen","listing","listings","lists","literature","little","live","lives","living","llm","load","loan","loans","local","located","location","locations","login","logo","london","long","lo
nger","look","looking","looks","lord","loss","lost","lots","louis","love","lower","lowest","lyrics","mac","machine","machines","made","magazine","magazines","magic","mail","mailing","main","maintenance","major","make","makes","making","male","manage","management","manager","manual","manufacturer","manufacturing","many","maps","march","marine","mark","market","marketing","markets","martin","mary","mass","master","match","matching","material","materials","matter","mature","max","maximum","maybe","mean","means","measures","media","medical","medicine","medium","meet","meeting","meetings","mega","member","members","membership","memory","mental","menu","merchant","message","messages","metal","method","methods","mexico","michael","michigan","micro","microsoft","middle","might","mike","miles","military","million","min","mind","mini","minimum","minister","minnesota","minute","minutes","miss","missing","mission","mobile","mock","mod","mode","model","models","modern","modified","module","moment","monday","money","monitor","monitoring","month","monthly","months","more","morning","mortgage","most","mother","motion","motor","motorola","mount","mountain","move","moved","movement","movie","movies","moving","msg","much","multi","multimedia","multiple","museum","music","musical","must","myself","naked","name","names","nano","nation","national","native","natural","nature","nav","navigation","near","necessary","need","needed","needs","net","network","networking","networks","never","news","newsletter","next","nice","night","nlp","nokia","none","normal","north","northern","note","notes","nothing","notice","november","npm","num","number","numbers","nursing","oauth","object","october","offer","offered","offering","offers","office","officer","official","often","ohio","older","once","ones","online","only","ontario","open","opening","operating","operation","operations","opinion","opportunities","opportunity","ops","option","optional","options","oral","orange","order","orders","oregon","organi
zation","organizations","original","orm","oss","other","others","otherwise","outdoor","output","outside","over","overall","overview","owned","owner","owners","pacific","pack","package","packages","page","pages","paid","pain","palm","panel","paper","paperback","papers","parent","parents","paris","park","parking","part","particular","particularly","parties","partner","partners","parts","party","pass","password","past","patch","path","patient","patients","paul","payment","paypal","peace","pennsylvania","people","percent","perfect","performance","perhaps","period","perm","permission","person","personal","persons","peter","phase","phentermine","phone","phones","photo","photography","photos","physical","pick","pics","picture","pictures","pid","piece","pink","pip","pipe","pkg","place","placed","places","plan","planning","plans","plant","plants","plastic","platform","play","played","player","players","playing","please","plus","pocket","point","points","poker","pol","police","policies","policy","political","politics","pool","poor","pop","popular","population","port","pos","position","positive","possible","post","posted","poster","posters","posts","potential","power","powered","practice","practices","premium","present","presentation","presented","president","press","pressure","pretty","prev","prevent","previous","price","prices","pricing","primary","prime","print","printer","printing","prior","privacy","private","pro","probably","problem","problems","procedure","procedures","process","processes","processing","prod","produce","produced","product","production","products","professional","professor","profile","profit","program","programme","programming","programs","progress","project","projects","properties","property","proposed","protect","protection","protein","provide","provided","provider","providers","provides","providing","ptr","public","publication","publications","published","publisher","publishing","purchase","purpose","purposes","quality","quantity","quarter","question"
,"questions","quick","quickly","quite","quote","quotes","race","racing","radio","ram","random","range","rank","rate","rated","rates","rather","rating","ratings","reach","read","reader","readers","reading","ready","real","really","reason","reasons","receive","received","recent","recently","recipes","recommend","recommendations","recommended","record","records","recovery","reduce","ref","reference","references","regarding","region","regional","register","registered","registration","regular","regulations","related","relations","relationship","release","released","releases","relevant","religion","religious","remember","remote","remove","rent","rental","rentals","repair","replies","reply","report","reported","reporting","reports","republic","req","request","requests","require","required","requirements","requires","res","research","reserve","reserved","resolution","resort","resource","resources","respect","respective","response","responsibility","responsible","rest","restaurant","restaurants","result","results","retail","return","returns","rev","review","reviews","rich","richard","right","rights","ring","ringtones","risk","river","road","robert","rock","rol","role","room","rooms","root","rose","round","row","royal","rsa","rule","rules","running","russia","russian","safe","safety","said","saint","sale","sales","same","sample","samsung","santa","satellite","saturday","save","saying","says","scale","schedule","school","schools","science","sciences","scientific","score","scott","screen","sdk","search","searches","season","seattle","second","seconds","secretary","section","sections","sector","secure","security","seem","seems","seen","select","selected","selection","self","sell","seller","sellers","selling","send","senior","sense","sent","separate","september","sequence","series","serious","serve","server","servers","service","services","session","sets","setting","settings","seven","several","sha","shall","share","sheet","ship","shipping","ships","shirt","shirts","shoes","shop"
,"shopping","shops","short","shot","should","show","showing","shown","shows","sid","side","sign","signed","significant","silver","similar","simple","simply","since","single","site","sitemap","sites","situation","size","skills","skin","skip","small","smart","smith","snow","social","society","soft","software","sold","solid","solution","solutions","some","someone","something","sometimes","song","songs","sony","soon","sorry","sort","sorted","sound","source","sources","south","southern","space","spain","spanish","special","species","specific","specified","speed","spirit","sponsored","sport","sports","spring","sql","square","src","sre","ssd","ssh","ssl","staff","stage","stand","standard","standards","star","stars","start","started","starting","state","statement","statements","states","station","statistics","status","stay","steel","step","steps","steve","still","stock","stone","stop","storage","store","stores","stories","story","str","strategies","strategy","stream","street","string","strong","structure","stub","student","students","studies","studio","study","stuff","style","subject","subjects","submit","submitted","subs","subscribe","success","successful","such","suggest","suite","sum","summary","summer","sunday","super","supplies","supply","support","supported","sure","surface","surgery","survey","switch","system","systems","tab","table","tables","tag","tags","take","taken","takes","taking","talk","talking","target","task","tcp","teacher","teachers","teaching","team","tech","technical","techniques","technologies","technology","teen","teens","telephone","television","tell","temp","temperature","term","terms","test","testing","tests","texas","text","than","thank","thanks","that","their","them","theme","themselves","then","theory","therapy","there","therefore","these","they","thing","things","think","thinking","third","this","thomas","those","though","thought","thoughts","thousands","thread","three","through","throughout","thursday","thus","tickets","tid","time","times","ti
p","tips","title","titles","tls","tmp","today","todo","together","told","took","tool","tools","topic","topics","total","touch","tour","tours","towards","town","toys","track","trade","trademarks","trading","traditional","traffic","training","transfer","transport","transportation","travel","treatment","tree","trial","trip","true","trust","truth","trying","tuesday","turn","type","types","udp","uid","under","understand","understanding","union","unique","unit","united","units","universal","university","unknown","unless","until","update","updated","updates","upgrade","upon","upper","urban","url","used","useful","user","username","users","uses","using","usr","usually","vacation","val","valid","valley","value","values","variable","variety","various","vegas","vehicle","vehicles","ver","version","very","video","videos","view","viewed","views","village","virginia","virtual","virus","vision","visit","visitors","visual","voice","volume","vote","vpn","wait","walk","wall","wan","want","wanted","warning","washington","waste","watch","watches","water","ways","weather","website","websites","wedding","wednesday","week","weekend","weekly","weeks","weight","welcome","well","went","were","west","western","what","when","where","whether","which","while","white","whole","wholesale","whose","wide","wife","wild","will","william","williams","wind","window","windows","wine","winter","wireless","wish","with","within","without","woman","women","wood","word","words","work","worked","workers","working","works","workshop","world","worldwide","worth","would","write","writing","written","wrong","wrote","xbox","xml","yahoo","yaml","year","years","yellow","yesterday","york","young","your","yourself","youth","zealand","zone"]`)
484
+ );
485
+
486
/**
 * Fraction of a token's characters that belong to dictionary words.
 *
 * Every maximal run of three or more lowercase ASCII letters is scanned from
 * left to right. At each offset the longest slice found in DICT_WORDS is
 * consumed; when nothing matches, the scan advances by a single character.
 * The consumed character count is then divided by the full token length.
 *
 *   "devstral-small-2" → "dev" + "str" + "small" → 11 / 16
 *   "aB3xK9mPqR7wN"    → no lowercase run of 3+ letters → 0
 *
 * @param token Token to analyse.
 * @returns A ratio in [0, 1]; 0 when the token has no qualifying letter run.
 */
function computeDictRatio(token: string): number {
  const runs = token.match(/[a-z]{3,}/g);
  if (runs === null) return 0;

  let covered = 0;
  for (const run of runs) {
    let cursor = 0;
    while (cursor < run.length) {
      let wordLength = 0;
      // Widest candidate first, so the first hit is the longest word here.
      for (let stop = run.length; stop > cursor && wordLength === 0; stop--) {
        if (DICT_WORDS.has(run.slice(cursor, stop))) wordLength = stop - cursor;
      }
      covered += wordLength;
      cursor += wordLength > 0 ? wordLength : 1;
    }
  }

  // A non-null `runs` implies a non-empty token, so the division is safe.
  return covered / token.length;
}
230
535
 
231
- async function ensureEngine(): Promise<SecretLintEngine> {
232
- if (!engine) {
233
- engine = await createEngine({
234
- formatter: "json",
235
- color: false,
236
- maskSecrets: false,
237
- configFileJSON: {
238
- rules: [
239
- { id: "@secretlint/secretlint-rule-preset-recommend" },
240
- { id: "@secretlint/secretlint-rule-azure" },
241
- { id: "@secretlint/secretlint-rule-secp256k1-privatekey" },
242
- ],
243
- },
244
- });
536
+ // ── Entropy Constants ────────────────────────────────────────────────────────
537
+
538
/** Adjusted-entropy score a token must exceed to count as high entropy. */
const ENTROPY_THRESHOLD = 5.5;
/** Tokens shorter than this are never entropy-scored. */
const MIN_ENTROPY_TOKEN_LENGTH = 16;
/** Trigram density weight. */
const W1_DENSITY = 3.0;
/** Vowel-word penalty weight. */
const W2_WORD = 3.0;
/** Dictionary word penalty weight. */
const W3_DICT = 4.0;
/** Penalty for tokens made of more than 90% hex characters. */
const HEX_PENALTY = 2.5;
/** Hex-character ratio above which HEX_PENALTY becomes eligible. */
const HEX_RATIO_THRESHOLD = 0.9;
545
+
546
/**
 * Adjusted entropy (v3 + dictionary):
 *
 *   base Shannon entropy
 *   + trigram density × W1_DENSITY
 *   − word ratio × W2_WORD
 *   − dictionary ratio × W3_DICT
 *   − hex penalty
 *
 * @param data Token to score.
 * @returns The adjusted score; isHighEntropy compares it with ENTROPY_THRESHOLD.
 */
function calculateAdjustedEntropy(data: string): number {
  let score = shannonEntropy(data);
  score += maxSegmentDensity(data) * W1_DENSITY;
  score -= computeWordRatio(data) * W2_WORD;
  score -= computeDictRatio(data) * W3_DICT;

  // The hex penalty is reserved for hyphenated, UUID-like tokens (>90% hex
  // AND containing "-"). Pure hex without hyphens may be a real secret, so
  // it is left unpenalised.
  const uuidLike = computeHexRatio(data) > HEX_RATIO_THRESHOLD && data.includes("-");
  return uuidLike ? score - HEX_PENALTY : score;
}
565
+
566
/**
 * Reports whether a token should be treated as a high-entropy string.
 *
 * A token qualifies only if it has at least MIN_ENTROPY_TOKEN_LENGTH
 * characters, does not match any safe pattern, and its adjusted entropy is
 * strictly greater than ENTROPY_THRESHOLD. The checks run in that order and
 * stop at the first failure.
 */
function isHighEntropy(data: string): boolean {
  const longEnough = data.length >= MIN_ENTROPY_TOKEN_LENGTH;
  return longEnough
    && !isSafeContent(data)
    && calculateAdjustedEntropy(data) > ENTROPY_THRESHOLD;
}
571
+
572
/**
 * Extracts the high-entropy tokens from a block of text.
 *
 * The text is split on runs of whitespace and of these structural
 * delimiters: square brackets, curly braces, double and single quotes,
 * commas, forward and back slashes, and `| ( ) & # @ ! < > ?`.
 * As a result JSON values, URL path segments and query-string pieces are
 * scored individually rather than as one long token. Characters such as
 * `=`, `:`, `.`, `-` and `_` are not delimiters, so `KEY=value` stays a
 * single token.
 *
 * @param content Text to tokenize.
 * @returns Every token accepted by isHighEntropy, in order of appearance;
 *          a token that occurs several times is returned once per occurrence.
 */
function findHighEntropyTokens(content: string): string[] {
  // isHighEntropy already rejects tokens shorter than
  // MIN_ENTROPY_TOKEN_LENGTH (including the empty strings split() can emit),
  // so no separate length filter is needed here.
  return content
    .split(/[\s\[\]{}"',\/\\|()&#@!<>?]+/)
    .filter((token) => isHighEntropy(token));
}
580
+
581
+ // ── Known Secret Patterns ────────────────────────────────────────────────────
582
+
583
/** One known-secret detection rule. */
interface SecretPattern {
  /** Human-readable rule name, reported with each match. */
  name: string;
  /** Expression matched against the scanned content (declared without the `g` flag). */
  pattern: RegExp;
  /** Content shorter than this is not scanned with this rule. */
  minLength: number;
  /**
   * When false, a match that contains a literal space character is
   * discarded. Must be true for any rule whose regex needs whitespace in a
   * normal match (e.g. "Bearer <token>").
   */
  allowsSpaces: boolean;
  /** If true, skip safe-pattern exclusion (unambiguous prefix) */
  highConfidence: boolean;
}

const SECRET_PATTERNS: SecretPattern[] = [
  // AWS
  { name: "AWS Access Key ID", pattern: /AKIA[0-9A-Z]{16}/, minLength: 16, allowsSpaces: false, highConfidence: true },
  { name: "AWS Secret Access Key", pattern: /(?:aws)?_?(?:secret)?_?(?:access)?_?key['"\s:=]+['"]?[0-9a-zA-Z/+]{40}['"]?/i, minLength: 30, allowsSpaces: false, highConfidence: true },
  // GitHub
  { name: "GitHub OAuth Token", pattern: /gho_[0-9a-zA-Z]{36}/, minLength: 36, allowsSpaces: false, highConfidence: true },
  { name: "GitHub App Token", pattern: /(?:ghu|ghs)_[0-9a-zA-Z]{36}/, minLength: 36, allowsSpaces: false, highConfidence: true },
  { name: "GitHub PAT", pattern: /ghp_[0-9a-zA-Z]{36}/, minLength: 36, allowsSpaces: false, highConfidence: true },
  { name: "GitHub Fine-Grained Token", pattern: /github_pat_[0-9a-zA-Z_]{22,}/, minLength: 26, allowsSpaces: false, highConfidence: true },
  // GitLab
  { name: "GitLab PAT", pattern: /glpat-[0-9a-zA-Z\-_]{20,}/, minLength: 20, allowsSpaces: false, highConfidence: true },
  { name: "GitLab Runner Token", pattern: /glrt-[0-9a-zA-Z_\-]{20,}/, minLength: 20, allowsSpaces: false, highConfidence: true },
  // Slack
  { name: "Slack Token", pattern: /xox[baprs]-[0-9a-zA-Z\-]{10,48}/, minLength: 15, allowsSpaces: false, highConfidence: true },
  { name: "Slack Webhook URL", pattern: /https:\/\/hooks\.slack\.com\/services\/T[a-zA-Z0-9_]{8,}\/B[a-zA-Z0-9_]{8,}\/[a-zA-Z0-9_]{24}/, minLength: 60, allowsSpaces: false, highConfidence: true },
  // JWT
  { name: "JSON Web Token", pattern: /eyJ[a-zA-Z0-9_-]{10,}\.eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}/, minLength: 36, allowsSpaces: false, highConfidence: true },
  // Google
  { name: "Google API Key", pattern: /AIza[0-9A-Za-z\-_]{35}/, minLength: 35, allowsSpaces: false, highConfidence: true },
  { name: "Google OAuth Token", pattern: /ya29\.[0-9A-Za-z\-_]+/, minLength: 10, allowsSpaces: false, highConfidence: true },
  // Stripe
  { name: "Stripe Secret Key", pattern: /sk_live_[0-9a-zA-Z]{24,}/, minLength: 24, allowsSpaces: false, highConfidence: true },
  { name: "Stripe Restricted Key", pattern: /rk_live_[0-9a-zA-Z]{24,}/, minLength: 24, allowsSpaces: false, highConfidence: true },
  // Twilio / SendGrid / Discord
  { name: "Twilio API Key", pattern: /SK[a-z0-9]{32}/, minLength: 30, allowsSpaces: false, highConfidence: true },
  { name: "SendGrid API Key", pattern: /SG\.[a-zA-Z0-9_-]{22,}\.[a-zA-Z0-9_-]{40,}/, minLength: 40, allowsSpaces: false, highConfidence: true },
  { name: "Discord Bot Token", pattern: /[MN][A-Za-z\d]{23,}\.[\w-]{6}\.[\w-]{27,}/, minLength: 40, allowsSpaces: false, highConfidence: true },
  // OpenAI / Anthropic / Volcengine Ark
  { name: "OpenAI API Key", pattern: /sk-[a-zA-Z0-9]{20,}T3BlbkFJ[a-zA-Z0-9]{20,}/, minLength: 40, allowsSpaces: false, highConfidence: true },
  { name: "OpenAI API Key (New)", pattern: /sk-(?:proj-)?[a-zA-Z0-9\-_]{40,}/, minLength: 40, allowsSpaces: false, highConfidence: true },
  { name: "Anthropic API Key", pattern: /sk-ant-api[0-9]{2}-[a-zA-Z0-9\-_]{80,}/, minLength: 80, allowsSpaces: false, highConfidence: true },
  { name: "Volcengine Ark API Key", pattern: /ark-[a-zA-Z0-9\-_]{20,}/, minLength: 20, allowsSpaces: false, highConfidence: true },
  // NPM / PyPI
  { name: "NPM Token", pattern: /npm_[a-zA-Z0-9]{36}/, minLength: 36, allowsSpaces: false, highConfidence: true },
  { name: "PyPI Token", pattern: /pypi-[a-zA-Z0-9_\-]{50,}/, minLength: 50, allowsSpaces: false, highConfidence: true },
  // Private Keys
  { name: "RSA Private Key", pattern: /-----BEGIN RSA PRIVATE KEY-----/, minLength: 20, allowsSpaces: true, highConfidence: true },
  { name: "OpenSSH Private Key", pattern: /-----BEGIN OPENSSH PRIVATE KEY-----/, minLength: 20, allowsSpaces: true, highConfidence: true },
  { name: "EC Private Key", pattern: /-----BEGIN EC PRIVATE KEY-----/, minLength: 20, allowsSpaces: true, highConfidence: true },
  { name: "PGP Private Key", pattern: /-----BEGIN PGP PRIVATE KEY BLOCK-----/, minLength: 20, allowsSpaces: true, highConfidence: true },
  { name: "Generic Private Key", pattern: /-----BEGIN (?:ENCRYPTED )?PRIVATE KEY-----/, minLength: 20, allowsSpaces: true, highConfidence: true },
  // Database URIs
  { name: "MongoDB Connection String", pattern: /mongodb(?:\+srv)?:\/\/[^\s'"]+:[^\s'"]+@[^\s'"]+/, minLength: 20, allowsSpaces: false, highConfidence: true },
  { name: "PostgreSQL Connection String", pattern: /postgres(?:ql)?:\/\/[^\s'"]+:[^\s'"]+@[^\s'"]+/, minLength: 20, allowsSpaces: false, highConfidence: true },
  { name: "MySQL Connection String", pattern: /mysql:\/\/[^\s'"]+:[^\s'"]+@[^\s'"]+/, minLength: 20, allowsSpaces: false, highConfidence: true },
  { name: "Redis Connection String", pattern: /redis:\/\/[^\s'"]*:[^\s'"]+@[^\s'"]+/, minLength: 15, allowsSpaces: false, highConfidence: true },
  // URL-embedded passwords
  { name: "Password in URL", pattern: /[a-zA-Z]{3,10}:\/\/[^/\s:@]{3,20}:[^/\s:@]{3,20}@[^\s'"]+/, minLength: 15, allowsSpaces: false, highConfidence: true },
  // Generic assignments (lower confidence — checked against SAFE_PATTERNS)
  //
  // "Bearer" and "Basic" are separated from their credential by whitespace,
  // so these two rules must allow spaces; with allowsSpaces false the usual
  // "Bearer <token>" form was always discarded. The Bearer credential is
  // required to be at least 20 characters so that prose such as
  // "bearer token" is not reported.
  { name: "Bearer Token", pattern: /[Bb]earer\s+[a-zA-Z0-9\-._~+/]{20,}=*/, minLength: 27, allowsSpaces: true, highConfidence: false },
  { name: "Basic Auth Header", pattern: /[Bb]asic\s+[a-zA-Z0-9+/]{20,}={0,2}/, minLength: 20, allowsSpaces: true, highConfidence: false },
  // With allowsSpaces false, only space-free forms such as key=value or
  // key:"value" are kept for the two assignment rules below.
  { name: "API Key Assignment", pattern: /(?:api[_-]?key|apikey|api[_-]?secret)['"\s:=]+['"]?[a-zA-Z0-9\-._]{20,}['"]?/i, minLength: 20, allowsSpaces: false, highConfidence: false },
  { name: "Secret Assignment", pattern: /(?:secret|token|password|passwd|pwd)['"\s:=]+['"]?[a-zA-Z0-9\-._!@#$%^&*]{8,}['"]?/i, minLength: 12, allowsSpaces: false, highConfidence: false },
];
646
+
647
+ // ── Safe Patterns (exclude from detection to reduce false positives) ─────────
648
+
649
/**
 * Shapes that are treated as harmless. Content matching any of these is
 * excluded from low-confidence and entropy-based detection.
 */
const SAFE_PATTERNS: RegExp[] = [
  // URLs without credentials
  /^https?:\/\/[a-zA-Z0-9.-]+(?:\/[a-zA-Z0-9.\/_\-?&=#%]*)?$/,
  // Relative file paths
  /^\.\.?\/[a-zA-Z0-9_\-./]+$/,
  // Absolute Unix paths
  /^\/[a-zA-Z0-9_\-./]+$/,
  // Windows paths
  /^[a-zA-Z]:\\[a-zA-Z0-9_\-\\./]+$/,
  // Email addresses
  /^[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}$/,
  // UUIDs
  /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/,
  // Semver
  /^v?\d+\.\d+\.\d+(?:-[a-zA-Z0-9.]+)?(?:\+[a-zA-Z0-9.]+)?$/,
  // Placeholders (prefix match only)
  /^(?:xxx+|your[_-]?(?:api[_-]?)?key|placeholder|example|test|demo|sample)/i,
  // Git SHA-1
  /^[0-9a-f]{40}$/i,
  // SHA-256
  /^[0-9a-f]{64}$/i,
  // npm scoped packages
  /^@[a-z0-9-]+\/[a-z0-9-]+$/,
];
662
+
663
/**
 * Tells whether the given text matches at least one entry of SAFE_PATTERNS.
 *
 * @param content Text to test (a token or a pattern match).
 * @returns true as soon as one safe pattern matches, false otherwise.
 */
function isSafeContent(content: string): boolean {
  return SAFE_PATTERNS.some((pattern) => pattern.test(content));
}
248
669
 
249
- function extractRanges(jsonOutput: string): Array<{ start: number; end: number }> {
250
- try {
251
- const reports = JSON.parse(jsonOutput) as Array<{
252
- messages: Array<{ range: [number, number]; ruleId: string }>;
253
- }>;
254
- const ranges: Array<{ start: number; end: number }> = [];
255
- for (const report of reports) {
256
- for (const msg of report.messages) {
257
- ranges.push({ start: msg.range[0], end: msg.range[1] });
258
- }
670
+ // ── Detector ─────────────────────────────────────────────────────────────────
671
+
672
/** A detected secret and the half-open range [start, end) it occupies in the scanned text. */
interface SecretMatch {
  /** Name of the rule that produced the match, or "High Entropy String". */
  name: string;
  start: number;
  end: number;
  /** The covered text, i.e. content.slice(start, end). */
  original: string;
}

/** Content shorter than this is never scanned. */
const MIN_SCAN_LENGTH = 10;

/**
 * Finds secrets in a block of text.
 *
 * Three passes feed one result list:
 *   1. high-confidence rules (specific prefixes such as ghp_, AKIA);
 *   2. low-confidence rules (generic assignments), skipped when the match or
 *      its surrounding context looks safe, or when an earlier match already
 *      covers the range;
 *   3. entropy analysis of the remaining tokens.
 *
 * Matches whose ranges overlap are then merged into a single match, so the
 * returned ranges are pairwise disjoint. Callers replace ranges right-to-left
 * using offsets into the original text, which is only correct when no range
 * overlaps another.
 *
 * @param content Text to scan.
 * @returns Disjoint matches sorted by start position, descending. Empty when
 *          the content is shorter than MIN_SCAN_LENGTH or nothing is found.
 */
function detectSecrets(content: string): SecretMatch[] {
  if (content.length < MIN_SCAN_LENGTH) return [];

  const found: SecretMatch[] = [];
  const seenRanges = new Set<string>(); // deduplicate by position

  const isCovered = (start: number, end: number): boolean =>
    found.some((m) => m.start <= start && m.end >= end);

  const record = (name: string, start: number, end: number): void => {
    const key = `${start}-${end}`;
    if (seenRanges.has(key)) return;
    seenRanges.add(key);
    found.push({ name, start, end, original: content.slice(start, end) });
  };

  // Runs one rule over the content and returns its usable hits.
  const hitsFor = (sp: SecretPattern): Array<{ start: number; end: number; text: string }> => {
    if (content.length < sp.minLength) return [];
    // matchAll requires a global regex; do not append "g" twice.
    const flags = sp.pattern.flags.includes("g") ? sp.pattern.flags : sp.pattern.flags + "g";
    const hits: Array<{ start: number; end: number; text: string }> = [];
    for (const m of content.matchAll(new RegExp(sp.pattern.source, flags))) {
      const text = m[0];
      if (!text) continue;
      if (!sp.allowsSpaces && text.includes(" ")) continue;
      const start = m.index ?? 0;
      hits.push({ start, end: start + text.length, text });
    }
    return hits;
  };

  // Pass 1: high-confidence pattern matching.
  for (const sp of SECRET_PATTERNS) {
    if (!sp.highConfidence) continue;
    for (const hit of hitsFor(sp)) record(sp.name, hit.start, hit.end);
  }

  // Pass 2: low-confidence pattern matching.
  for (const sp of SECRET_PATTERNS) {
    if (sp.highConfidence) continue;
    for (const hit of hitsFor(sp)) {
      // Check against safe patterns to reduce false positives.
      if (isSafeContent(hit.text)) continue;
      // Also check surrounding context (e.g. "your_api_key=xxx" is a placeholder).
      const context = content.slice(Math.max(0, hit.start - 10), hit.end);
      if (isSafeContent(context)) continue;
      // Skip ranges already covered by an earlier match.
      if (isCovered(hit.start, hit.end)) continue;
      record(sp.name, hit.start, hit.end);
    }
  }

  // Pass 3: entropy analysis (catches unknown formats).
  // findHighEntropyTokens yields tokens in order of appearance, so searching
  // from the end of the previous token locates each occurrence in turn.
  // Searching from offset 0 would map every repeat of a token onto its first
  // occurrence and leave the later copies unredacted.
  let searchFrom = 0;
  for (const token of findHighEntropyTokens(content)) {
    if (isSafeContent(token)) continue;
    const start = content.indexOf(token, searchFrom);
    if (start === -1) continue;
    const end = start + token.length;
    searchFrom = end;
    if (isCovered(start, end)) continue;
    record("High Entropy String", start, end);
  }

  // Merge overlapping ranges. Ties on start put the wider range first, so the
  // merged entry keeps the name of the widest, earliest match.
  found.sort((a, b) => a.start - b.start || b.end - a.end);
  const merged: SecretMatch[] = [];
  for (const m of found) {
    const last = merged[merged.length - 1];
    if (last !== undefined && m.start < last.end) {
      if (m.end > last.end) {
        last.end = m.end;
        last.original = content.slice(last.start, last.end);
      }
      continue;
    }
    merged.push({ ...m });
  }

  // Descending start order for safe right-to-left replacement.
  return merged.reverse();
}
743
+
744
/**
 * Masks a secret for display.
 *
 * Long values keep their first and last four characters around a fixed
 * eight-asterisk mask so the reader can still tell which credential was
 * redacted. Values shorter than 16 characters are replaced entirely:
 * showing eight characters of such a value would reveal half or more of it
 * (a 9-character secret would expose all but one character).
 *
 * @param text The matched secret text.
 * @returns The masked representation; never contains the full input.
 */
function maskSecret(text: string): string {
  const MASK = "********";
  const REVEAL = 4; // characters kept at each end
  // Only reveal the ends when at least half of the value stays hidden.
  if (text.length < REVEAL * 4) return MASK;
  return text.slice(0, REVEAL) + MASK + text.slice(-REVEAL);
}
265
748
 
266
749
  // ─── Setup ──────────────────────────────────────────────────────────────────
@@ -269,42 +752,43 @@ export function setupSafety(pi: ExtensionAPI) {
269
752
  // ── Command Guard + Protected Paths + Write Guard (tool_call) ─────────
270
753
 
271
754
  pi.on("tool_call", async (event, ctx) => {
272
-
273
- // Gate 1: 危险命令
755
+
756
+ // Gate 1: 危险命令 + 覆盖写入 + 读取保护路径
274
757
  if (event.toolName === "bash") {
275
758
  const command = (event.input as { command?: string }).command;
276
759
  if (command) {
277
- const danger = checkDangerous(command, ctx.cwd);
278
- if (danger) {
760
+ const dangers = collectBashDangers(command, ctx.cwd);
761
+ if (dangers.length > 0) {
762
+ const message = formatBashDangers(dangers)!;
279
763
  if (!ctx.hasUI) {
280
- return { block: true, reason: `⛔ ${danger} (non-interactive)` };
764
+ return { block: true, reason: `\u26D4 ${message} (non-interactive)` };
281
765
  }
282
766
  const choice = await ctx.ui.select(
283
- `⚠️ ${danger}\n\nAllow execution?`,
767
+ `\u26A0\uFE0F ${message}\n\nAllow execution?`,
284
768
  ["Block", "Allow once"],
285
769
  );
286
770
  if (!choice || choice === "Block") {
287
- return { block: true, reason: `⛔ ${danger}` };
771
+ return { block: true, reason: `\u26D4 ${message}` };
288
772
  }
289
773
  }
290
774
  }
291
775
  }
292
776
 
293
- // Gate 2: 保护路径
777
+ // Gate 2: write/edit 写入保护路径
294
778
  if (event.toolName === "write" || event.toolName === "edit") {
295
779
  const filePath = (event.input as any).path ?? (event.input as any).file ?? (event.input as any).file_path;
296
780
  if (filePath) {
297
781
  const danger = checkProtectedPath(filePath);
298
782
  if (danger) {
299
783
  if (!ctx.hasUI) {
300
- return { block: true, reason: `🔐 ${danger}` };
784
+ return { block: true, reason: `\uD83D\uDD10 ${danger}\nmay contain sensitive information` };
301
785
  }
302
786
  const choice = await ctx.ui.select(
303
- `🔐 ${danger}\n\nProceed?`,
787
+ `\uD83D\uDD10 ${danger}\nmay contain sensitive information\n\nProceed?`,
304
788
  ["Block", "Allow once"],
305
789
  );
306
790
  if (!choice || choice === "Block") {
307
- return { block: true, reason: `🔐 ${danger}` };
791
+ return { block: true, reason: `\uD83D\uDD10 ${danger}\nmay contain sensitive information` };
308
792
  }
309
793
  }
310
794
  }
@@ -322,6 +806,26 @@ export function setupSafety(pi: ExtensionAPI) {
322
806
  } catch { /* file doesn't exist */ }
323
807
  }
324
808
  }
809
+
810
+ // Gate 4: read 工具读取保护路径(bash 读取已在 Gate 1 处理)
811
+ if (event.toolName === "read") {
812
+ const filePath = (event.input as any).path ?? (event.input as any).file ?? (event.input as any).file_path;
813
+ if (filePath) {
814
+ const danger = checkProtectedPath(filePath);
815
+ if (danger) {
816
+ if (!ctx.hasUI) {
817
+ return { block: true, reason: `\uD83D\uDD10 Reading protected file: ${danger}\nmay contain sensitive information` };
818
+ }
819
+ const choice = await ctx.ui.select(
820
+ `\uD83D\uDD10 Reading protected file: ${danger}\nmay contain sensitive information\n\nProceed?`,
821
+ ["Block", "Allow once"],
822
+ );
823
+ if (!choice || choice === "Block") {
824
+ return { block: true, reason: `\uD83D\uDD10 Reading protected file: ${danger}\nmay contain sensitive information` };
825
+ }
826
+ }
827
+ }
828
+ }
325
829
  });
326
830
 
327
831
  // ── Secret Redact (tool_result) ────────────────────────────────────────
@@ -332,6 +836,10 @@ export function setupSafety(pi: ExtensionAPI) {
332
836
  ): Promise<{ content?: NonNullable<ToolResultEvent["content"]> } | void> => {
333
837
  if (!event.content || !Array.isArray(event.content)) return;
334
838
 
839
+ // Only scan read tool output — other tools (bash, write, edit) are either
840
+ // covered by path guards or produce git/diff noise that causes false positives.
841
+ if (event.toolName !== "read") return;
842
+
335
843
  const textParts: Array<{ index: number; text: string; item: ToolTextContent }> = [];
336
844
  for (let i = 0; i < event.content.length; i++) {
337
845
  const item = event.content[i];
@@ -341,18 +849,16 @@ export function setupSafety(pi: ExtensionAPI) {
341
849
  }
342
850
  if (textParts.length === 0) return;
343
851
 
344
- const eng = await ensureEngine();
345
852
  let totalCount = 0;
346
853
  const newContent = [...event.content];
347
854
 
348
855
  for (const { index, text, item } of textParts) {
349
- const result = await eng.executeOnContent({ content: text, filePath: "tool-output.txt" });
350
- const ranges = extractRanges(result.output);
351
- if (ranges.length === 0) continue;
856
+ const matches = detectSecrets(text);
857
+ if (matches.length === 0) continue;
352
858
 
353
- totalCount += ranges.length;
859
+ totalCount += matches.length;
354
860
  let redacted = text;
355
- for (const { start, end } of ranges) {
861
+ for (const { start, end } of matches) {
356
862
  const original = redacted.slice(start, end);
357
863
  redacted = redacted.slice(0, start) + maskSecret(original) + redacted.slice(end);
358
864
  }
@@ -362,7 +868,7 @@ export function setupSafety(pi: ExtensionAPI) {
362
868
 
363
869
  if (totalCount === 0) return;
364
870
  const label = totalCount === 1 ? "1 secret" : `${totalCount} secrets`;
365
- ctx.ui.notify(`🔐 Redacted ${label} in ${event.toolName} output`, "warning");
871
+ ctx.ui.notify(`\uD83D\uDD10 Redacted ${label} in ${event.toolName} output`, "warning");
366
872
  return { content: newContent };
367
873
  };
368
874