decorated-pi 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,23 +1,14 @@
1
1
  /**
2
- * Safety — 安全防护模块
2
+ * Safety Detection 纯逻辑层(零 Pi 依赖)
3
3
  *
4
- * - Command Guard: 拦截危险 bash 命令(rm, sudo, npm publish, git push 等)
5
- * - Redirect Guard: bash 覆盖写入(>)提示确认,保护路径额外警告敏感信息
6
- * - Protected Paths: write/edit 写入保护路径需确认,提示敏感信息
7
- * - Read Guard: read/cat 等读取保护路径需确认,提示敏感信息
8
- * - Write Guard: 覆盖非空文件禁止 write 工具,建议用 edit
9
- * - Secret Redact: API Key / Token 自动掩码
4
+ * - Command Guard: 危险命令检测 + 覆盖写入检测 + 读取保护路径检测
5
+ * - Secret Detection: 40+ 高置信模式 + 熵分析 V3+Dict + 安全模式排除
6
+ *
7
+ * 本模块可独立测试,不依赖 Pi API。
10
8
  */
11
9
 
12
- import type {
13
- ExtensionAPI,
14
- ExtensionContext,
15
- ToolResultEvent,
16
- } from "@earendil-works/pi-coding-agent";
17
10
  import * as fs from "node:fs";
18
- import { resolve } from "node:path";
19
-
20
- // ─── 危险命令枚举 ──────────────────────────────────────────────────────────
11
+ import { basename, extname, resolve } from "node:path";
21
12
 
22
13
  const DANGEROUS_COMMANDS: [string, string[]][] = [
23
14
  ["rm", []],
@@ -56,7 +47,7 @@ const READ_COMMANDS = new Set([
56
47
  "file", "strings", "grep", "rg", "ag", "ack",
57
48
  ]);
58
49
 
59
- function checkProtectedPath(filePath: string): string | null {
50
+ export function checkProtectedPath(filePath: string): string | null {
60
51
  const normalized = filePath.replace(/\\/g, "/");
61
52
  const filename = normalized.split("/").pop() ?? "";
62
53
  for (const seg of PROTECTED_PATH_SEGMENTS) {
@@ -73,7 +64,7 @@ function checkProtectedPath(filePath: string): string | null {
73
64
 
74
65
  // ─── Shell tokenizer ────────────────────────────────────────────────────────
75
66
 
76
- function tokenizeShell(command: string): string[] {
67
+ export function tokenizeShell(command: string): string[] {
77
68
  const tokens: string[] = [];
78
69
  let current = "";
79
70
  let quote: "'" | '"' | null = null;
@@ -164,13 +155,13 @@ function isExistingRegularFile(target: string, cwd: string): boolean {
164
155
 
165
156
  // ─── Bash danger analysis ───────────────────────────────────────────────────
166
157
 
167
- interface BashDanger {
158
+ export interface BashDanger {
168
159
  reason: string;
169
160
  /** Whether the danger involves a protected (sensitive) path */
170
161
  protectedPath?: string;
171
162
  }
172
163
 
173
- function collectBashDangers(command: string, cwd: string): BashDanger[] {
164
+ export function collectBashDangers(command: string, cwd: string): BashDanger[] {
174
165
  const tokens = tokenizeShell(command);
175
166
  const dangers: BashDanger[] = [];
176
167
  const seen = new Set<string>();
@@ -261,7 +252,7 @@ function collectBashDangers(command: string, cwd: string): BashDanger[] {
261
252
  return dangers;
262
253
  }
263
254
 
264
- function formatBashDangers(dangers: BashDanger[]): string | null {
255
+ export function formatBashDangers(dangers: BashDanger[]): string | null {
265
256
  if (dangers.length === 0) return null;
266
257
  if (dangers.length === 1) return dangers[0]!.reason;
267
258
  return `dangerous operations detected:\n- ${dangers.map(d => d.reason).join("\n- ")}`;
@@ -290,9 +281,6 @@ function formatBashDangers(dangers: BashDanger[]): string | null {
290
281
  // - dictRatio: dictionary word coverage penalizes identifiers/English text
291
282
  // - hexPenalty: -2.5 only if >90% hex AND contains '-' (UUID-like format)
292
283
 
293
- type ToolTextContent = Extract<NonNullable<ToolResultEvent["content"]>[number], { type: "text" }>;
294
-
295
- // ── Entropy Analysis v3+Dict ─────────────────────────────────────────────────
296
284
  //
297
285
  // Based on opencode-secrets-protect by Jared Scheel
298
286
  // https://github.com/jscheel/opencode-secrets-protect (MIT License)
@@ -312,7 +300,7 @@ type ToolTextContent = Extract<NonNullable<ToolResultEvent["content"]>[number],
312
300
  // 6. hexPenalty: -2.5 only if >90% hex AND contains '-' (UUID-like format)
313
301
 
314
302
  /** Character class: U=uppercase, L=lowercase, D=digit, S=dash, X=other */
315
- function charClass(c: string): "U" | "L" | "D" | "S" | "X" {
303
+ export function charClass(c: string): "U" | "L" | "D" | "S" | "X" {
316
304
  const code = c.charCodeAt(0);
317
305
  if (code >= 65 && code <= 90) return "U";
318
306
  if (code >= 97 && code <= 122) return "L";
@@ -325,7 +313,7 @@ function charClass(c: string): "U" | "L" | "D" | "S" | "X" {
325
313
  * Shannon entropy: measures average information content per character.
326
314
  * H(X) = -Σ p(x) · log₂(p(x))
327
315
  */
328
- function shannonEntropy(data: string): number {
316
+ export function shannonEntropy(data: string): number {
329
317
  if (data.length === 0) return 0;
330
318
  const freq = new Map<string, number>();
331
319
  for (const char of data) {
@@ -344,12 +332,12 @@ function shannonEntropy(data: string): number {
344
332
  * Trigram (3-character sliding window) scoring.
345
333
  * Rules (user-specified):
346
334
  * - Pure digits → 0
347
- * - Letter↔Digit switch (digit in first 2 positions, e.g. 4Vi, K9m, a9t) → 1.0
335
+ * - Letter↔Digit switch (digit in first position, e.g. 4Vi) → 1.0
348
336
  * - Contains '-' with ≥3 distinct classes → 1.0
349
337
  * - Case switch AbA pattern (≥2 uppercase + ≥1 lowercase) → 0.8
350
338
  * - Otherwise → 0
351
339
  */
352
- function trigramScore(c1: string, c2: string, c3: string): number {
340
+ export function trigramScore(c1: string, c2: string, c3: string): number {
353
341
  const cls: string[] = [charClass(c1), charClass(c2), charClass(c3)];
354
342
 
355
343
  // Any X-class character → skip
@@ -363,10 +351,10 @@ function trigramScore(c1: string, c2: string, c3: string): number {
363
351
  // Contains '-' (S-class) with ≥3 distinct classes → 1.0
364
352
  if (cls.includes("S") && unique.size >= 3) return 1.0;
365
353
 
366
- // Letter↔Digit: digit must be in first 2 positions
354
+ // Letter↔Digit: digit must be in first position
367
355
  const hasDigit = cls.includes("D");
368
356
  const hasLetter = cls.includes("L") || cls.includes("U");
369
- if (hasDigit && hasLetter && (cls[0] === "D" || cls[1] === "D")) return 1.0;
357
+ if (hasDigit && hasLetter && cls[0] === "D") return 1.0;
370
358
 
371
359
  // AbA pattern: ≥2 uppercase + ≥1 lowercase (e.g. KeA, but not API)
372
360
  const uCount = cls.filter(c => c === "U").length;
@@ -380,7 +368,7 @@ function trigramScore(c1: string, c2: string, c3: string): number {
380
368
  * Split a token by X-class characters into independent segments.
381
369
  * This prevents `://`, `@`, `.` etc. from diluting trigram density.
382
370
  */
383
- function splitByXClass(token: string): string[] {
371
+ export function splitByXClass(token: string): string[] {
384
372
  const segments: string[] = [];
385
373
  let current = "";
386
374
  for (const c of token) {
@@ -398,7 +386,7 @@ function splitByXClass(token: string): string[] {
398
386
  /**
399
387
  * Compute average trigram density for a single segment.
400
388
  */
401
- function segmentDensity(segment: string): number {
389
+ export function segmentDensity(segment: string): number {
402
390
  if (segment.length < 3) return 0;
403
391
  let totalScore = 0;
404
392
  for (let i = 0; i <= segment.length - 3; i++) {
@@ -411,7 +399,7 @@ function segmentDensity(segment: string): number {
411
399
  * Compute the maximum segment density across all X-split segments.
412
400
  * The segment with the highest density is the most likely secret region.
413
401
  */
414
- function maxSegmentDensity(token: string): number {
402
+ export function maxSegmentDensity(token: string): number {
415
403
  const segments = splitByXClass(token);
416
404
  if (segments.length === 0) return 0;
417
405
  let maxD = 0;
@@ -424,35 +412,26 @@ function maxSegmentDensity(token: string): number {
424
412
 
425
413
  /**
426
414
  * Word ratio: fraction of token that consists of vowel-containing
427
- * lowercase fragments ≥3 characters. Natural language words reduce
428
- * the likelihood of being a secret.
415
+ * alphabetic fragments ≥3 characters, case-insensitive. Natural language
416
+ * words reduce the likelihood of being a secret.
429
417
  */
430
- function computeWordRatio(token: string): number {
431
- // Split by class boundaries
432
- const segments: string[] = [];
418
+ export function computeWordRatio(token: string): number {
419
+ const letterSeqs: string[] = [];
433
420
  let current = "";
434
- let prevClass = "";
435
421
  for (const c of token) {
436
422
  const cls = charClass(c);
437
- if (cls === "X") {
438
- if (current.length > 0) { segments.push(current); current = ""; }
439
- prevClass = "";
440
- continue;
441
- }
442
- if (cls !== prevClass && current.length > 0) {
443
- segments.push(current);
423
+ if (cls === "L" || cls === "U") {
424
+ current += c.toLowerCase();
425
+ } else {
426
+ if (current.length >= 3) letterSeqs.push(current);
444
427
  current = "";
445
428
  }
446
- current += c;
447
- prevClass = cls;
448
429
  }
449
- if (current.length > 0) segments.push(current);
430
+ if (current.length >= 3) letterSeqs.push(current);
450
431
 
451
432
  let wordLen = 0;
452
- for (const seg of segments) {
453
- if (seg.length >= 3 && /^[a-z]+$/.test(seg)) {
454
- if (/[aeiou]/.test(seg)) wordLen += seg.length;
455
- }
433
+ for (const seq of letterSeqs) {
434
+ if (/[aeiou]/.test(seq)) wordLen += seq.length;
456
435
  }
457
436
  return token.length > 0 ? wordLen / token.length : 0;
458
437
  }
@@ -461,7 +440,7 @@ function computeWordRatio(token: string): number {
461
440
  * Hex ratio: fraction of characters that are hex characters (0-9, a-f, A-F, -).
462
441
  * Values >0.9 indicate UUIDs or hex hashes which are safe.
463
442
  */
464
- function computeHexRatio(token: string): number {
443
+ export function computeHexRatio(token: string): number {
465
444
  let hexChars = 0;
466
445
  for (const c of token) {
467
446
  if (/[0-9a-fA-F\-]/.test(c)) hexChars++;
@@ -486,20 +465,22 @@ const DICT_WORDS: ReadonlySet<string> = new Set(
486
465
  /**
487
466
  * Dictionary word ratio: fraction of token characters covered by dictionary words.
488
467
  *
489
- * Extracts lowercase letter sequences from the token, then greedily matches
490
- * the longest dictionary word at each position. Returns matched character
468
+ * Extracts alphabetic sequences from the token (case-insensitive), then greedily
469
+ * matches the longest dictionary word at each position. Returns matched character
491
470
  * count / token length.
492
471
  *
493
472
  * "devstral-small-2" → finds "dev", "str", "small" → covers 11/16 chars
494
- * "aB3xK9mPqR7wN" no words found dictRatio = 0
473
+ * "NET_CHANNEL_INFO_REPORT_V20" → finds "net", "channel", "info", "report"
474
+ * "aB3xK9mPqR7wN" → no words found → dictRatio = 0
495
475
  */
496
- function computeDictRatio(token: string): number {
497
- // Extract lowercase letter sequences (>= 3 chars)
476
+ export function computeDictRatio(token: string): number {
477
+ // Extract alphabetic sequences (>= 3 chars), case-insensitive
498
478
  const lowerSeqs: string[] = [];
499
479
  let current = "";
500
480
  for (const c of token) {
501
- if (/[a-z]/.test(c)) {
502
- current += c;
481
+ const cls = charClass(c);
482
+ if (cls === "L" || cls === "U") {
483
+ current += c.toLowerCase();
503
484
  } else {
504
485
  if (current.length >= 3) lowerSeqs.push(current);
505
486
  current = "";
@@ -535,19 +516,19 @@ function computeDictRatio(token: string): number {
535
516
 
536
517
  // ── Entropy Constants ────────────────────────────────────────────────────────
537
518
 
538
- const ENTROPY_THRESHOLD = 5.5;
539
- const MIN_ENTROPY_TOKEN_LENGTH = 16;
540
- const W1_DENSITY = 3.0; // trigram density weight
541
- const W2_WORD = 3.0; // vowel-word penalty weight
542
- const W3_DICT = 4.0; // dictionary word penalty weight
543
- const HEX_PENALTY = 2.5; // penalty for >90% hex chars
544
- const HEX_RATIO_THRESHOLD = 0.9;
519
+ export const ENTROPY_THRESHOLD = 5.5;
520
+ export const MIN_ENTROPY_TOKEN_LENGTH = 32;
521
+ export const W1_DENSITY = 3.0;
522
+ export const W2_WORD = 3.0;
523
+ export const W3_DICT = 4.0;
524
+ export const HEX_PENALTY = 2.5;
525
+ export const HEX_RATIO_THRESHOLD = 0.9;
545
526
 
546
527
  /**
547
528
  * Adjusted entropy v3+Dict:
548
529
  * adjusted = baseShannon + trigramDensity×W1 - wordRatio×W2 - dictRatio×W3 - hexPenalty
549
530
  */
550
- function calculateAdjustedEntropy(data: string): number {
531
+ export function calculateAdjustedEntropy(data: string): number {
551
532
  const base = shannonEntropy(data);
552
533
  const density = maxSegmentDensity(data);
553
534
  const wordRatio = computeWordRatio(data);
@@ -563,7 +544,7 @@ function calculateAdjustedEntropy(data: string): number {
563
544
  return base + densityBoost - wordPenalty - dictPenalty - hp;
564
545
  }
565
546
 
566
- function isHighEntropy(data: string): boolean {
547
+ export function isHighEntropy(data: string): boolean {
567
548
  if (data.length < MIN_ENTROPY_TOKEN_LENGTH) return false;
568
549
  if (isSafeContent(data)) return false;
569
550
  return calculateAdjustedEntropy(data) > ENTROPY_THRESHOLD;
@@ -573,14 +554,14 @@ function isHighEntropy(data: string): boolean {
573
554
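  * Split on whitespace and delimiter characters: brackets, braces, quotes, commas, slashes, pipes, parentheses and &#@!<>?.
574
555
  * JSON structure, URLs, and connection strings are therefore broken into separate pieces; each piece is scored on its own.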
575
556
  */
576
- function findHighEntropyTokens(content: string): string[] {
557
+ export function findHighEntropyTokens(content: string): string[] {
577
558
  const tokens = content.split(/[\s\[\]{}"',\/\\|()&#@!<>?]+/);
578
559
  return tokens.filter(t => t.length >= MIN_ENTROPY_TOKEN_LENGTH && isHighEntropy(t));
579
560
  }
580
561
 
581
562
  // ── Known Secret Patterns ────────────────────────────────────────────────────
582
563
 
583
- interface SecretPattern {
564
+ export interface SecretPattern {
584
565
  name: string;
585
566
  pattern: RegExp;
586
567
  minLength: number;
@@ -589,7 +570,7 @@ interface SecretPattern {
589
570
  highConfidence: boolean;
590
571
  }
591
572
 
592
- const SECRET_PATTERNS: SecretPattern[] = [
573
+ export const SECRET_PATTERNS: SecretPattern[] = [
593
574
  // AWS
594
575
  { name: "AWS Access Key ID", pattern: /AKIA[0-9A-Z]{16}/, minLength: 16, allowsSpaces: false, highConfidence: true },
595
576
  { name: "AWS Secret Access Key", pattern: /(?:aws)?_?(?:secret)?_?(?:access)?_?key['"\s:=]+['"]?[0-9a-zA-Z/+]{40}['"]?/i, minLength: 30, allowsSpaces: false, highConfidence: true },
@@ -646,7 +627,7 @@ const SECRET_PATTERNS: SecretPattern[] = [
646
627
 
647
628
  // ── Safe Patterns (exclude from detection to reduce false positives) ─────────
648
629
 
649
- const SAFE_PATTERNS: RegExp[] = [
630
+ export const SAFE_PATTERNS: RegExp[] = [
650
631
  /^https?:\/\/[a-zA-Z0-9.-]+(?:\/[a-zA-Z0-9.\/_\-?&=#%]*)?$/, // URLs without credentials
651
632
  /^\.\.?\/[a-zA-Z0-9_\-./]+$/, // Relative file paths
652
633
  /^\/[a-zA-Z0-9_\-./]+$/, // Absolute Unix paths
@@ -660,7 +641,7 @@ const SAFE_PATTERNS: RegExp[] = [
660
641
  /^@[a-z0-9-]+\/[a-z0-9-]+$/, // npm scoped packages
661
642
  ];
662
643
 
663
- function isSafeContent(content: string): boolean {
644
+ export function isSafeContent(content: string): boolean {
664
645
  for (const pat of SAFE_PATTERNS) {
665
646
  if (pat.test(content)) return true;
666
647
  }
@@ -669,19 +650,118 @@ function isSafeContent(content: string): boolean {
669
650
 
670
651
  // ── Detector ─────────────────────────────────────────────────────────────────
671
652
 
672
- interface SecretMatch {
653
+ export type SecretMatchSource = "pattern" | "regex" | "entropy";
654
+
655
+ export interface SecretMatch {
673
656
  name: string;
674
657
  start: number;
675
658
  end: number;
676
659
  original: string;
660
+ source: SecretMatchSource;
661
+ }
662
+
663
+ export interface DetectSecretsOptions {
664
+ filePath?: string;
665
+ }
666
+
667
+ interface ConfigStringEntry {
668
+ key: string;
669
+ normalizedKey: string;
670
+ value: string;
671
+ start: number;
672
+ end: number;
677
673
  }
678
674
 
679
675
  const MIN_SCAN_LENGTH = 10;
676
+ const CONFIG_VALUE_MIN_LENGTH = 32; // minimum trimmed length a config value must reach in looksLikeSensitiveConfigValue
677
+ const CONFIG_FILE_EXTENSIONS = new Set([ // lower-case extensions that isConfigLikeFile accepts as config-like
678
+ ".json", ".jsonc", ".env", ".toml", ".yaml", ".yml",
679
+ ".ini", ".cfg", ".conf", ".properties",
680
+ ]);
681
+ const CONFIG_BASENAME_REGEX = /^\.env(?:\..+)?$/i; // matches the basenames .env and .env.<suffix>, case-insensitively
682
+ const SENSITIVE_CONFIG_KEY_REGEX = /(?:^|_)(?:apikey|api_(?:key|secret|token)|access_(?:key|token)|refresh_token|client_secret|secret(?:_key)?|private_key|bearer_token|auth(?:orization|_token)?|pass(?:word|wd)?|pwd|token|webhook_secret)(?:_|$)/i;
683
+ const PLACEHOLDER_VALUE_REGEX = /^(?:\$\{[^}]+\}|\{\{[^}]+\}\}|<[^>]+>|xxx+|placeholder|example|sample|demo|test|changeme|your[_-]?(?:api[_-]?)?key(?:[_-]?here)?)$/i;
684
+ const CONFIG_STRING_PATTERNS: RegExp[] = [ // key/value extractors, tried in order: double-quoted value, single-quoted value, then unquoted key=value
685
+ /(?<key>"[^"\r\n]+"|'[^'\r\n]+'|[A-Za-z0-9_.-]+)\s*[:=]\s*"(?<value>(?:\\.|[^"\\])*)"/g,
686
+ /(?<key>"[^"\r\n]+"|'[^'\r\n]+'|[A-Za-z0-9_.-]+)\s*[:=]\s*'(?<value>(?:\\.|[^'\\])*)'/g,
687
+ /(?<key>[A-Za-z0-9_.-]+)\s*=\s*(?<value>[^\r\n#;]+)/g,
688
+ ];
689
+
690
+ function normalizeConfigKey(key: string): string { // Canonicalizes a config key to lower snake_case: trims it, strips one surrounding quote per side, splits camelCase and acronym boundaries, maps runs of dot, dash or whitespace to one underscore, and drops leading and trailing underscores.
691
+ return key
692
+ .trim()
693
+ .replace(/^['"]|['"]$/g, "")
694
+ .replace(/([A-Z]+)([A-Z][a-z])/g, "$1_$2")
695
+ .replace(/([a-z0-9])([A-Z])/g, "$1_$2")
696
+ .toLowerCase()
697
+ .replace(/[.\-\s]+/g, "_")
698
+ .replace(/_+/g, "_")
699
+ .replace(/^_+|_+$/g, "");
700
+ }
680
701
 
681
- function detectSecrets(content: string): SecretMatch[] {
702
+ function isConfigLikeFile(filePath?: string): boolean { // Decides whether filePath names a config-like file, looking only at its basename and extension.
703
+ if (!filePath) return false; // no path supplied: treated as not config-like
704
+ const name = basename(filePath);
705
+ if (CONFIG_BASENAME_REGEX.test(name)) return true; // dotenv-style basenames are accepted by name
706
+ return CONFIG_FILE_EXTENSIONS.has(extname(name).toLowerCase()); // otherwise the lower-cased extension must be in CONFIG_FILE_EXTENSIONS
707
+ }
708
+
709
+ function looksLikeSensitiveConfigValue(value: string): boolean { // Returns true when the trimmed value survives every exclusion below and is at least CONFIG_VALUE_MIN_LENGTH characters long.
710
+ const trimmed = value.trim();
711
+ if (!trimmed) return false; // empty or whitespace-only
712
+ if (PLACEHOLDER_VALUE_REGEX.test(trimmed)) return false; // template variables and obvious placeholder words
713
+ if (isSafeContent(trimmed)) return false; // matches one of the SAFE_PATTERNS
714
+ if (/^(?:true|false|null)$/i.test(trimmed)) return false; // boolean or null literal
715
+ if (/^[+-]?\d+(?:\.\d+)?$/.test(trimmed)) return false; // plain integer or decimal number
716
+ return trimmed.length >= CONFIG_VALUE_MIN_LENGTH;
717
+ }
718
+
719
+ function extractConfigStringEntries(content: string): ConfigStringEntry[] { // Runs every CONFIG_STRING_PATTERNS regex over content and returns one entry per distinct value range; start and end are offsets of the value inside content.
720
+ const entries: ConfigStringEntry[] = [];
721
+ const seen = new Set<string>();
722
+
723
+ for (const pattern of CONFIG_STRING_PATTERNS) {
724
+ for (const match of content.matchAll(pattern)) {
725
+ const key = match.groups?.key;
726
+ const value = match.groups?.value;
727
+ if (!key || value === undefined || match.index === undefined) continue;
728
+ const full = match[0] ?? "";
729
+ const rel = full.lastIndexOf(value); // the value is the last capture in every pattern, so search from the right; a left-to-right search lands inside the key whenever the key text contains the value
730
+ if (rel < 0) continue;
731
+ const start = match.index + rel;
732
+ const end = start + value.length;
733
+ const dedupeKey = `${start}-${end}`;
734
+ if (seen.has(dedupeKey)) continue; // same value range already produced by an earlier pattern; keep the first entry
735
+ seen.add(dedupeKey);
736
+ entries.push({
737
+ key,
738
+ normalizedKey: normalizeConfigKey(key),
739
+ value,
740
+ start,
741
+ end,
742
+ });
743
+ }
744
+ }
745
+
746
+ return entries;
747
+ }
748
+
749
+ function addMatch(matches: SecretMatch[], seen: Set<string>, match: SecretMatch): void { // Appends match to matches unless a match with the identical start-end range was already recorded in seen; both arguments are mutated.
750
+ const key = `${match.start}-${match.end}`;
751
+ if (seen.has(key)) return; // exact same range already recorded: the first match wins
752
+ seen.add(key);
753
+ matches.push(match);
754
+ }
755
+
756
+ function isCoveredByExistingMatch(matches: SecretMatch[], start: number, end: number): boolean { // True when the half-open range [start, end) overlaps any existing match; despite the name, a partial overlap counts, not only full containment.
757
+ return matches.some((existing) => !(end <= existing.start || start >= existing.end));
758
+ }
759
+
760
+ export function detectSecrets(content: string, options: DetectSecretsOptions = {}): SecretMatch[] {
682
761
  if (content.length < MIN_SCAN_LENGTH) return [];
683
762
  const matches: SecretMatch[] = [];
684
- const seen = new Set<string>(); // deduplicate by position
763
+ const seen = new Set<string>();
764
+ const configLike = isConfigLikeFile(options.filePath);
685
765
 
686
766
  // Pass 1: High-confidence pattern matching (specific prefixes like ghp_, AKIA)
687
767
  for (const sp of SECRET_PATTERNS) {
@@ -689,188 +769,63 @@ function detectSecrets(content: string): SecretMatch[] {
689
769
  if (content.length < sp.minLength) continue;
690
770
  for (const m of content.matchAll(new RegExp(sp.pattern.source, sp.pattern.flags + "g"))) {
691
771
  const text = m[0];
692
- if (!text) continue;
772
+ if (!text || m.index === undefined) continue;
693
773
  if (!sp.allowsSpaces && text.includes(" ")) continue;
694
- const key = `${m.index}-${m.index + text.length}`;
695
- if (seen.has(key)) continue;
696
- seen.add(key);
697
- matches.push({ name: sp.name, start: m.index!, end: m.index! + text.length, original: text });
774
+ addMatch(matches, seen, {
775
+ name: sp.name,
776
+ start: m.index,
777
+ end: m.index + text.length,
778
+ original: text,
779
+ source: "pattern",
780
+ });
698
781
  }
699
782
  }
700
783
 
701
- // Pass 2: Low-confidence pattern matching (generic assignments like secret=xxx)
702
- // Skip ranges already covered by high-confidence matches
703
- for (const sp of SECRET_PATTERNS) {
704
- if (sp.highConfidence) continue;
705
- if (content.length < sp.minLength) continue;
706
- for (const m of content.matchAll(new RegExp(sp.pattern.source, sp.pattern.flags + "g"))) {
707
- const text = m[0];
708
- if (!text) continue;
709
- if (!sp.allowsSpaces && text.includes(" ")) continue;
710
- // Check against safe patterns to reduce false positives
711
- if (isSafeContent(text)) continue;
712
- // Also check surrounding context (e.g. "your_api_key=xxx" is a placeholder)
713
- const contextStart = Math.max(0, m.index! - 10);
714
- const context = content.slice(contextStart, m.index! + text.length);
715
- if (isSafeContent(context)) continue;
716
- // Skip if range already covered by a high-confidence match
717
- const start = m.index!, end = m.index! + text.length;
718
- if (matches.some(hc => hc.start <= start && hc.end >= end)) continue;
719
- const key = `${start}-${end}`;
720
- if (seen.has(key)) continue;
721
- seen.add(key);
722
- matches.push({ name: sp.name, start, end, original: text });
784
+ if (configLike) {
785
+ const entries = extractConfigStringEntries(content);
786
+
787
+ // Pass 2: Regex key-name matching for config-like files only
788
+ for (const entry of entries) {
789
+ if (!SENSITIVE_CONFIG_KEY_REGEX.test(entry.normalizedKey)) continue;
790
+ if (!looksLikeSensitiveConfigValue(entry.value)) continue;
791
+ if (isCoveredByExistingMatch(matches, entry.start, entry.end)) continue;
792
+ addMatch(matches, seen, {
793
+ name: `Sensitive config key: ${entry.normalizedKey}`,
794
+ start: entry.start,
795
+ end: entry.end,
796
+ original: entry.value,
797
+ source: "regex",
798
+ });
723
799
  }
724
- }
725
800
 
726
- // Pass 3: Entropy analysis (catches unknown formats like third-party sk- keys)
727
- const highEntropyTokens = findHighEntropyTokens(content);
728
- for (const token of highEntropyTokens) {
729
- if (isSafeContent(token)) continue;
730
- const idx = content.indexOf(token);
731
- if (idx === -1) continue;
732
- // Skip if already covered by a pattern match
733
- if (matches.some(m => m.start <= idx && m.end >= idx + token.length)) continue;
734
- const key = `${idx}-${idx + token.length}`;
735
- if (seen.has(key)) continue;
736
- seen.add(key);
737
- matches.push({ name: "High Entropy String", start: idx, end: idx + token.length, original: token });
801
+ // Pass 3: Entropy analysis for config-like files only
802
+ for (const entry of entries) {
803
+ if (isCoveredByExistingMatch(matches, entry.start, entry.end)) continue;
804
+ if (!looksLikeSensitiveConfigValue(entry.value)) continue;
805
+ if (!isHighEntropy(entry.value)) continue;
806
+ addMatch(matches, seen, {
807
+ name: "High Entropy String",
808
+ start: entry.start,
809
+ end: entry.end,
810
+ original: entry.value,
811
+ source: "entropy",
812
+ });
813
+ }
738
814
  }
739
815
 
740
816
  // Sort by start position descending for safe right-to-left replacement
741
817
  return matches.sort((a, b) => b.start - a.start);
742
818
  }
743
819
 
744
- function maskSecret(text: string): string {
745
- if (text.length <= 8) return "********";
746
- return text.slice(0, 4) + "********" + text.slice(-4);
820
+ function getMaskChar(source?: SecretMatchSource): string { // Picks the masking character by detection source: # for regex, ? for entropy, * for pattern or when source is omitted.
821
+ if (source === "regex") return "#";
822
+ if (source === "entropy") return "?";
823
+ return "*";
747
824
  }
748
825
 
749
- // ─── Setup ──────────────────────────────────────────────────────────────────
750
-
751
- export function setupSafety(pi: ExtensionAPI) {
752
- // ── Command Guard + Protected Paths + Write Guard (tool_call) ─────────
753
-
754
- pi.on("tool_call", async (event, ctx) => {
755
-
756
- // Gate 1: 危险命令 + 覆盖写入 + 读取保护路径
757
- if (event.toolName === "bash") {
758
- const command = (event.input as { command?: string }).command;
759
- if (command) {
760
- const dangers = collectBashDangers(command, ctx.cwd);
761
- if (dangers.length > 0) {
762
- const message = formatBashDangers(dangers)!;
763
- if (!ctx.hasUI) {
764
- return { block: true, reason: `\u26D4 ${message} (non-interactive)` };
765
- }
766
- const choice = await ctx.ui.select(
767
- `\u26A0\uFE0F ${message}\n\nAllow execution?`,
768
- ["Block", "Allow once"],
769
- );
770
- if (!choice || choice === "Block") {
771
- return { block: true, reason: `\u26D4 ${message}` };
772
- }
773
- }
774
- }
775
- }
776
-
777
- // Gate 2: write/edit 写入保护路径
778
- if (event.toolName === "write" || event.toolName === "edit") {
779
- const filePath = (event.input as any).path ?? (event.input as any).file ?? (event.input as any).file_path;
780
- if (filePath) {
781
- const danger = checkProtectedPath(filePath);
782
- if (danger) {
783
- if (!ctx.hasUI) {
784
- return { block: true, reason: `\uD83D\uDD10 ${danger}\nmay contain sensitive information` };
785
- }
786
- const choice = await ctx.ui.select(
787
- `\uD83D\uDD10 ${danger}\nmay contain sensitive information\n\nProceed?`,
788
- ["Block", "Allow once"],
789
- );
790
- if (!choice || choice === "Block") {
791
- return { block: true, reason: `\uD83D\uDD10 ${danger}\nmay contain sensitive information` };
792
- }
793
- }
794
- }
795
- }
796
-
797
- // Gate 3: 写保护(已有内容的文件禁止 write,直接返回信息给 agent)
798
- if (event.toolName === "write") {
799
- const filePath = (event.input as any).path ?? (event.input as any).file ?? (event.input as any).file_path;
800
- if (filePath) {
801
- try {
802
- const abs = resolve(ctx.cwd, filePath);
803
- if (fs.existsSync(abs) && fs.readFileSync(abs, "utf8").length > 0) {
804
- return { block: true, reason: "Overwriting a non-empty file is dangerous, use the edit tool instead!" };
805
- }
806
- } catch { /* file doesn't exist */ }
807
- }
808
- }
809
-
810
- // Gate 4: read 工具读取保护路径(bash 读取已在 Gate 1 处理)
811
- if (event.toolName === "read") {
812
- const filePath = (event.input as any).path ?? (event.input as any).file ?? (event.input as any).file_path;
813
- if (filePath) {
814
- const danger = checkProtectedPath(filePath);
815
- if (danger) {
816
- if (!ctx.hasUI) {
817
- return { block: true, reason: `\uD83D\uDD10 Reading protected file: ${danger}\nmay contain sensitive information` };
818
- }
819
- const choice = await ctx.ui.select(
820
- `\uD83D\uDD10 Reading protected file: ${danger}\nmay contain sensitive information\n\nProceed?`,
821
- ["Block", "Allow once"],
822
- );
823
- if (!choice || choice === "Block") {
824
- return { block: true, reason: `\uD83D\uDD10 Reading protected file: ${danger}\nmay contain sensitive information` };
825
- }
826
- }
827
- }
828
- }
829
- });
830
-
831
- // ── Secret Redact (tool_result) ────────────────────────────────────────
832
-
833
- const handleToolResult = async (
834
- event: ToolResultEvent,
835
- ctx: ExtensionContext,
836
- ): Promise<{ content?: NonNullable<ToolResultEvent["content"]> } | void> => {
837
- if (!event.content || !Array.isArray(event.content)) return;
838
-
839
- // Only scan read tool output — other tools (bash, write, edit) are either
840
- // covered by path guards or produce git/diff noise that causes false positives.
841
- if (event.toolName !== "read") return;
842
-
843
- const textParts: Array<{ index: number; text: string; item: ToolTextContent }> = [];
844
- for (let i = 0; i < event.content.length; i++) {
845
- const item = event.content[i];
846
- if (item.type === "text" && typeof item.text === "string" && item.text.length > 0) {
847
- textParts.push({ index: i, text: item.text, item });
848
- }
849
- }
850
- if (textParts.length === 0) return;
851
-
852
- let totalCount = 0;
853
- const newContent = [...event.content];
854
-
855
- for (const { index, text, item } of textParts) {
856
- const matches = detectSecrets(text);
857
- if (matches.length === 0) continue;
858
-
859
- totalCount += matches.length;
860
- let redacted = text;
861
- for (const { start, end } of matches) {
862
- const original = redacted.slice(start, end);
863
- redacted = redacted.slice(0, start) + maskSecret(original) + redacted.slice(end);
864
- }
865
- const updatedItem: ToolTextContent = { ...item, text: redacted };
866
- newContent[index] = updatedItem;
867
- }
868
-
869
- if (totalCount === 0) return;
870
- const label = totalCount === 1 ? "1 secret" : `${totalCount} secrets`;
871
- ctx.ui.notify(`\uD83D\uDD10 Redacted ${label} in ${event.toolName} output`, "warning");
872
- return { content: newContent };
873
- };
874
-
875
- pi.on("tool_result", handleToolResult);
826
+ export function maskSecret(text: string, source?: SecretMatchSource): string { // Masks a detected secret with the source-specific mask character; the result has the same length as text.
827
+ const maskChar = getMaskChar(source);
828
+ if (text.length <= 6) return maskChar.repeat(text.length); // 6 characters or fewer: mask everything, nothing stays visible
829
+ return text.slice(0, 3) + maskChar.repeat(text.length - 6) + text.slice(-3); // otherwise keep the first 3 and last 3 characters visible
876
830
  }
831
+