decorated-pi 0.2.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,226 @@
1
+ /**
2
+ * Safety Detection — Shannon entropy and adjusted entropy analysis
3
+ */
4
+
5
+ import { isSafeContent } from "./patterns.js";
6
+
7
+ // ─── Character classification ────────────────────────────────────────────
8
+
9
/**
 * Classify a character into one of five coarse classes.
 *
 * Only the first UTF-16 code unit of `c` is inspected for the letter and
 * digit ranges; the dash class requires `c` to be exactly "-".
 *
 * @param c - The character to classify.
 * @returns "U" for A–Z, "L" for a–z, "D" for 0–9, "S" for "-", and "X" for
 *   anything else (including the empty string).
 */
export function charClass(c: string): "U" | "L" | "D" | "S" | "X" {
  const unit = c.charCodeAt(0);
  // NaN (empty string) fails every range test and falls through to "X".
  const within = (lo: number, hi: number): boolean => lo <= unit && unit <= hi;
  if (within(0x41, 0x5a)) return "U";
  if (within(0x61, 0x7a)) return "L";
  if (within(0x30, 0x39)) return "D";
  return c === "-" ? "S" : "X";
}
18
+
19
+ // ─── Shannon entropy ──────────────────────────────────────────────────────
20
+
21
/**
 * Shannon entropy in bits per symbol: H(X) = -Σ p(x) · log₂(p(x)).
 *
 * Symbols are Unicode code points (the units produced by string iteration).
 * Probabilities are normalised by the number of code points, not by
 * `data.length`, so characters outside the BMP (which occupy two UTF-16
 * code units) do not skew the distribution.
 *
 * @param data - The string to measure.
 * @returns The entropy of the symbol distribution; 0 for an empty string.
 */
export function shannonEntropy(data: string): number {
  if (data.length === 0) return 0;
  const freq = new Map<string, number>();
  let total = 0;
  for (const symbol of data) {
    freq.set(symbol, (freq.get(symbol) ?? 0) + 1);
    total++;
  }
  let entropy = 0;
  for (const count of freq.values()) {
    const p = count / total;
    entropy -= p * Math.log2(p);
  }
  return entropy;
}
34
+
35
+ // ─── Trigram density ──────────────────────────────────────────────────────
36
+
37
/**
 * Score one 3-character window by its mix of character classes.
 *
 * Rules, checked in this order:
 * - any "X" (other) character            → 0
 * - three digits                          → 0
 * - a dash plus ≥3 distinct classes       → 1.0
 * - digit in first position plus a letter → 1.0
 * - ≥2 uppercase and ≥1 lowercase         → 0.8
 * - anything else                         → 0
 *
 * @param c1 - First character of the window.
 * @param c2 - Second character of the window.
 * @param c3 - Third character of the window.
 * @returns The score for the window: 0, 0.8 or 1.0.
 */
export function trigramScore(c1: string, c2: string, c3: string): number {
  const lead = charClass(c1);
  const tally = { U: 0, L: 0, D: 0, S: 0, X: 0 };
  for (const kind of [lead, charClass(c2), charClass(c3)]) tally[kind] += 1;

  if (tally.X > 0) return 0;
  if (tally.D === 3) return 0;

  const distinct = Object.values(tally).filter(n => n > 0).length;
  if (tally.S > 0 && distinct >= 3) return 1.0;

  // A leading digit already guarantees "has a digit"; only a letter is needed.
  if (lead === "D" && tally.U + tally.L > 0) return 1.0;

  return tally.U >= 2 && tally.L >= 1 ? 0.8 : 0;
}
58
+
59
/**
 * Break a token into runs of non-"X" characters (letters, digits, dashes).
 *
 * Every "X"-class character acts as a separator and is dropped. Runs shorter
 * than 3 characters are discarded.
 *
 * @param token - The token to split.
 * @returns The surviving runs, in order of appearance.
 */
export function splitByXClass(token: string): string[] {
  const kept: string[] = [];
  let run = "";
  const flush = (): void => {
    if (run.length >= 3) kept.push(run);
    run = "";
  };
  for (const ch of token) {
    if (charClass(ch) !== "X") {
      run += ch;
      continue;
    }
    flush();
  }
  flush();
  return kept;
}
74
+
75
/**
 * Mean trigram score over every 3-character sliding window of a segment.
 *
 * @param segment - The segment to score.
 * @returns The sum of window scores divided by the number of windows
 *   (`segment.length - 2`); 0 when the segment has fewer than 3 characters.
 */
export function segmentDensity(segment: string): number {
  const windows = segment.length - 2;
  if (windows < 1) return 0;
  let sum = 0;
  for (let start = 0; start < windows; start++) {
    sum += trigramScore(
      segment.charAt(start),
      segment.charAt(start + 1),
      segment.charAt(start + 2),
    );
  }
  return sum / windows;
}
84
+
85
/**
 * Highest segment density among the segments produced by `splitByXClass`.
 *
 * @param token - The token to analyse.
 * @returns The largest per-segment density, or 0 when the token yields no
 *   segments.
 */
export function maxSegmentDensity(token: string): number {
  return splitByXClass(token).reduce((best, seg) => {
    const density = segmentDensity(seg);
    return density > best ? density : best;
  }, 0);
}
96
+
97
+ // ─── Word / dictionary / hex ratios ───────────────────────────────────────
98
+
99
/**
 * Word ratio: share of the token made up of "word-like" letter runs.
 *
 * A run is a maximal sequence of ASCII letters; it counts as word-like when
 * it is at least 3 characters long and contains a vowel (a, e, i, o, u,
 * compared case-insensitively). Natural-language words reduce the likelihood
 * that a token is a secret.
 *
 * @param token - The token to analyse.
 * @returns Total length of word-like runs divided by `token.length`, or 0
 *   when there are none.
 */
export function computeWordRatio(token: string): number {
  const hasVowel = /[aeiou]/;
  let wordChars = 0;
  let run = "";
  const closeRun = (): void => {
    if (run.length >= 3 && hasVowel.test(run)) wordChars += run.length;
    run = "";
  };
  for (const ch of token) {
    const kind = charClass(ch);
    if (kind === "U" || kind === "L") {
      run += ch.toLowerCase();
    } else {
      closeRun();
    }
  }
  closeRun();
  return wordChars > 0 ? wordChars / token.length : 0;
}
120
+
121
/**
 * Hex ratio: fraction of the token's UTF-16 code units that are hexadecimal
 * digits (0-9, a-f, A-F) or a dash.
 *
 * @param token - The token to analyse.
 * @returns Matching character count divided by `token.length`; 0 for an
 *   empty token.
 */
export function computeHexRatio(token: string): number {
  if (token.length === 0) return 0;
  const hits = token.match(/[0-9a-fA-F-]/g);
  return (hits?.length ?? 0) / token.length;
}
130
+
131
+ /** 2121 English + tech words for dictionary coverage check */
132
+ const DICT_WORDS: ReadonlySet<string> = new Set(
133
+ // prettier-ignore
134
+ JSON.parse(`["ability","able","about","above","abstract","abuse","academic","accept","acceptance","accepted","access","accessories","accommodation","according","account","accounting","accounts","across","action","actions","active","activities","activity","actual","actually","added","addition","additional","address","adm","admin","administration","administrative","adult","advance","advanced","adventure","advertise","advertisement","advertising","advice","aes","affairs","affiliate","affiliates","africa","african","after","again","against","agencies","agency","agent","agents","agree","agreement","airport","album","allow","allowed","allows","almost","alone","along","already","also","alternative","although","always","amateur","amazon","america","american","among","amount","analysis","angeles","animal","animals","announcements","annual","another","answer","answers","anti","anyone","anything","apartments","api","apparel","appear","apple","application","applications","applied","apply","approach","appropriate","approval","approved","approximately","april","architecture","archive","archives","area","areas","argument","arizona","army","around","article","articles","artist","artists","arts","asia","asian","asked","assessment","assistance","assistant","associated","associates","association","attack","attention","attorney","auction","auctions","audio","august","australia","australian","auth","author","authority","authors","auto","automatically","automotive","availability","available","avenue","average","avg","avoid","award","awards","away","baby","back","background","balance","ball","band","bank","base","baseball","based","basic","basis","basket","battery","beach","beautiful","beauty","became","because","become","been","before","began","begin","beginning","behind","being","believe","below","benefit","benefits","best","better","between","beyond","bible","bill","birth","black","block","blog","blogs","blood","blue","board","boards","body","book","books","born","boston","both","bot
tom","boys","branch","brand","brands","break","breakfast","breast","bridge","bring","british","brought","brown","browse","browser","btn","budget","buf","build","building","built","bush","business","businesses","button","buyer","buying","cable","calendar","california","call","called","calls","came","camera","cameras","camp","campaign","campus","canada","canadian","cancer","canon","capacity","capital","card","cards","care","career","careers","carolina","cars","cart","case","cases","cash","casino","catalog","categories","category","cause","cb","cell","cells","center","centers","central","centre","century","certain","certificate","certified","cfg","chain","chair","challenge","chance","change","changed","changes","channel","chapter","character","characters","charge","charges","charles","chart","chat","cheap","check","chemical","chicago","chief","child","children","china","chinese","choice","choose","chris","christian","christmas","church","cities","city","civil","claim","claims","class","classes","classic","classifieds","clean","clear","cli","click","client","clients","clinical","close","closed","clothing","club","clubs","cnet","cnt","coast","code","codes","coffee","col","cold","collection","college","color","colorado","columbia","column","come","comes","coming","command","comment","comments","commerce","commercial","commission","committee","common","communication","communications","communities","community","companies","company","compare","compared","comparison","competition","complete","completed","complex","compliance","component","components","comprehensive","computer","computers","computing","condition","conditions","conference","configuration","congress","connect","connection","consider","considered","construction","consumer","contact","contacts","contains","content","contents","context","continue","continued","contract","control","cool","copy","copyright","core","corner","corporate","corporation","correct","cost","costs","could","council","count","counter","countri
es","country","county","couple","course","courses","court","cover","coverage","covered","cpu","create","created","creating","creative","credit","creek","crime","critical","cross","crud","css","csv","cultural","culture","currency","current","currently","custom","customer","customers","daily","damage","dance","dark","data","database","date","dates","dating","david","days","db","dead","deal","deals","death","debt","december","decision","deep","default","defense","define","defined","definition","degree","delivery","demand","department","described","description","design","designated","designed","desktop","detail","detailed","details","determine","determined","dev","develop","developed","developer","developing","development","device","devices","diamond","dictionary","died","diet","difference","different","difficult","digital","dir","direct","directions","directly","director","directory","disclaimer","discount","discuss","discussion","disease","disp","display","distance","distribution","district","division","dlg","dns","doctor","document","documentation","documents","does","doing","dollar","dollars","domain","domestic","done","door","double","down","download","downloads","draft","drive","driver","driving","drop","drug","drugs","dst","during","dvds","each","early","earth","easily","east","eastern","easy","ebay","economic","economy","edge","edit","edition","editor","education","educational","effect","effective","effects","effort","efforts","either","election","electric","electronic","electronics","element","elements","else","email","emergency","emit","employee","employees","employment","enable","ending","energy","engine","engineering","england","english","enjoy","enough","ensure","enter","enterprise","entertainment","entire","entries","entry","env","environment","environmental","equal","equipment","err","error","errors","especially","essential","established","estate","europe","european","evaluation","even","event","events","ever","every","everyone","everything","evidence","e
vt","example","examples","excellent","except","exchange","executive","exercise","existing","expect","expected","experience","expert","express","ext","extended","extension","external","extra","eyes","face","facilities","facility","fact","factor","factors","facts","faculty","failure","fair","faith","fall","families","family","fantasy","farm","fashion","fast","father","favorite","feat","feature","featured","features","february","federal","feed","feedback","feel","fees","feet","female","fiction","field","fields","figure","file","files","fill","film","films","filter","final","finally","finance","financial","find","finding","fine","fire","firm","first","fish","fishing","fitness","five","fixed","fixme","flag","flash","flat","flight","floor","florida","flow","flowers","focus","follow","following","follows","font","food","foot","football","force","ford","foreign","forest","form","format","former","forms","forum","forums","forward","found","foundation","four","frame","france","francisco","free","freedom","french","fresh","friday","friend","friendly","friends","from","front","ftr","fuel","full","fully","function","functional","functions","fund","funding","funds","furniture","further","future","galleries","gallery","game","games","gamma","garden","gave","gear","general","generally","generated","generation","george","georgia","german","germany","gets","getting","gid","gift","gifts","girl","girls","git","give","given","gives","giving","glass","global","goal","goals","goes","going","gold","golden","golf","gone","good","goods","google","government","gpt","gpu","grade","graduate","grand","grant","graphics","great","greater","green","ground","group","groups","growing","growth","grp","guarantee","guest","gui","guide","guidelines","guides","guitar","guys","hack","hair","half","hall","hand","hands","happy","hard","hardware","have","having","hdr","head","headlines","health","hear","heard","hearing","heart","heat","heavy","held","help","helpful","here","high","higher","highest","highly","
hill","himself","hire","historical","history","hits","hold","holiday","holidays","home","homepage","homes","hook","hope","horse","hospital","host","hosting","hotel","hotels","hour","hours","house","housing","houston","however","html","huge","human","icon","idea","ideas","identify","idx","illinois","image","images","img","immediately","impact","implementation","important","improve","improvement","inch","include","included","includes","including","income","increase","increased","independent","index","india","indian","individual","individuals","industrial","industry","info","information","informed","initial","input","inside","install","installation","instead","institute","institutions","instructions","instruments","insurance","int","integrated","intended","interactive","interest","interested","interesting","interests","interface","internal","international","internet","into","introduction","investment","involved","ipod","iraq","ireland","isbn","island","islands","israel","issue","issues","italian","italy","item","items","itself","jack","jackson","james","january","japan","japanese","java","jersey","jesus","jewelry","jobs","john","johnson","join","joined","joint","jones","journal","json","july","jump","june","just","justice","kansas","keep","key","keyword","keywords","kids","kind","kinds","king","kingdom","kitchen","know","knowledge","known","kong","label","labor","lake","lan","land","language","languages","large","larger","largest","last","late","later","latest","latin","laws","lead","leader","leaders","leadership","leading","league","learn","learning","least","leather","leave","left","legal","len","length","lesbian","less","letter","letters","level","levels","lib","library","license","life","light","like","likely","limit","limited","line","lines","link","links","linux","list","listed","listen","listing","listings","lists","literature","little","live","lives","living","llm","load","loan","loans","local","located","location","locations","login","logo","london","long","lo
nger","look","looking","looks","lord","loss","lost","lots","louis","love","lower","lowest","lyrics","mac","machine","machines","made","magazine","magazines","magic","mail","mailing","main","maintenance","major","make","makes","making","male","manage","management","manager","manual","manufacturer","manufacturing","many","maps","march","marine","mark","market","marketing","markets","martin","mary","mass","master","match","matching","material","materials","matter","mature","max","maximum","maybe","mean","means","measures","media","medical","medicine","medium","meet","meeting","meetings","mega","member","members","membership","memory","mental","menu","merchant","message","messages","metal","method","methods","mexico","michael","michigan","micro","microsoft","middle","might","mike","miles","military","million","min","mind","mini","minimum","minister","minnesota","minute","minutes","miss","missing","mission","mobile","mock","mod","mode","model","models","modern","modified","module","moment","monday","money","monitor","monitoring","month","monthly","months","more","morning","mortgage","most","mother","motion","motor","motorola","mount","mountain","move","moved","movement","movie","movies","moving","msg","much","multi","multimedia","multiple","museum","music","musical","must","myself","naked","name","names","nano","nation","national","native","natural","nature","nav","navigation","near","necessary","need","needed","needs","net","network","networking","networks","never","news","newsletter","next","nice","night","nlp","nokia","none","normal","north","northern","note","notes","nothing","notice","november","npm","num","number","numbers","nursing","oauth","object","october","offer","offered","offering","offers","office","officer","official","often","ohio","older","once","ones","online","only","ontario","open","opening","operating","operation","operations","opinion","opportunities","opportunity","ops","option","optional","options","oral","orange","order","orders","oregon","organi
zation","organizations","original","orm","oss","other","others","otherwise","outdoor","output","outside","over","overall","overview","owned","owner","owners","pacific","pack","package","packages","page","pages","paid","pain","palm","panel","paper","paperback","papers","parent","parents","paris","park","parking","part","particular","particularly","parties","partner","partners","parts","party","pass","password","past","patch","path","patient","patients","paul","payment","paypal","peace","pennsylvania","people","percent","perfect","performance","perhaps","period","perm","permission","person","personal","persons","peter","phase","phentermine","phone","phones","photo","photography","photos","physical","pick","pics","picture","pictures","pid","piece","pink","pip","pipe","pkg","place","placed","places","plan","planning","plans","plant","plants","plastic","platform","play","played","player","players","playing","please","plus","pocket","point","points","poker","pol","police","policies","policy","political","politics","pool","poor","pop","popular","population","port","pos","position","positive","possible","post","posted","poster","posters","posts","potential","power","powered","practice","practices","premium","present","presentation","presented","president","press","pressure","pretty","prev","prevent","previous","price","prices","pricing","primary","prime","print","printer","printing","prior","privacy","private","pro","probably","problem","problems","procedure","procedures","process","processes","processing","prod","produce","produced","product","production","products","professional","professor","profile","profit","program","programme","programming","programs","progress","project","projects","properties","property","proposed","protect","protection","protein","provide","provided","provider","providers","provides","providing","ptr","public","publication","publications","published","publisher","publishing","purchase","purpose","purposes","quality","quantity","quarter","question"
,"questions","quick","quickly","quite","quote","quotes","race","racing","radio","ram","random","range","rank","rate","rated","rates","rather","rating","ratings","reach","read","reader","readers","reading","ready","real","really","reason","reasons","receive","received","recent","recently","recipes","recommend","recommendations","recommended","record","records","recovery","reduce","ref","reference","references","regarding","region","regional","register","registered","registration","regular","regulations","related","relations","relationship","release","released","releases","relevant","religion","religious","remember","remote","remove","rent","rental","rentals","repair","replies","reply","report","reported","reporting","reports","republic","req","request","requests","require","required","requirements","requires","res","research","reserve","reserved","resolution","resort","resource","resources","respect","respective","response","responsibility","responsible","rest","restaurant","restaurants","result","results","retail","return","returns","rev","review","reviews","rich","richard","right","rights","ring","ringtones","risk","river","road","robert","rock","rol","role","room","rooms","root","rose","round","row","royal","rsa","rule","rules","running","russia","russian","safe","safety","said","saint","sale","sales","same","sample","samsung","santa","satellite","saturday","save","saying","says","scale","schedule","school","schools","science","sciences","scientific","score","scott","screen","sdk","search","searches","season","seattle","second","seconds","secretary","section","sections","sector","secure","security","seem","seems","seen","select","selected","selection","self","sell","seller","sellers","selling","send","senior","sense","sent","separate","september","sequence","series","serious","serve","server","servers","service","services","session","sets","setting","settings","seven","several","sha","shall","share","sheet","ship","shipping","ships","shirt","shirts","shoes","shop"
,"shopping","shops","short","shot","should","show","showing","shown","shows","sid","side","sign","signed","significant","silver","similar","simple","simply","since","single","site","sitemap","sites","situation","size","skills","skin","skip","small","smart","smith","snow","social","society","soft","software","sold","solid","solution","solutions","some","someone","something","sometimes","song","songs","sony","soon","sorry","sort","sorted","sound","source","sources","south","southern","space","spain","spanish","special","species","specific","specified","speed","spirit","sponsored","sport","sports","spring","sql","square","src","sre","ssd","ssh","ssl","staff","stage","stand","standard","standards","star","stars","start","started","starting","state","statement","statements","states","station","statistics","status","stay","steel","step","steps","steve","still","stock","stone","stop","storage","store","stores","stories","story","str","strategies","strategy","stream","street","string","strong","structure","stub","student","students","studies","studio","study","stuff","style","subject","subjects","submit","submitted","subs","subscribe","success","successful","such","suggest","suite","sum","summary","summer","sunday","super","supplies","supply","support","supported","sure","surface","surgery","survey","switch","system","systems","tab","table","tables","tag","tags","take","taken","takes","taking","talk","talking","target","task","tcp","teacher","teachers","teaching","team","tech","technical","techniques","technologies","technology","teen","teens","telephone","television","tell","temp","temperature","term","terms","test","testing","tests","texas","text","than","thank","thanks","that","their","them","theme","themselves","then","theory","therapy","there","therefore","these","they","thing","things","think","thinking","third","this","thomas","those","though","thought","thoughts","thousands","thread","three","through","throughout","thursday","thus","tickets","tid","time","times","ti
p","tips","title","titles","tls","tmp","today","todo","together","told","took","tool","tools","topic","topics","total","touch","tour","tours","towards","town","toys","track","trade","trademarks","trading","traditional","traffic","training","transfer","transport","transportation","travel","treatment","tree","trial","trip","true","trust","truth","trying","tuesday","turn","type","types","udp","uid","under","understand","understanding","union","unique","unit","united","units","universal","university","unknown","unless","until","update","updated","updates","upgrade","upon","upper","urban","url","used","useful","user","username","users","uses","using","usr","usually","vacation","val","valid","valley","value","values","variable","variety","various","vegas","vehicle","vehicles","ver","version","very","video","videos","view","viewed","views","village","virginia","virtual","virus","vision","visit","visitors","visual","voice","volume","vote","vpn","wait","walk","wall","wan","want","wanted","warning","washington","waste","watch","watches","water","ways","weather","website","websites","wedding","wednesday","week","weekend","weekly","weeks","weight","welcome","well","went","were","west","western","what","when","where","whether","which","while","white","whole","wholesale","whose","wide","wife","wild","will","william","williams","wind","window","windows","wine","winter","wireless","wish","with","within","without","woman","women","wood","word","words","work","worked","workers","working","works","workshop","world","worldwide","worth","would","write","writing","written","wrong","wrote","xbox","xml","yahoo","yaml","year","years","yellow","yesterday","york","young","your","yourself","youth","zealand","zone"]`)
135
+ );
136
+
137
/**
 * Dict ratio: fraction of the token covered by dictionary words.
 * A high ratio suggests English text or an identifier rather than a secret.
 *
 * The token is reduced to lowercase runs of ASCII letters (runs shorter than
 * 3 characters are ignored). Each run is then scanned left to right: at every
 * position the longest `DICT_WORDS` entry starting there is consumed, and if
 * none matches the scan advances by one character.
 *
 * @param token - The token to analyse.
 * @returns Matched character count divided by `token.length`; 0 when the
 *   token contains no letter run of at least 3 characters.
 */
export function computeDictRatio(token: string): number {
  // Step 1: collect lowercase letter runs of length >= 3.
  const runs: string[] = [];
  let run = "";
  for (const ch of token) {
    const kind = charClass(ch);
    if (kind === "U" || kind === "L") {
      run += ch.toLowerCase();
      continue;
    }
    if (run.length >= 3) runs.push(run);
    run = "";
  }
  if (run.length >= 3) runs.push(run);
  if (runs.length === 0) return 0;

  // Step 2: greedy longest-match coverage inside each run.
  let covered = 0;
  for (const letters of runs) {
    let cursor = 0;
    while (cursor < letters.length) {
      // Shrink the candidate from the right until it is a dictionary word
      // or becomes empty.
      let stop = letters.length;
      while (stop > cursor && !DICT_WORDS.has(letters.slice(cursor, stop))) stop--;
      const hit = stop - cursor;
      covered += hit;
      cursor += hit > 0 ? hit : 1;
    }
  }

  // runs is non-empty here, so token.length is at least 3.
  return covered / token.length;
}
181
+
182
+ // ─── Adjusted entropy ─────────────────────────────────────────────────────
183
+
184
+ export const ENTROPY_THRESHOLD = 5.5; // isHighEntropy flags a token only when its adjusted entropy is strictly above this
185
+ export const MIN_ENTROPY_TOKEN_LENGTH = 32; // tokens shorter than this are never flagged
186
+ const W1_DENSITY = 3.0; // multiplier for the max trigram density (added to the score)
187
+ const W2_WORD = 3.0; // multiplier for the word ratio (subtracted from the score)
188
+ const W3_DICT = 4.0; // multiplier for the dictionary ratio (subtracted from the score)
189
+ const HEX_PENALTY = 2.5; // flat amount subtracted for hyphenated, mostly-hex tokens
190
+ const HEX_RATIO_THRESHOLD = 0.9; // hex ratio must exceed this for HEX_PENALTY to apply
191
+
192
/**
 * Adjusted entropy:
 *   adjusted = shannon + density×W1 − wordRatio×W2 − dictRatio×W3 − hexPenalty
 *
 * The hex penalty is applied only to hyphenated, UUID-like tokens: the hex
 * ratio must exceed HEX_RATIO_THRESHOLD and the token must contain "-".
 *
 * @param data - The token to score.
 * @returns The adjusted entropy score.
 */
export function calculateAdjustedEntropy(data: string): number {
  const uuidLike = computeHexRatio(data) > HEX_RATIO_THRESHOLD && data.includes("-");

  let score = shannonEntropy(data);
  score += maxSegmentDensity(data) * W1_DENSITY;
  score -= computeWordRatio(data) * W2_WORD;
  score -= computeDictRatio(data) * W3_DICT;
  if (uuidLike) score -= HEX_PENALTY;
  return score;
}
212
+
213
/**
 * Decide whether a token should be treated as high-entropy.
 *
 * A token qualifies when it is at least MIN_ENTROPY_TOKEN_LENGTH characters
 * long, `isSafeContent` does not accept it, and its adjusted entropy is
 * strictly greater than ENTROPY_THRESHOLD. The checks run in that order and
 * stop at the first failure.
 *
 * @param data - The token to test.
 * @returns `true` when all three conditions hold.
 */
export function isHighEntropy(data: string): boolean {
  const longEnough = data.length >= MIN_ENTROPY_TOKEN_LENGTH;
  return (
    longEnough &&
    !isSafeContent(data) &&
    calculateAdjustedEntropy(data) > ENTROPY_THRESHOLD
  );
}
218
+
219
/**
 * Tokenize `content` and return the tokens judged high-entropy.
 *
 * Tokens are separated by runs of whitespace or any of these delimiters:
 * [ ] { } " ' , / \ | ( ) & # @ ! < > ?
 * Delimiters are not part of the returned tokens. Tokens shorter than
 * MIN_ENTROPY_TOKEN_LENGTH are skipped before the entropy check.
 *
 * @param content - The text to scan.
 * @returns The tokens for which `isHighEntropy` is true, in order of
 *   appearance.
 */
export function findHighEntropyTokens(content: string): string[] {
  const flagged: string[] = [];
  for (const piece of content.split(/[\s\[\]{}"',\/\\|()&#@!<>?]+/)) {
    if (piece.length < MIN_ENTROPY_TOKEN_LENGTH) continue;
    if (isHighEntropy(piece)) flagged.push(piece);
  }
  return flagged;
}
@@ -1,11 +1,7 @@
1
1
  /**
2
2
  * Safety — Pi 集成层
3
3
  *
4
- * - Command Guard: 拦截危险 bash 命令
5
- * - Redirect Guard: bash 覆盖写入提示确认
6
- * - Protected Paths: write/edit/read 保护路径提示确认
7
- * - Write Guard: 覆盖非空文件禁止 write
8
- * - Secret Redact: API Key / Token 自动掩码
4
+ * - Secret Redact: API Key / Token 自动掩码
9
5
  */
10
6
 
11
7
  import type {
@@ -13,100 +9,36 @@ import type {
13
9
  ExtensionContext,
14
10
  ToolResultEvent,
15
11
  } from "@earendil-works/pi-coding-agent";
16
- import * as fs from "node:fs";
17
- import { resolve } from "node:path";
18
12
  import {
19
- checkProtectedPath,
20
- collectBashDangers,
21
- formatBashDangers,
22
13
  detectSecrets,
23
14
  maskSecret,
24
15
  } from "./detect.js";
25
16
 
26
17
  type ToolTextContent = Extract<NonNullable<ToolResultEvent["content"]>[number], { type: "text" }>;
27
18
 
19
/**
 * Collapse a command to a single line and truncate it for display.
 *
 * Whitespace runs (including newlines) become one space and the ends are
 * trimmed. If the result is longer than `maxLength`, it is cut so that the
 * returned string, including the trailing "…", is exactly `maxLength`
 * characters long.
 *
 * @param command - The raw command text.
 * @param maxLength - Maximum length of the result. Values below 1 (and NaN)
 *   are treated as 1 so that a negative slice end can never produce a result
 *   longer than requested.
 * @returns The single-line, possibly truncated command.
 */
function summarizeCommand(command: string, maxLength = 48): string {
  const limit = maxLength >= 1 ? Math.floor(maxLength) : 1;
  const singleLine = command.replace(/\s+/g, " ").trim();
  if (singleLine.length <= limit) return singleLine;
  return `${singleLine.slice(0, limit - 1)}…`;
}
24
+
25
/**
 * Build the short context label for a redaction notice.
 *
 * - "read" events: `read <path>`, using the first defined of the input's
 *   `path`, `file` or `file_path` fields, or just "read" when none is truthy.
 * - "bash" events: `bash <summarized command>` when the input's `command` is
 *   a non-blank string, otherwise just "bash".
 * - any other tool: the tool name itself.
 *
 * @param event - The tool result event being reported.
 * @returns The label text.
 */
function formatRedactionContext(event: ToolResultEvent): string {
  switch (event.toolName) {
    case "read": {
      const input = event.input as any;
      const filePath = input?.path ?? input?.file ?? input?.file_path;
      return filePath ? `read ${filePath}` : "read";
    }
    case "bash": {
      const command = (event.input as any)?.command;
      if (typeof command !== "string" || command.trim().length === 0) return "bash";
      return `bash ${summarizeCommand(command)}`;
    }
    default:
      return event.toolName;
  }
}
38
+
28
39
  // ─── Setup ──────────────────────────────────────────────────────────────────
29
40
 
30
41
  export function setupSafety(pi: ExtensionAPI) {
31
- // ── Command Guard + Protected Paths + Write Guard (tool_call) ─────────
32
-
33
- pi.on("tool_call", async (event, ctx) => {
34
-
35
- // Gate 1: 危险命令 + 覆盖写入 + 读取保护路径
36
- if (event.toolName === "bash") {
37
- const command = (event.input as { command?: string }).command;
38
- if (command) {
39
- const dangers = collectBashDangers(command, ctx.cwd);
40
- if (dangers.length > 0) {
41
- const message = formatBashDangers(dangers)!;
42
- if (!ctx.hasUI) {
43
- return { block: true, reason: `⚠ ${message} (non-interactive)` };
44
- }
45
- const choice = await ctx.ui.select(
46
- `⚠️ ${message}\n\nAllow execution?`,
47
- ["Block", "Allow once"],
48
- );
49
- if (!choice || choice === "Block") {
50
- return { block: true, reason: `⚠ ${message}` };
51
- }
52
- }
53
- }
54
- }
55
-
56
- // Gate 2: write/edit 写入保护路径
57
- if (event.toolName === "write" || event.toolName === "edit") {
58
- const filePath = (event.input as any).path ?? (event.input as any).file ?? (event.input as any).file_path;
59
- if (filePath) {
60
- const danger = checkProtectedPath(filePath);
61
- if (danger) {
62
- if (!ctx.hasUI) {
63
- return { block: true, reason: `🔒 ${danger}\nmay contain sensitive information` };
64
- }
65
- const choice = await ctx.ui.select(
66
- `🔒 ${danger}\nmay contain sensitive information\n\nProceed?`,
67
- ["Block", "Allow once"],
68
- );
69
- if (!choice || choice === "Block") {
70
- return { block: true, reason: `🔒 ${danger}\nmay contain sensitive information` };
71
- }
72
- }
73
- }
74
- }
75
-
76
- // Gate 3: 写保护(已有内容的文件禁止 write,直接返回信息给 agent)
77
- if (event.toolName === "write") {
78
- const filePath = (event.input as any).path ?? (event.input as any).file ?? (event.input as any).file_path;
79
- if (filePath) {
80
- try {
81
- const abs = resolve(ctx.cwd, filePath);
82
- if (fs.existsSync(abs) && fs.readFileSync(abs, "utf8").length > 0) {
83
- return { block: true, reason: "Overwriting a non-empty file is dangerous, use the edit tool instead!" };
84
- }
85
- } catch { /* file doesn't exist */ }
86
- }
87
- }
88
-
89
- // Gate 4: read 工具读取保护路径(bash 读取已在 Gate 1 处理)
90
- if (event.toolName === "read") {
91
- const filePath = (event.input as any).path ?? (event.input as any).file ?? (event.input as any).file_path;
92
- if (filePath) {
93
- const danger = checkProtectedPath(filePath);
94
- if (danger) {
95
- if (!ctx.hasUI) {
96
- return { block: true, reason: `🔒 Reading protected file: ${danger}\nmay contain sensitive information` };
97
- }
98
- const choice = await ctx.ui.select(
99
- `🔒 Reading protected file: ${danger}\nmay contain sensitive information\n\nProceed?`,
100
- ["Block", "Allow once"],
101
- );
102
- if (!choice || choice === "Block") {
103
- return { block: true, reason: `🔒 Reading protected file: ${danger}\nmay contain sensitive information` };
104
- }
105
- }
106
- }
107
- }
108
- });
109
-
110
42
  // ── Secret Redact (tool_result) ────────────────────────────────────────
111
43
 
112
44
  const handleToolResult = async (
@@ -115,9 +47,10 @@ export function setupSafety(pi: ExtensionAPI) {
115
47
  ): Promise<{ content?: NonNullable<ToolResultEvent["content"]> } | void> => {
116
48
  if (!event.content || !Array.isArray(event.content)) return;
117
49
 
118
- // Only scan read tool output other tools (bash, write, edit) are either
119
- // covered by path guards or produce git/diff noise that causes false positives.
120
- if (event.toolName !== "read") return;
50
+ // Scan read + bash tool output. Skip write/edit/patch because they mainly
51
+ // produce diffs or generated file bodies, which are handled elsewhere and are
52
+ // more prone to noisy false positives.
53
+ if (event.toolName !== "read" && event.toolName !== "bash") return;
121
54
 
122
55
  const textParts: Array<{ index: number; text: string; item: ToolTextContent }> = [];
123
56
  for (let i = 0; i < event.content.length; i++) {
@@ -129,17 +62,25 @@ export function setupSafety(pi: ExtensionAPI) {
129
62
  if (textParts.length === 0) return;
130
63
 
131
64
  let totalCount = 0;
65
+ const counts: Record<"pattern" | "regex" | "entropy", number> = {
66
+ pattern: 0,
67
+ regex: 0,
68
+ entropy: 0,
69
+ };
132
70
  const newContent = [...event.content];
133
71
 
72
+ const filePath = (event.input as any)?.path ?? (event.input as any)?.file ?? (event.input as any)?.file_path;
73
+
134
74
  for (const { index, text, item } of textParts) {
135
- const matches = detectSecrets(text);
75
+ const matches = detectSecrets(text, { filePath });
136
76
  if (matches.length === 0) continue;
137
77
 
138
78
  totalCount += matches.length;
139
79
  let redacted = text;
140
- for (const { start, end } of matches) {
80
+ for (const { start, end, source } of matches) {
81
+ counts[source] += 1;
141
82
  const original = redacted.slice(start, end);
142
- redacted = redacted.slice(0, start) + maskSecret(original) + redacted.slice(end);
83
+ redacted = redacted.slice(0, start) + maskSecret(original, source) + redacted.slice(end);
143
84
  }
144
85
  const updatedItem: ToolTextContent = { ...item, text: redacted };
145
86
  newContent[index] = updatedItem;
@@ -147,9 +88,15 @@ export function setupSafety(pi: ExtensionAPI) {
147
88
 
148
89
  if (totalCount === 0) return;
149
90
  const label = totalCount === 1 ? "1 secret" : `${totalCount} secrets`;
150
- ctx.ui.notify(`🔒 Redacted ${label} in ${event.toolName} output`, "warning");
91
+ const breakdown: string[] = [];
92
+ if (counts.pattern > 0) breakdown.push(`*:pattern=${counts.pattern}`);
93
+ if (counts.regex > 0) breakdown.push(`#:regex=${counts.regex}`);
94
+ if (counts.entropy > 0) breakdown.push(`?:entropy=${counts.entropy}`);
95
+ const suffix = breakdown.length > 0 ? ` · ${breakdown.join(" ")}` : "";
96
+ const contextLabel = formatRedactionContext(event);
97
+ ctx.ui.notify(`🔒 [${contextLabel}] Redacted ${label}${suffix}`, "warning");
151
98
  return { content: newContent };
152
99
  };
153
100
 
154
101
  pi.on("tool_result", handleToolResult);
155
- }
102
+ }