decorated-pi 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,226 @@
1
+ /**
2
+ * Safety Detection — Shannon entropy and adjusted entropy analysis
3
+ */
4
+
5
+ import { isSafeContent } from "./patterns.js";
6
+
7
+ // ─── Character classification ────────────────────────────────────────────
8
+
9
/**
 * Classify a character by the UTF-16 code unit at index 0.
 *
 * @param c - String whose first code unit is inspected.
 * @returns "U" for A–Z, "L" for a–z, "D" for 0–9, "S" when `c` is exactly
 *   "-", and "X" for everything else (including the empty string).
 */
export function charClass(c: string): "U" | "L" | "D" | "S" | "X" {
  const unit = c.charCodeAt(0);
  // NaN (empty input) fails every range comparison and falls through to "X".
  const within = (lo: number, hi: number): boolean => unit >= lo && unit <= hi;
  if (within(65, 90)) return "U";
  if (within(97, 122)) return "L";
  if (within(48, 57)) return "D";
  return c === "-" ? "S" : "X";
}
18
+
19
+ // ─── Shannon entropy ──────────────────────────────────────────────────────
20
+
21
/**
 * Shannon entropy in bits per symbol: H(X) = -Σ p(x) · log₂(p(x)).
 *
 * Symbols are Unicode code points (the unit produced by string iteration).
 * Probabilities are computed against the number of symbols actually counted,
 * not `data.length`, so characters outside the BMP (two UTF-16 code units)
 * do not skew the distribution.
 *
 * @param data - Text to measure.
 * @returns Entropy in bits; 0 for an empty string or a single repeated symbol.
 */
export function shannonEntropy(data: string): number {
  const freq = new Map<string, number>();
  let total = 0;
  for (const char of data) {
    freq.set(char, (freq.get(char) ?? 0) + 1);
    total++;
  }
  if (total === 0) return 0;

  let entropy = 0;
  for (const count of freq.values()) {
    const p = count / total;
    entropy -= p * Math.log2(p);
  }
  return entropy;
}
34
+
35
+ // ─── Trigram density ──────────────────────────────────────────────────────
36
+
37
/**
 * Score one 3-character window by the character classes it contains.
 *
 * Rules, checked in this order:
 * - any "X"-class character            → 0
 * - three digits                       → 0
 * - a dash plus ≥3 distinct classes    → 1.0
 * - digit first, mixed with a letter   → 1.0 (e.g. "4Vi")
 * - ≥2 uppercase and ≥1 lowercase      → 0.8
 * - anything else                      → 0
 *
 * @param c1 - First character of the window.
 * @param c2 - Second character of the window.
 * @param c3 - Third character of the window.
 * @returns The score for the window (0, 0.8 or 1.0).
 */
export function trigramScore(c1: string, c2: string, c3: string): number {
  const classes = [charClass(c1), charClass(c2), charClass(c3)];

  // Tally how many of the three characters fall into each class.
  const tally = { U: 0, L: 0, D: 0, S: 0, X: 0 };
  for (const k of classes) tally[k] += 1;

  if (tally.X > 0) return 0;
  if (tally.D === 3) return 0;

  const distinct = (["U", "L", "D", "S"] as const).filter(k => tally[k] > 0).length;
  if (tally.S > 0 && distinct >= 3) return 1.0;

  const mixesDigitAndLetter = tally.D > 0 && (tally.L > 0 || tally.U > 0);
  if (mixesDigitAndLetter && classes[0] === "D") return 1.0;

  return tally.U >= 2 && tally.L >= 1 ? 0.8 : 0;
}
58
+
59
/**
 * Break a token into segments at every "X"-class character.
 *
 * @param token - Text to split.
 * @returns The runs of non-"X" characters that are at least 3 characters
 *   long, in order of appearance; shorter runs are dropped.
 */
export function splitByXClass(token: string): string[] {
  const kept: string[] = [];
  let run = "";

  // Close the current run, keeping it only if it is long enough for a trigram.
  const flush = (): void => {
    if (run.length >= 3) kept.push(run);
    run = "";
  };

  for (const ch of token) {
    if (charClass(ch) === "X") {
      flush();
    } else {
      run += ch;
    }
  }
  flush();
  return kept;
}
74
+
75
/**
 * Mean trigram score over every 3-character sliding window of a segment.
 *
 * @param segment - Segment to score.
 * @returns Sum of window scores divided by the number of windows, or 0 when
 *   the segment is shorter than 3 characters.
 */
export function segmentDensity(segment: string): number {
  const windowCount = segment.length - 2;
  if (windowCount < 1) return 0;

  let sum = 0;
  for (let start = 0; start < windowCount; start++) {
    sum += trigramScore(
      segment.charAt(start),
      segment.charAt(start + 1),
      segment.charAt(start + 2),
    );
  }
  return sum / windowCount;
}
84
+
85
/**
 * Highest trigram density among the segments obtained by splitting the token
 * at "X"-class characters.
 *
 * @param token - Token to analyse.
 * @returns The largest segment density, or 0 when the token yields no segment.
 */
export function maxSegmentDensity(token: string): number {
  return splitByXClass(token).reduce(
    (best, seg) => Math.max(best, segmentDensity(seg)),
    0,
  );
}
96
+
97
+ // ─── Word / dictionary / hex ratios ───────────────────────────────────────
98
+
99
/**
 * Word ratio: share of the token occupied by "word-like" fragments.
 *
 * A fragment is a maximal run of ASCII letters at least 3 characters long
 * that contains at least one vowel (a, e, i, o, u; case-insensitive).
 *
 * @param token - Token to analyse.
 * @returns Total length of word-like fragments divided by `token.length`,
 *   or 0 when there are none.
 */
export function computeWordRatio(token: string): number {
  // Greedy matching yields exactly the maximal letter runs of length >= 3.
  const letterRuns = token.match(/[A-Za-z]{3,}/g) ?? [];
  const vowels = /[aeiou]/;

  let wordChars = 0;
  let wordCount = 0;
  for (const run of letterRuns) {
    if (vowels.test(run.toLowerCase())) {
      wordChars += run.length;
      wordCount += 1;
    }
  }
  return wordCount > 0 ? wordChars / token.length : 0;
}
120
+
121
/**
 * Hex ratio: share of the token made of hexadecimal digits or dashes.
 *
 * @param token - Token to analyse.
 * @returns Count of characters matching `[0-9a-fA-F-]` divided by
 *   `token.length`, or 0 for an empty token.
 */
export function computeHexRatio(token: string): number {
  if (token.length === 0) return 0;

  const hexLike = /[0-9a-fA-F-]/;
  let hits = 0;
  for (const ch of token) {
    if (hexLike.test(ch)) hits += 1;
  }
  return hits / token.length;
}
130
+
131
+ /** 2121 English + tech words for dictionary coverage check */
132
+ const DICT_WORDS: ReadonlySet<string> = new Set(
133
+ // prettier-ignore
134
+ JSON.parse(`["ability","able","about","above","abstract","abuse","academic","accept","acceptance","accepted","access","accessories","accommodation","according","account","accounting","accounts","across","action","actions","active","activities","activity","actual","actually","added","addition","additional","address","adm","admin","administration","administrative","adult","advance","advanced","adventure","advertise","advertisement","advertising","advice","aes","affairs","affiliate","affiliates","africa","african","after","again","against","agencies","agency","agent","agents","agree","agreement","airport","album","allow","allowed","allows","almost","alone","along","already","also","alternative","although","always","amateur","amazon","america","american","among","amount","analysis","angeles","animal","animals","announcements","annual","another","answer","answers","anti","anyone","anything","apartments","api","apparel","appear","apple","application","applications","applied","apply","approach","appropriate","approval","approved","approximately","april","architecture","archive","archives","area","areas","argument","arizona","army","around","article","articles","artist","artists","arts","asia","asian","asked","assessment","assistance","assistant","associated","associates","association","attack","attention","attorney","auction","auctions","audio","august","australia","australian","auth","author","authority","authors","auto","automatically","automotive","availability","available","avenue","average","avg","avoid","award","awards","away","baby","back","background","balance","ball","band","bank","base","baseball","based","basic","basis","basket","battery","beach","beautiful","beauty","became","because","become","been","before","began","begin","beginning","behind","being","believe","below","benefit","benefits","best","better","between","beyond","bible","bill","birth","black","block","blog","blogs","blood","blue","board","boards","body","book","books","born","boston","both","bot
tom","boys","branch","brand","brands","break","breakfast","breast","bridge","bring","british","brought","brown","browse","browser","btn","budget","buf","build","building","built","bush","business","businesses","button","buyer","buying","cable","calendar","california","call","called","calls","came","camera","cameras","camp","campaign","campus","canada","canadian","cancer","canon","capacity","capital","card","cards","care","career","careers","carolina","cars","cart","case","cases","cash","casino","catalog","categories","category","cause","cb","cell","cells","center","centers","central","centre","century","certain","certificate","certified","cfg","chain","chair","challenge","chance","change","changed","changes","channel","chapter","character","characters","charge","charges","charles","chart","chat","cheap","check","chemical","chicago","chief","child","children","china","chinese","choice","choose","chris","christian","christmas","church","cities","city","civil","claim","claims","class","classes","classic","classifieds","clean","clear","cli","click","client","clients","clinical","close","closed","clothing","club","clubs","cnet","cnt","coast","code","codes","coffee","col","cold","collection","college","color","colorado","columbia","column","come","comes","coming","command","comment","comments","commerce","commercial","commission","committee","common","communication","communications","communities","community","companies","company","compare","compared","comparison","competition","complete","completed","complex","compliance","component","components","comprehensive","computer","computers","computing","condition","conditions","conference","configuration","congress","connect","connection","consider","considered","construction","consumer","contact","contacts","contains","content","contents","context","continue","continued","contract","control","cool","copy","copyright","core","corner","corporate","corporation","correct","cost","costs","could","council","count","counter","countri
es","country","county","couple","course","courses","court","cover","coverage","covered","cpu","create","created","creating","creative","credit","creek","crime","critical","cross","crud","css","csv","cultural","culture","currency","current","currently","custom","customer","customers","daily","damage","dance","dark","data","database","date","dates","dating","david","days","db","dead","deal","deals","death","debt","december","decision","deep","default","defense","define","defined","definition","degree","delivery","demand","department","described","description","design","designated","designed","desktop","detail","detailed","details","determine","determined","dev","develop","developed","developer","developing","development","device","devices","diamond","dictionary","died","diet","difference","different","difficult","digital","dir","direct","directions","directly","director","directory","disclaimer","discount","discuss","discussion","disease","disp","display","distance","distribution","district","division","dlg","dns","doctor","document","documentation","documents","does","doing","dollar","dollars","domain","domestic","done","door","double","down","download","downloads","draft","drive","driver","driving","drop","drug","drugs","dst","during","dvds","each","early","earth","easily","east","eastern","easy","ebay","economic","economy","edge","edit","edition","editor","education","educational","effect","effective","effects","effort","efforts","either","election","electric","electronic","electronics","element","elements","else","email","emergency","emit","employee","employees","employment","enable","ending","energy","engine","engineering","england","english","enjoy","enough","ensure","enter","enterprise","entertainment","entire","entries","entry","env","environment","environmental","equal","equipment","err","error","errors","especially","essential","established","estate","europe","european","evaluation","even","event","events","ever","every","everyone","everything","evidence","e
vt","example","examples","excellent","except","exchange","executive","exercise","existing","expect","expected","experience","expert","express","ext","extended","extension","external","extra","eyes","face","facilities","facility","fact","factor","factors","facts","faculty","failure","fair","faith","fall","families","family","fantasy","farm","fashion","fast","father","favorite","feat","feature","featured","features","february","federal","feed","feedback","feel","fees","feet","female","fiction","field","fields","figure","file","files","fill","film","films","filter","final","finally","finance","financial","find","finding","fine","fire","firm","first","fish","fishing","fitness","five","fixed","fixme","flag","flash","flat","flight","floor","florida","flow","flowers","focus","follow","following","follows","font","food","foot","football","force","ford","foreign","forest","form","format","former","forms","forum","forums","forward","found","foundation","four","frame","france","francisco","free","freedom","french","fresh","friday","friend","friendly","friends","from","front","ftr","fuel","full","fully","function","functional","functions","fund","funding","funds","furniture","further","future","galleries","gallery","game","games","gamma","garden","gave","gear","general","generally","generated","generation","george","georgia","german","germany","gets","getting","gid","gift","gifts","girl","girls","git","give","given","gives","giving","glass","global","goal","goals","goes","going","gold","golden","golf","gone","good","goods","google","government","gpt","gpu","grade","graduate","grand","grant","graphics","great","greater","green","ground","group","groups","growing","growth","grp","guarantee","guest","gui","guide","guidelines","guides","guitar","guys","hack","hair","half","hall","hand","hands","happy","hard","hardware","have","having","hdr","head","headlines","health","hear","heard","hearing","heart","heat","heavy","held","help","helpful","here","high","higher","highest","highly","
hill","himself","hire","historical","history","hits","hold","holiday","holidays","home","homepage","homes","hook","hope","horse","hospital","host","hosting","hotel","hotels","hour","hours","house","housing","houston","however","html","huge","human","icon","idea","ideas","identify","idx","illinois","image","images","img","immediately","impact","implementation","important","improve","improvement","inch","include","included","includes","including","income","increase","increased","independent","index","india","indian","individual","individuals","industrial","industry","info","information","informed","initial","input","inside","install","installation","instead","institute","institutions","instructions","instruments","insurance","int","integrated","intended","interactive","interest","interested","interesting","interests","interface","internal","international","internet","into","introduction","investment","involved","ipod","iraq","ireland","isbn","island","islands","israel","issue","issues","italian","italy","item","items","itself","jack","jackson","james","january","japan","japanese","java","jersey","jesus","jewelry","jobs","john","johnson","join","joined","joint","jones","journal","json","july","jump","june","just","justice","kansas","keep","key","keyword","keywords","kids","kind","kinds","king","kingdom","kitchen","know","knowledge","known","kong","label","labor","lake","lan","land","language","languages","large","larger","largest","last","late","later","latest","latin","laws","lead","leader","leaders","leadership","leading","league","learn","learning","least","leather","leave","left","legal","len","length","lesbian","less","letter","letters","level","levels","lib","library","license","life","light","like","likely","limit","limited","line","lines","link","links","linux","list","listed","listen","listing","listings","lists","literature","little","live","lives","living","llm","load","loan","loans","local","located","location","locations","login","logo","london","long","lo
nger","look","looking","looks","lord","loss","lost","lots","louis","love","lower","lowest","lyrics","mac","machine","machines","made","magazine","magazines","magic","mail","mailing","main","maintenance","major","make","makes","making","male","manage","management","manager","manual","manufacturer","manufacturing","many","maps","march","marine","mark","market","marketing","markets","martin","mary","mass","master","match","matching","material","materials","matter","mature","max","maximum","maybe","mean","means","measures","media","medical","medicine","medium","meet","meeting","meetings","mega","member","members","membership","memory","mental","menu","merchant","message","messages","metal","method","methods","mexico","michael","michigan","micro","microsoft","middle","might","mike","miles","military","million","min","mind","mini","minimum","minister","minnesota","minute","minutes","miss","missing","mission","mobile","mock","mod","mode","model","models","modern","modified","module","moment","monday","money","monitor","monitoring","month","monthly","months","more","morning","mortgage","most","mother","motion","motor","motorola","mount","mountain","move","moved","movement","movie","movies","moving","msg","much","multi","multimedia","multiple","museum","music","musical","must","myself","naked","name","names","nano","nation","national","native","natural","nature","nav","navigation","near","necessary","need","needed","needs","net","network","networking","networks","never","news","newsletter","next","nice","night","nlp","nokia","none","normal","north","northern","note","notes","nothing","notice","november","npm","num","number","numbers","nursing","oauth","object","october","offer","offered","offering","offers","office","officer","official","often","ohio","older","once","ones","online","only","ontario","open","opening","operating","operation","operations","opinion","opportunities","opportunity","ops","option","optional","options","oral","orange","order","orders","oregon","organi
zation","organizations","original","orm","oss","other","others","otherwise","outdoor","output","outside","over","overall","overview","owned","owner","owners","pacific","pack","package","packages","page","pages","paid","pain","palm","panel","paper","paperback","papers","parent","parents","paris","park","parking","part","particular","particularly","parties","partner","partners","parts","party","pass","password","past","patch","path","patient","patients","paul","payment","paypal","peace","pennsylvania","people","percent","perfect","performance","perhaps","period","perm","permission","person","personal","persons","peter","phase","phentermine","phone","phones","photo","photography","photos","physical","pick","pics","picture","pictures","pid","piece","pink","pip","pipe","pkg","place","placed","places","plan","planning","plans","plant","plants","plastic","platform","play","played","player","players","playing","please","plus","pocket","point","points","poker","pol","police","policies","policy","political","politics","pool","poor","pop","popular","population","port","pos","position","positive","possible","post","posted","poster","posters","posts","potential","power","powered","practice","practices","premium","present","presentation","presented","president","press","pressure","pretty","prev","prevent","previous","price","prices","pricing","primary","prime","print","printer","printing","prior","privacy","private","pro","probably","problem","problems","procedure","procedures","process","processes","processing","prod","produce","produced","product","production","products","professional","professor","profile","profit","program","programme","programming","programs","progress","project","projects","properties","property","proposed","protect","protection","protein","provide","provided","provider","providers","provides","providing","ptr","public","publication","publications","published","publisher","publishing","purchase","purpose","purposes","quality","quantity","quarter","question"
,"questions","quick","quickly","quite","quote","quotes","race","racing","radio","ram","random","range","rank","rate","rated","rates","rather","rating","ratings","reach","read","reader","readers","reading","ready","real","really","reason","reasons","receive","received","recent","recently","recipes","recommend","recommendations","recommended","record","records","recovery","reduce","ref","reference","references","regarding","region","regional","register","registered","registration","regular","regulations","related","relations","relationship","release","released","releases","relevant","religion","religious","remember","remote","remove","rent","rental","rentals","repair","replies","reply","report","reported","reporting","reports","republic","req","request","requests","require","required","requirements","requires","res","research","reserve","reserved","resolution","resort","resource","resources","respect","respective","response","responsibility","responsible","rest","restaurant","restaurants","result","results","retail","return","returns","rev","review","reviews","rich","richard","right","rights","ring","ringtones","risk","river","road","robert","rock","rol","role","room","rooms","root","rose","round","row","royal","rsa","rule","rules","running","russia","russian","safe","safety","said","saint","sale","sales","same","sample","samsung","santa","satellite","saturday","save","saying","says","scale","schedule","school","schools","science","sciences","scientific","score","scott","screen","sdk","search","searches","season","seattle","second","seconds","secretary","section","sections","sector","secure","security","seem","seems","seen","select","selected","selection","self","sell","seller","sellers","selling","send","senior","sense","sent","separate","september","sequence","series","serious","serve","server","servers","service","services","session","sets","setting","settings","seven","several","sha","shall","share","sheet","ship","shipping","ships","shirt","shirts","shoes","shop"
,"shopping","shops","short","shot","should","show","showing","shown","shows","sid","side","sign","signed","significant","silver","similar","simple","simply","since","single","site","sitemap","sites","situation","size","skills","skin","skip","small","smart","smith","snow","social","society","soft","software","sold","solid","solution","solutions","some","someone","something","sometimes","song","songs","sony","soon","sorry","sort","sorted","sound","source","sources","south","southern","space","spain","spanish","special","species","specific","specified","speed","spirit","sponsored","sport","sports","spring","sql","square","src","sre","ssd","ssh","ssl","staff","stage","stand","standard","standards","star","stars","start","started","starting","state","statement","statements","states","station","statistics","status","stay","steel","step","steps","steve","still","stock","stone","stop","storage","store","stores","stories","story","str","strategies","strategy","stream","street","string","strong","structure","stub","student","students","studies","studio","study","stuff","style","subject","subjects","submit","submitted","subs","subscribe","success","successful","such","suggest","suite","sum","summary","summer","sunday","super","supplies","supply","support","supported","sure","surface","surgery","survey","switch","system","systems","tab","table","tables","tag","tags","take","taken","takes","taking","talk","talking","target","task","tcp","teacher","teachers","teaching","team","tech","technical","techniques","technologies","technology","teen","teens","telephone","television","tell","temp","temperature","term","terms","test","testing","tests","texas","text","than","thank","thanks","that","their","them","theme","themselves","then","theory","therapy","there","therefore","these","they","thing","things","think","thinking","third","this","thomas","those","though","thought","thoughts","thousands","thread","three","through","throughout","thursday","thus","tickets","tid","time","times","ti
p","tips","title","titles","tls","tmp","today","todo","together","told","took","tool","tools","topic","topics","total","touch","tour","tours","towards","town","toys","track","trade","trademarks","trading","traditional","traffic","training","transfer","transport","transportation","travel","treatment","tree","trial","trip","true","trust","truth","trying","tuesday","turn","type","types","udp","uid","under","understand","understanding","union","unique","unit","united","units","universal","university","unknown","unless","until","update","updated","updates","upgrade","upon","upper","urban","url","used","useful","user","username","users","uses","using","usr","usually","vacation","val","valid","valley","value","values","variable","variety","various","vegas","vehicle","vehicles","ver","version","very","video","videos","view","viewed","views","village","virginia","virtual","virus","vision","visit","visitors","visual","voice","volume","vote","vpn","wait","walk","wall","wan","want","wanted","warning","washington","waste","watch","watches","water","ways","weather","website","websites","wedding","wednesday","week","weekend","weekly","weeks","weight","welcome","well","went","were","west","western","what","when","where","whether","which","while","white","whole","wholesale","whose","wide","wife","wild","will","william","williams","wind","window","windows","wine","winter","wireless","wish","with","within","without","woman","women","wood","word","words","work","worked","workers","working","works","workshop","world","worldwide","worth","would","write","writing","written","wrong","wrote","xbox","xml","yahoo","yaml","year","years","yellow","yesterday","york","young","your","yourself","youth","zealand","zone"]`)
135
+ );
136
+
137
/**
 * Dict ratio: share of the token covered by entries of DICT_WORDS.
 *
 * The token is reduced to its runs of ASCII letters (≥3 characters,
 * lower-cased). Each run is scanned left to right: at every position the
 * longest dictionary word starting there is consumed; if none matches, the
 * scan advances by one character.
 *
 * @param token - Token to analyse.
 * @returns Matched character count divided by `token.length`, or 0 when the
 *   token has no letter run of at least 3 characters.
 */
export function computeDictRatio(token: string): number {
  const runs = token
    .split(/[^A-Za-z]+/)
    .filter(part => part.length >= 3)
    .map(part => part.toLowerCase());
  if (runs.length === 0) return 0;

  let covered = 0;
  for (const run of runs) {
    let cursor = 0;
    while (cursor < run.length) {
      // Try candidate lengths from longest to shortest; stop at the first hit.
      let hit = 0;
      for (let len = run.length - cursor; len >= 1 && hit === 0; len--) {
        if (DICT_WORDS.has(run.substring(cursor, cursor + len))) hit = len;
      }
      covered += hit;
      cursor += hit > 0 ? hit : 1;
    }
  }

  // At least one run of >= 3 letters exists here, so token.length is non-zero.
  return covered / token.length;
}
181
+
182
+ // ─── Adjusted entropy ─────────────────────────────────────────────────────
183
+
184
/** Adjusted-entropy value a token must exceed for isHighEntropy to return true. */
export const ENTROPY_THRESHOLD = 5.5;
/** Tokens shorter than this are never reported as high entropy. */
export const MIN_ENTROPY_TOKEN_LENGTH = 32;
// Weight of the max trigram density; this term is added to the base entropy.
const W1_DENSITY = 3.0;
// Weight of the word ratio; this term is subtracted from the score.
const W2_WORD = 3.0;
// Weight of the dictionary ratio; this term is subtracted from the score.
const W3_DICT = 4.0;
// Flat amount subtracted when a token is mostly hex and contains a dash.
const HEX_PENALTY = 2.5;
// The hex ratio must be strictly greater than this for HEX_PENALTY to apply.
const HEX_RATIO_THRESHOLD = 0.9;
191
+
192
/**
 * Adjusted entropy score for a token:
 *
 *   shannon + maxSegmentDensity×W1 − wordRatio×W2 − dictRatio×W3 − hexPenalty
 *
 * The hex penalty is subtracted only for UUID-like tokens: hex ratio above
 * HEX_RATIO_THRESHOLD and at least one "-" in the token.
 *
 * @param data - Token to score.
 * @returns The adjusted score; higher means more secret-like.
 */
export function calculateAdjustedEntropy(data: string): number {
  // Terms are applied one at a time, in the same order as the formula above.
  let score = shannonEntropy(data);
  score += maxSegmentDensity(data) * W1_DENSITY;
  score -= computeWordRatio(data) * W2_WORD;
  score -= computeDictRatio(data) * W3_DICT;

  const uuidLike = computeHexRatio(data) > HEX_RATIO_THRESHOLD && data.includes("-");
  if (uuidLike) score -= HEX_PENALTY;

  return score;
}
212
+
213
/**
 * Decide whether a token should be treated as high entropy.
 *
 * A token qualifies only if it is at least MIN_ENTROPY_TOKEN_LENGTH long,
 * is not accepted by isSafeContent, and its adjusted entropy is strictly
 * above ENTROPY_THRESHOLD. The checks short-circuit in that order.
 *
 * @param data - Token to test.
 * @returns true when all three conditions hold.
 */
export function isHighEntropy(data: string): boolean {
  const longEnough = data.length >= MIN_ENTROPY_TOKEN_LENGTH;
  return (
    longEnough &&
    !isSafeContent(data) &&
    calculateAdjustedEntropy(data) > ENTROPY_THRESHOLD
  );
}
218
+
219
/**
 * Tokenize content and return the tokens classified as high entropy.
 *
 * Content is split on runs of whitespace and the delimiter characters
 * `[ ] { } " ' , / \ | ( ) & # @ ! < > ?`. Tokens shorter than
 * MIN_ENTROPY_TOKEN_LENGTH are discarded before isHighEntropy is consulted.
 *
 * @param content - Text to scan.
 * @returns Matching tokens in order of appearance (duplicates are kept).
 */
export function findHighEntropyTokens(content: string): string[] {
  const hits: string[] = [];
  for (const candidate of content.split(/[\s\[\]{}"',\/\\|()&#@!<>?]+/)) {
    if (candidate.length < MIN_ENTROPY_TOKEN_LENGTH) continue;
    if (isHighEntropy(candidate)) hits.push(candidate);
  }
  return hits;
}
@@ -1,11 +1,7 @@
1
1
  /**
2
2
  * Safety — Pi 集成层
3
3
  *
4
- * - Command Guard: 拦截危险 bash 命令
5
- * - Redirect Guard: bash 覆盖写入提示确认
6
- * - Protected Paths: write/edit/patch/read 保护路径提示确认
7
- * - Write Guard: 覆盖非空文件禁止 write (提示使用 patch)
8
- * - Secret Redact: API Key / Token 自动掩码
4
+ * - Secret Redact: API Key / Token 自动掩码
9
5
  */
10
6
 
11
7
  import type {
@@ -13,12 +9,7 @@ import type {
13
9
  ExtensionContext,
14
10
  ToolResultEvent,
15
11
  } from "@earendil-works/pi-coding-agent";
16
- import * as fs from "node:fs";
17
- import { resolve } from "node:path";
18
12
  import {
19
- checkProtectedPath,
20
- collectBashDangers,
21
- formatBashDangers,
22
13
  detectSecrets,
23
14
  maskSecret,
24
15
  } from "./detect.js";
@@ -48,89 +39,6 @@ function formatRedactionContext(event: ToolResultEvent): string {
48
39
  // ─── Setup ──────────────────────────────────────────────────────────────────
49
40
 
50
41
  export function setupSafety(pi: ExtensionAPI) {
51
- // ── Command Guard + Protected Paths + Write Guard (tool_call) ─────────
52
-
53
- pi.on("tool_call", async (event, ctx) => {
54
-
55
- // Gate 1: 危险命令 + 覆盖写入 + 读取保护路径
56
- if (event.toolName === "bash") {
57
- const command = (event.input as { command?: string }).command;
58
- if (command) {
59
- const dangers = collectBashDangers(command, ctx.cwd);
60
- if (dangers.length > 0) {
61
- const message = formatBashDangers(dangers)!;
62
- if (!ctx.hasUI) {
63
- return { block: true, reason: `⚠ ${message} (non-interactive)` };
64
- }
65
- const choice = await ctx.ui.select(
66
- `⚠️ ${message}\n\nAllow execution?`,
67
- ["Block", "Allow once"],
68
- );
69
- if (!choice || choice === "Block") {
70
- return { block: true, reason: `⚠ ${message}` };
71
- }
72
- }
73
- }
74
- }
75
-
76
- // Gate 2: write/edit/patch 写入保护路径
77
- if (event.toolName === "write" || event.toolName === "edit" || event.toolName === "patch") {
78
- // For write/edit, path is a single field; for patch, check all patches[].path
79
- const filePaths: string[] = event.toolName === "patch"
80
- ? (event.input as any).patches?.filter((p: any) => p?.path).map((p: any) => p.path) ?? []
81
- : [(event.input as any).path ?? (event.input as any).file ?? (event.input as any).file_path].filter(Boolean);
82
- for (const filePath of filePaths) {
83
- const danger = checkProtectedPath(filePath);
84
- if (danger) {
85
- if (!ctx.hasUI) {
86
- return { block: true, reason: `🔒 ${danger}\nmay contain sensitive information` };
87
- }
88
- const choice = await ctx.ui.select(
89
- `🔒 ${danger}\nmay contain sensitive information\n\nProceed?`,
90
- ["Block", "Allow once"],
91
- );
92
- if (!choice || choice === "Block") {
93
- return { block: true, reason: `🔒 ${danger}\nmay contain sensitive information` };
94
- }
95
- break; // User approved — skip remaining paths
96
- }
97
- }
98
- }
99
-
100
- // Gate 3: 写保护(已有内容的文件禁止 write,直接返回信息给 agent)
101
- if (event.toolName === "write") {
102
- const filePath = (event.input as any).path ?? (event.input as any).file ?? (event.input as any).file_path;
103
- if (filePath) {
104
- try {
105
- const abs = resolve(ctx.cwd, filePath);
106
- if (fs.existsSync(abs) && fs.readFileSync(abs, "utf8").length > 0) {
107
- return { block: true, reason: "Overwriting a non-empty file is dangerous, use the patch tool instead!" };
108
- }
109
- } catch { /* file doesn't exist */ }
110
- }
111
- }
112
-
113
- // Gate 4: read 工具读取保护路径(bash 读取已在 Gate 1 处理)
114
- if (event.toolName === "read") {
115
- const filePath = (event.input as any).path ?? (event.input as any).file ?? (event.input as any).file_path;
116
- if (filePath) {
117
- const danger = checkProtectedPath(filePath);
118
- if (danger) {
119
- if (!ctx.hasUI) {
120
- return { block: true, reason: `🔒 Reading protected file: ${danger}\nmay contain sensitive information` };
121
- }
122
- const choice = await ctx.ui.select(
123
- `🔒 Reading protected file: ${danger}\nmay contain sensitive information\n\nProceed?`,
124
- ["Block", "Allow once"],
125
- );
126
- if (!choice || choice === "Block") {
127
- return { block: true, reason: `🔒 Reading protected file: ${danger}\nmay contain sensitive information` };
128
- }
129
- }
130
- }
131
- }
132
- });
133
-
134
42
  // ── Secret Redact (tool_result) ────────────────────────────────────────
135
43
 
136
44
  const handleToolResult = async (
@@ -0,0 +1,155 @@
1
+ /**
2
+ * Safety Detection — known secret patterns and safe-pattern exclusions
3
+ */
4
+
5
+ import { basename, extname } from "node:path";
6
+ import {
7
+ type SecretPattern,
8
+ type ConfigStringEntry,
9
+ CONFIG_FILE_EXTENSIONS,
10
+ CONFIG_BASENAME_REGEX,
11
+ SENSITIVE_CONFIG_KEY_REGEX,
12
+ PLACEHOLDER_VALUE_REGEX,
13
+ CONFIG_VALUE_MIN_LENGTH,
14
+ } from "./types.js";
15
+
16
+ // ─── High-confidence Secret Patterns (40+ known formats) ─────────────────
17
+
18
/**
 * Catalogue of known secret formats.
 *
 * Each entry names the secret kind and gives the regex that recognises it,
 * together with `minLength`, `allowsSpaces` and `highConfidence` settings that
 * the detector reads. Entries with `highConfidence: true` have an unambiguous
 * prefix or structure; the generic entries at the end are lower confidence and
 * are checked against SAFE_PATTERNS.
 *
 * None of the regexes carries the `g` flag, so `.test()` on them is stateless.
 */
export const SECRET_PATTERNS: SecretPattern[] = [
  // AWS
  { name: "AWS Access Key ID", pattern: /AKIA[0-9A-Z]{16}/, minLength: 16, allowsSpaces: false, highConfidence: true },
  { name: "AWS Secret Access Key", pattern: /(?:aws)?_?(?:secret)?_?(?:access)?_?key['"\s:=]+['"]?[0-9a-zA-Z/+]{40}['"]?/i, minLength: 30, allowsSpaces: false, highConfidence: true },
  // GitHub
  { name: "GitHub OAuth Token", pattern: /gho_[0-9a-zA-Z]{36}/, minLength: 36, allowsSpaces: false, highConfidence: true },
  { name: "GitHub App Token", pattern: /(?:ghu|ghs)_[0-9a-zA-Z]{36}/, minLength: 36, allowsSpaces: false, highConfidence: true },
  { name: "GitHub PAT", pattern: /ghp_[0-9a-zA-Z]{36}/, minLength: 36, allowsSpaces: false, highConfidence: true },
  { name: "GitHub Fine-Grained Token", pattern: /github_pat_[0-9a-zA-Z_]{22,}/, minLength: 26, allowsSpaces: false, highConfidence: true },
  // GitLab
  { name: "GitLab PAT", pattern: /glpat-[0-9a-zA-Z\-_]{20,}/, minLength: 20, allowsSpaces: false, highConfidence: true },
  { name: "GitLab Runner Token", pattern: /glrt-[0-9a-zA-Z_\-]{20,}/, minLength: 20, allowsSpaces: false, highConfidence: true },
  // Slack
  { name: "Slack Token", pattern: /xox[baprs]-[0-9a-zA-Z\-]{10,48}/, minLength: 15, allowsSpaces: false, highConfidence: true },
  { name: "Slack Webhook URL", pattern: /https:\/\/hooks\.slack\.com\/services\/T[a-zA-Z0-9_]{8,}\/B[a-zA-Z0-9_]{8,}\/[a-zA-Z0-9_]{24}/, minLength: 60, allowsSpaces: false, highConfidence: true },
  // JWT
  { name: "JSON Web Token", pattern: /eyJ[a-zA-Z0-9_-]{10,}\.eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}/, minLength: 36, allowsSpaces: false, highConfidence: true },
  // Google
  { name: "Google API Key", pattern: /AIza[0-9A-Za-z\-_]{35}/, minLength: 35, allowsSpaces: false, highConfidence: true },
  { name: "Google OAuth Token", pattern: /ya29\.[0-9A-Za-z\-_]+/, minLength: 10, allowsSpaces: false, highConfidence: true },
  // Stripe
  { name: "Stripe Secret Key", pattern: /sk_live_[0-9a-zA-Z]{24,}/, minLength: 24, allowsSpaces: false, highConfidence: true },
  { name: "Stripe Restricted Key", pattern: /rk_live_[0-9a-zA-Z]{24,}/, minLength: 24, allowsSpaces: false, highConfidence: true },
  // Twilio / SendGrid / Discord
  { name: "Twilio API Key", pattern: /SK[a-z0-9]{32}/, minLength: 30, allowsSpaces: false, highConfidence: true },
  { name: "SendGrid API Key", pattern: /SG\.[a-zA-Z0-9_-]{22,}\.[a-zA-Z0-9_-]{40,}/, minLength: 40, allowsSpaces: false, highConfidence: true },
  { name: "Discord Bot Token", pattern: /[MN][A-Za-z\d]{23,}\.[\w-]{6}\.[\w-]{27,}/, minLength: 40, allowsSpaces: false, highConfidence: true },
  // OpenAI / Anthropic / Volcengine Ark
  { name: "OpenAI API Key", pattern: /sk-[a-zA-Z0-9]{20,}T3BlbkFJ[a-zA-Z0-9]{20,}/, minLength: 40, allowsSpaces: false, highConfidence: true },
  { name: "OpenAI API Key (New)", pattern: /sk-(?:proj-)?[a-zA-Z0-9\-_]{40,}/, minLength: 40, allowsSpaces: false, highConfidence: true },
  { name: "Anthropic API Key", pattern: /sk-ant-api[0-9]{2}-[a-zA-Z0-9\-_]{80,}/, minLength: 80, allowsSpaces: false, highConfidence: true },
  { name: "Volcengine Ark API Key", pattern: /ark-[a-zA-Z0-9\-_]{20,}/, minLength: 20, allowsSpaces: false, highConfidence: true },
  // NPM / PyPI
  { name: "NPM Token", pattern: /npm_[a-zA-Z0-9]{36}/, minLength: 36, allowsSpaces: false, highConfidence: true },
  { name: "PyPI Token", pattern: /pypi-[a-zA-Z0-9_\-]{50,}/, minLength: 50, allowsSpaces: false, highConfidence: true },
  // Private Keys
  { name: "RSA Private Key", pattern: /-----BEGIN RSA PRIVATE KEY-----\r?\n(?:[A-Za-z0-9+/=]+\r?\n)+-----END RSA PRIVATE KEY-----/, minLength: 40, allowsSpaces: true, highConfidence: true },
  { name: "OpenSSH Private Key", pattern: /-----BEGIN OPENSSH PRIVATE KEY-----\r?\n(?:[A-Za-z0-9+/=]+\r?\n)+-----END OPENSSH PRIVATE KEY-----/, minLength: 40, allowsSpaces: true, highConfidence: true },
  { name: "EC Private Key", pattern: /-----BEGIN EC PRIVATE KEY-----\r?\n(?:[A-Za-z0-9+/=]+\r?\n)+-----END EC PRIVATE KEY-----/, minLength: 40, allowsSpaces: true, highConfidence: true },
  // ASCII-armored PGP blocks place optional "Key: value" armor headers and a
  // blank separator line between the BEGIN marker and the base64 body, so both
  // are accepted here; the bare header-less form is still matched as well.
  { name: "PGP Private Key", pattern: /-----BEGIN PGP PRIVATE KEY BLOCK-----\r?\n(?:[A-Za-z0-9-]+: [^\r\n]*\r?\n)*(?:[ \t]*\r?\n)?(?:[A-Za-z0-9+/=]+\r?\n)+-----END PGP PRIVATE KEY BLOCK-----/, minLength: 40, allowsSpaces: true, highConfidence: true },
  { name: "Generic Private Key", pattern: /-----BEGIN (ENCRYPTED )?PRIVATE KEY-----\r?\n(?:[A-Za-z0-9+/=]+\r?\n)+-----END \1PRIVATE KEY-----/, minLength: 40, allowsSpaces: true, highConfidence: true },
  // Database URIs
  { name: "MongoDB Connection String", pattern: /mongodb(?:\+srv)?:\/\/[^\s'"]+:[^\s'"]+@[^\s'"]+/, minLength: 20, allowsSpaces: false, highConfidence: true },
  { name: "PostgreSQL Connection String", pattern: /postgres(?:ql)?:\/\/[^\s'"]+:[^\s'"]+@[^\s'"]+/, minLength: 20, allowsSpaces: false, highConfidence: true },
  { name: "MySQL Connection String", pattern: /mysql:\/\/[^\s'"]+:[^\s'"]+@[^\s'"]+/, minLength: 20, allowsSpaces: false, highConfidence: true },
  { name: "Redis Connection String", pattern: /redis:\/\/[^\s'"]*:[^\s'"]+@[^\s'"]+/, minLength: 15, allowsSpaces: false, highConfidence: true },
  // URL-embedded passwords
  { name: "Password in URL", pattern: /[a-zA-Z]{3,10}:\/\/[^/\s:@]{3,20}:[^/\s:@]{3,20}@[^\s'"]+/, minLength: 15, allowsSpaces: false, highConfidence: true },
  // Generic assignments (lower confidence — checked against SAFE_PATTERNS)
  { name: "Bearer Token", pattern: /[Bb]earer\s+[a-zA-Z0-9\-._~+/]+=*/, minLength: 15, allowsSpaces: false, highConfidence: false },
  { name: "Basic Auth Header", pattern: /[Bb]asic\s+[a-zA-Z0-9+/]{20,}={0,2}/, minLength: 20, allowsSpaces: false, highConfidence: false },
  { name: "API Key Assignment", pattern: /(?:api[_-]?key|apikey|api[_-]?secret)['"\s:=]+['"]?[a-zA-Z0-9\-._]{20,}['"]?/i, minLength: 20, allowsSpaces: false, highConfidence: false },
  { name: "Secret Assignment", pattern: /(?:secret|token|password|passwd|pwd)['"\s:=]+['"]?[a-zA-Z0-9\-._!@#$%^&*]{8,}['"]?/i, minLength: 12, allowsSpaces: false, highConfidence: false },
];
72
+
73
+ // ─── Safe Patterns (false-positive exclusion) ────────────────────────────
74
+
75
/**
 * Shapes of strings that are treated as harmless even though they may look
 * secret-like. All patterns are anchored at the start of the string, and every
 * one except the placeholder pattern is anchored at the end as well.
 */
export const SAFE_PATTERNS: RegExp[] = [
  // URLs without credentials
  /^https?:\/\/[a-zA-Z0-9.-]+(?:\/[a-zA-Z0-9.\/_\-?&=#%]*)?$/,

  // Relative file paths
  /^\.\.?\/[a-zA-Z0-9_\-./]+$/,

  // Absolute Unix paths
  /^\/[a-zA-Z0-9_\-./]+$/,

  // Windows paths
  /^[a-zA-Z]:\\[a-zA-Z0-9_\-\\./]+$/,

  // Email addresses
  /^[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}$/,

  // UUIDs
  /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/,

  // Semver
  /^v?\d+\.\d+\.\d+(?:-[a-zA-Z0-9.]+)?(?:\+[a-zA-Z0-9.]+)?$/,

  // Placeholders (prefix match only: anything starting with one of these words)
  /^(?:xxx+|your[_-]?(?:api[_-]?)?key|placeholder|example|test|demo|sample)/i,

  // Git SHA-1
  /^[0-9a-f]{40}$/i,

  // SHA-256
  /^[0-9a-f]{64}$/i,

  // npm scoped packages
  /^@[a-z0-9-]+\/[a-z0-9-]+$/,
];
88
+
89
/**
 * Report whether `content` matches at least one entry of SAFE_PATTERNS.
 *
 * @param content - Candidate string to classify.
 * @returns `true` as soon as any safe pattern matches, otherwise `false`.
 */
export function isSafeContent(content: string): boolean {
  return SAFE_PATTERNS.some((safePattern) => safePattern.test(content));
}
95
+
96
+ // ─── Config-file detection ───────────────────────────────────────────────
97
+
98
/**
 * Decide from a path alone whether the file looks like a configuration file.
 *
 * A file qualifies when its base name matches CONFIG_BASENAME_REGEX or when
 * its lower-cased extension is listed in CONFIG_FILE_EXTENSIONS.
 *
 * @param filePath - Path of the file; may be omitted.
 * @returns `false` for a missing or empty path, otherwise the result of the check.
 */
export function isConfigLikeFile(filePath?: string): boolean {
  if (!filePath) return false;

  const fileName = basename(filePath);
  const extension = extname(fileName).toLowerCase();
  return CONFIG_BASENAME_REGEX.test(fileName) || CONFIG_FILE_EXTENSIONS.has(extension);
}
104
+
105
/**
 * Regexes that pull `key` / `value` named groups out of config-style text.
 * Each one is global so it can be driven with `String.prototype.matchAll`.
 * In every pattern the value is the last capture, followed at most by its
 * closing quote.
 */
const CONFIG_STRING_PATTERNS: RegExp[] = [
  // key: "value" or key = "value" (key may be quoted; value may hold backslash escapes)
  /(?<key>"[^"\r\n]+"|'[^'\r\n]+'|[A-Za-z0-9_.-]+)\s*[:=]\s*"(?<value>(?:\\.|[^"\\])*)"/g,

  // key: 'value' or key = 'value'
  /(?<key>"[^"\r\n]+"|'[^'\r\n]+'|[A-Za-z0-9_.-]+)\s*[:=]\s*'(?<value>(?:\\.|[^'\\])*)'/g,

  // key = value (unquoted; value runs to end of line or to a `#` / `;`)
  /(?<key>[A-Za-z0-9_.-]+)\s*=\s*(?<value>[^\r\n#;]+)/g,
];
110
+
111
/**
 * Canonicalise a config key to lower snake_case.
 *
 * Steps, in order: trim, drop one leading and one trailing quote character,
 * insert `_` at camelCase / acronym boundaries, lower-case, turn runs of
 * `.`, `-` and whitespace into `_`, collapse repeated `_`, and strip `_`
 * from both ends.
 *
 * @param key - Raw key text as captured from the config source.
 * @returns The normalised key (possibly empty).
 */
export function normalizeConfigKey(key: string): string {
  const unquoted = key.trim().replace(/^['"]|['"]$/g, "");

  // "HTTPServer" -> "HTTP_Server", then "apiKey" -> "api_Key".
  const acronymSplit = unquoted.replace(/([A-Z]+)([A-Z][a-z])/g, "$1_$2");
  const camelSplit = acronymSplit.replace(/([a-z0-9])([A-Z])/g, "$1_$2");

  const snake = camelSplit.toLowerCase().replace(/[.\-\s]+/g, "_");
  const collapsed = snake.replace(/_+/g, "_");
  return collapsed.replace(/^_+|_+$/g, "");
}
122
+
123
/**
 * Heuristic: does a config value look like it could be a real secret?
 *
 * The trimmed value is rejected when it is empty, a placeholder
 * (PLACEHOLDER_VALUE_REGEX), safe content (isSafeContent), a
 * `true`/`false`/`null` literal, or a plain decimal number. Anything that
 * survives must be at least CONFIG_VALUE_MIN_LENGTH characters long.
 *
 * @param value - Raw value text from a config entry.
 * @returns `true` when the value passes every exclusion and the length check.
 */
export function looksLikeSensitiveConfigValue(value: string): boolean {
  const candidate = value.trim();

  const isBenign =
    candidate === "" ||
    PLACEHOLDER_VALUE_REGEX.test(candidate) ||
    isSafeContent(candidate) ||
    /^(?:true|false|null)$/i.test(candidate) ||
    /^[+-]?\d+(?:\.\d+)?$/.test(candidate);

  return !isBenign && candidate.length >= CONFIG_VALUE_MIN_LENGTH;
}
132
+
133
/**
 * Collect every `key = value` / `key: "value"` style assignment found in
 * `content` by the CONFIG_STRING_PATTERNS regexes.
 *
 * @param content - Text to scan.
 * @returns One entry per distinct value span, in pattern order and then match
 *   order. `start` is the offset of the value's first character in `content`
 *   and `end` is `start + value.length` (exclusive), so
 *   `content.slice(start, end) === value`.
 */
export function extractConfigStringEntries(content: string): ConfigStringEntry[] {
  const entries: ConfigStringEntry[] = [];
  // Value spans already reported, keyed by "start-end".
  const seen = new Set<string>();

  for (const pattern of CONFIG_STRING_PATTERNS) {
    for (const match of content.matchAll(pattern)) {
      const key = match.groups?.key;
      const value = match.groups?.value;
      if (!key || value === undefined || match.index === undefined) continue;
      const full = match[0] ?? "";

      // Locate the value inside the match. In every pattern the value is the
      // last capture, followed at most by its closing quote, so the right-most
      // occurrence is the captured one. Searching from the left would land
      // inside the key whenever the value text also occurs there
      // (e.g. `token: "token"`). An empty value can only come from the quoted
      // patterns and sits just before the closing quote.
      const rel = value === "" ? full.length - 1 : full.lastIndexOf(value);
      if (rel < 0) continue;

      const start = match.index + rel;
      const end = start + value.length;
      const dedupeKey = `${start}-${end}`;
      if (seen.has(dedupeKey)) continue;
      seen.add(dedupeKey);
      entries.push({ key, normalizedKey: normalizeConfigKey(key), value, start, end });
    }
  }

  return entries;
}
@@ -0,0 +1,50 @@
1
+ /**
2
+ * Safety Detection — shared types and constants
3
+ */
4
+
5
+ // ─── Match types ──────────────────────────────────────────────────────────
6
+
7
/**
 * Label recorded in `SecretMatch.source` to say which detector produced a match.
 * NOTE(review): the detectors themselves live outside this file; confirm the
 * exact meaning of each label there.
 */
export type SecretMatchSource = "pattern" | "regex" | "entropy";
8
+
9
/** A single detected secret. */
export interface SecretMatch {
  /** Human-readable name of the finding. */
  name: string;
  /** Position where the match begins — presumably an offset into the scanned text; verify against the detector. */
  start: number;
  /** Position where the match ends — presumably exclusive; verify against the detector. */
  end: number;
  /** Matched text — presumably the exact substring between `start` and `end`; TODO confirm. */
  original: string;
  /** Which detector reported the match. */
  source: SecretMatchSource;
}
16
+
17
/** Description of one known secret format. */
export interface SecretPattern {
  /** Human-readable name of the secret kind. */
  name: string;
  /** Regex that recognises the secret. */
  pattern: RegExp;
  /** Minimum length setting for this format — how it is enforced is up to the detector; TODO confirm. */
  minLength: number;
  /** Whether matches of this format may contain whitespace (e.g. multi-line key blocks) — TODO confirm usage in the detector. */
  allowsSpaces: boolean;
  /** If true, skip safe-pattern exclusion (unambiguous prefix) */
  highConfidence: boolean;
}
25
+
26
/** Optional context supplied to secret detection. */
export interface DetectSecretsOptions {
  /** Path of the file the scanned content came from, when known. */
  filePath?: string;
}
29
+
30
+ // ─── Internal types ──────────────────────────────────────────────────────
31
+
32
/** One key/value assignment extracted from config-style text. */
export interface ConfigStringEntry {
  /** Key exactly as captured from the source, including any surrounding quotes. */
  key: string;
  /** `key` after normalisation to lower snake_case. */
  normalizedKey: string;
  /** Captured value text. */
  value: string;
  /** Offset of the value's first character in the scanned content. */
  start: number;
  /** Offset just past the value's last character (`start + value.length`). */
  end: number;
}
39
+
40
+ // ─── Constants ────────────────────────────────────────────────────────────
41
+
42
/** Minimum length threshold for scanning — presumably shorter inputs are skipped; verify against the detector. */
export const MIN_SCAN_LENGTH = 10;
/** Minimum trimmed length a config value must have to be considered potentially sensitive. */
export const CONFIG_VALUE_MIN_LENGTH = 32;
/** Lower-case file extensions (with leading dot) that mark a file as config-like. */
export const CONFIG_FILE_EXTENSIONS = new Set([
  ".json", ".jsonc", ".env", ".toml", ".yaml", ".yml",
  ".ini", ".cfg", ".conf", ".properties",
]);
/** Matches dotenv base names: `.env` itself and `.env.<suffix>` (case-insensitive). */
export const CONFIG_BASENAME_REGEX = /^\.env(?:\..+)?$/i;
/**
 * Matches key names that suggest a credential. The keyword must be delimited
 * by `_` or by the start/end of the string, so it is presumably meant to be
 * applied to snake_case-normalised keys — verify against the caller.
 */
export const SENSITIVE_CONFIG_KEY_REGEX = /(?:^|_)(?:apikey|api_(?:key|secret|token)|access_(?:key|token)|refresh_token|client_secret|secret(?:_key)?|private_key|bearer_token|auth(?:orization|_token)?|pass(?:word|wd)?|pwd|token|webhook_secret)(?:_|$)/i;
/**
 * Matches values that are placeholders rather than real secrets: `${VAR}`,
 * `{{var}}`, `<var>`, and stock words such as `changeme` or `your-api-key`.
 * Anchored at both ends, so the whole value must be the placeholder.
 */
export const PLACEHOLDER_VALUE_REGEX = /^(?:\$\{[^}]+\}|\{\{[^}]+\}\}|<[^>]+>|xxx+|placeholder|example|sample|demo|test|changeme|your[_-]?(?:api[_-]?)?key(?:[_-]?here)?)$/i;