decorated-pi 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +58 -34
- package/extensions/file-times.ts +60 -2
- package/extensions/guidance.ts +5 -3
- package/extensions/index.ts +2 -0
- package/extensions/io.ts +210 -29
- package/extensions/lsp/client.ts +181 -428
- package/extensions/lsp/env.ts +45 -12
- package/extensions/lsp/format.ts +102 -237
- package/extensions/lsp/index.ts +8 -11
- package/extensions/lsp/manager.ts +249 -0
- package/extensions/lsp/prompt.ts +3 -42
- package/extensions/lsp/protocol.ts +219 -0
- package/extensions/lsp/servers.ts +80 -160
- package/extensions/lsp/tools.ts +160 -553
- package/extensions/lsp/types.ts +42 -0
- package/extensions/mcp/builtin.ts +126 -0
- package/extensions/mcp/client.ts +106 -0
- package/extensions/mcp/index.ts +123 -0
- package/extensions/patch.ts +291 -73
- package/extensions/providers/ark-coding.ts +2 -0
- package/extensions/safety/detect.ts +20 -744
- package/extensions/safety/entropy.ts +226 -0
- package/extensions/safety/index.ts +1 -93
- package/extensions/safety/patterns.ts +155 -0
- package/extensions/safety/types.ts +50 -0
- package/extensions/settings.ts +8 -0
- package/extensions/slash.ts +161 -7
- package/extensions/smart-at.ts +5 -5
- package/extensions/subdir-agents.ts +43 -13
- package/package.json +2 -3
- package/tsconfig.json +16 -0
- package/extensions/lsp/server-manager.ts +0 -309
- package/extensions/lsp/trust.ts +0 -45
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Safety Detection — Shannon entropy and adjusted entropy analysis
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { isSafeContent } from "./patterns.js";
|
|
6
|
+
|
|
7
|
+
// ─── Character classification ────────────────────────────────────────────
|
|
8
|
+
|
|
9
|
+
/**
 * Classify a character into one of five coarse classes.
 *
 * The letter and digit ranges are checked against the first UTF-16 code unit
 * of the argument; the dash check compares the whole argument with "-".
 *
 * @param c - Character to classify.
 * @returns "U" for A–Z, "L" for a–z, "D" for 0–9, "S" for "-", otherwise "X".
 */
export function charClass(c: string): "U" | "L" | "D" | "S" | "X" {
  const unit = c.charCodeAt(0);
  const within = (lo: number, hi: number): boolean => unit >= lo && unit <= hi;

  if (within(0x41, 0x5a)) return "U"; // 'A'..'Z'
  if (within(0x61, 0x7a)) return "L"; // 'a'..'z'
  if (within(0x30, 0x39)) return "D"; // '0'..'9'
  return c === "-" ? "S" : "X";
}
|
|
18
|
+
|
|
19
|
+
// ─── Shannon entropy ──────────────────────────────────────────────────────
|
|
20
|
+
|
|
21
|
+
/**
 * Shannon entropy of a string in bits per symbol: H(X) = -Σ p(x) · log₂(p(x)).
 *
 * Symbols are Unicode code points (the unit produced by string iteration).
 * Probabilities are normalised by the number of iterated symbols rather than
 * by `data.length`, which counts UTF-16 code units and would under-weight
 * every symbol whenever the input contains astral characters.
 *
 * @param data - Text to measure.
 * @returns Entropy in bits per symbol; 0 for the empty string.
 */
export function shannonEntropy(data: string): number {
  if (data.length === 0) return 0;

  const freq = new Map<string, number>();
  let total = 0;
  for (const symbol of data) {
    freq.set(symbol, (freq.get(symbol) ?? 0) + 1);
    total++;
  }

  let entropy = 0;
  for (const count of freq.values()) {
    const p = count / total;
    entropy -= p * Math.log2(p);
  }
  return entropy;
}
|
|
34
|
+
|
|
35
|
+
// ─── Trigram density ──────────────────────────────────────────────────────
|
|
36
|
+
|
|
37
|
+
/**
 * Score one 3-character window by how "secret-like" its class pattern is.
 *
 * Rules are evaluated in this order (first match wins):
 * - any character of class X            → 0
 * - all three characters are digits     → 0
 * - contains '-' and 3 distinct classes → 1.0
 * - starts with a digit and mixes digits with letters (e.g. "4Vi") → 1.0
 * - ≥2 uppercase and ≥1 lowercase letters (e.g. "AbA")             → 0.8
 * - anything else                       → 0
 *
 * @param c1 - First character of the window.
 * @param c2 - Second character of the window.
 * @param c3 - Third character of the window.
 * @returns 0, 0.8 or 1.0 according to the rules above.
 */
export function trigramScore(c1: string, c2: string, c3: string): number {
  const classes = [c1, c2, c3].map((ch) => charClass(ch));
  const countOf = (kind: "U" | "L" | "D" | "S" | "X"): number =>
    classes.filter((k) => k === kind).length;

  if (countOf("X") > 0) return 0;

  const digits = countOf("D");
  if (digits === 3) return 0;

  const distinct = new Set(classes).size;
  if (countOf("S") > 0 && distinct >= 3) return 1.0;

  const uppers = countOf("U");
  const lowers = countOf("L");
  const mixesDigitsAndLetters = digits > 0 && uppers + lowers > 0;
  if (mixesDigitsAndLetters && classes[0] === "D") return 1.0;

  return uppers >= 2 && lowers >= 1 ? 0.8 : 0;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Break a token into segments at every X-class character.
 *
 * X-class characters act purely as separators and are dropped. Segments
 * shorter than 3 characters are discarded.
 *
 * @param token - Token to split.
 * @returns The surviving segments, in their original order.
 */
export function splitByXClass(token: string): string[] {
  const kept: string[] = [];
  let run = "";

  // Emit the pending run if it is long enough, then start a new one.
  const flush = (): void => {
    if (run.length >= 3) kept.push(run);
    run = "";
  };

  for (const ch of token) {
    if (charClass(ch) === "X") flush();
    else run += ch;
  }
  flush();

  return kept;
}
|
|
74
|
+
|
|
75
|
+
/**
 * Mean trigram score over every 3-character sliding window of a segment.
 *
 * @param segment - Segment to score.
 * @returns Sum of window scores divided by the number of windows
 *          (`segment.length - 2`); 0 when the segment is shorter than 3.
 */
export function segmentDensity(segment: string): number {
  const windows = segment.length - 2;
  if (windows < 1) return 0;

  let sum = 0;
  for (let start = 0; start < windows; start++) {
    sum += trigramScore(
      segment.charAt(start),
      segment.charAt(start + 1),
      segment.charAt(start + 2),
    );
  }
  return sum / windows;
}
|
|
84
|
+
|
|
85
|
+
/**
 * Highest segment density among the X-split segments of a token.
 *
 * @param token - Token to analyse.
 * @returns The largest `segmentDensity` value, or 0 when the token yields
 *          no segments.
 */
export function maxSegmentDensity(token: string): number {
  return splitByXClass(token).reduce(
    (best, segment) => Math.max(best, segmentDensity(segment)),
    0,
  );
}
|
|
96
|
+
|
|
97
|
+
// ─── Word / dictionary / hex ratios ───────────────────────────────────────
|
|
98
|
+
|
|
99
|
+
/**
 * Word ratio: share of the token made up of "word-like" letter runs.
 *
 * A run is a maximal sequence of ASCII letters. It counts as word-like when
 * it is at least 3 characters long and contains one of the vowels
 * a, e, i, o, u (case-insensitive). Natural-language words reduce the
 * likelihood that a token is a secret.
 *
 * @param token - Token to analyse.
 * @returns Total length of word-like runs divided by `token.length`;
 *          0 when there are none.
 */
export function computeWordRatio(token: string): number {
  const vowel = /[aeiou]/;
  let wordChars = 0;
  let run = "";

  // Account for the pending letter run, then start a new one.
  const closeRun = (): void => {
    if (run.length >= 3 && vowel.test(run)) wordChars += run.length;
    run = "";
  };

  for (const ch of token) {
    const kind = charClass(ch);
    if (kind === "L" || kind === "U") run += ch.toLowerCase();
    else closeRun();
  }
  closeRun();

  return wordChars > 0 ? wordChars / token.length : 0;
}
|
|
120
|
+
|
|
121
|
+
/**
 * Hex ratio: share of the token made up of hexadecimal digits or dashes.
 *
 * @param token - Token to analyse.
 * @returns Number of characters matching `[0-9a-fA-F-]` divided by
 *          `token.length`; 0 for the empty string.
 */
export function computeHexRatio(token: string): number {
  if (token.length === 0) return 0;
  const hits = token.match(/[0-9a-fA-F-]/g);
  return (hits?.length ?? 0) / token.length;
}
|
|
130
|
+
|
|
131
|
+
/** 2121 English + tech words for dictionary coverage check */
|
|
132
|
+
const DICT_WORDS: ReadonlySet<string> = new Set(
|
|
133
|
+
// prettier-ignore
|
|
134
|
+
JSON.parse(`["ability","able","about","above","abstract","abuse","academic","accept","acceptance","accepted","access","accessories","accommodation","according","account","accounting","accounts","across","action","actions","active","activities","activity","actual","actually","added","addition","additional","address","adm","admin","administration","administrative","adult","advance","advanced","adventure","advertise","advertisement","advertising","advice","aes","affairs","affiliate","affiliates","africa","african","after","again","against","agencies","agency","agent","agents","agree","agreement","airport","album","allow","allowed","allows","almost","alone","along","already","also","alternative","although","always","amateur","amazon","america","american","among","amount","analysis","angeles","animal","animals","announcements","annual","another","answer","answers","anti","anyone","anything","apartments","api","apparel","appear","apple","application","applications","applied","apply","approach","appropriate","approval","approved","approximately","april","architecture","archive","archives","area","areas","argument","arizona","army","around","article","articles","artist","artists","arts","asia","asian","asked","assessment","assistance","assistant","associated","associates","association","attack","attention","attorney","auction","auctions","audio","august","australia","australian","auth","author","authority","authors","auto","automatically","automotive","availability","available","avenue","average","avg","avoid","award","awards","away","baby","back","background","balance","ball","band","bank","base","baseball","based","basic","basis","basket","battery","beach","beautiful","beauty","became","because","become","been","before","began","begin","beginning","behind","being","believe","below","benefit","benefits","best","better","between","beyond","bible","bill","birth","black","block","blog","blogs","blood","blue","board","boards","body","book","books","born","boston","both","botto
m","boys","branch","brand","brands","break","breakfast","breast","bridge","bring","british","brought","brown","browse","browser","btn","budget","buf","build","building","built","bush","business","businesses","button","buyer","buying","cable","calendar","california","call","called","calls","came","camera","cameras","camp","campaign","campus","canada","canadian","cancer","canon","capacity","capital","card","cards","care","career","careers","carolina","cars","cart","case","cases","cash","casino","catalog","categories","category","cause","cb","cell","cells","center","centers","central","centre","century","certain","certificate","certified","cfg","chain","chair","challenge","chance","change","changed","changes","channel","chapter","character","characters","charge","charges","charles","chart","chat","cheap","check","chemical","chicago","chief","child","children","china","chinese","choice","choose","chris","christian","christmas","church","cities","city","civil","claim","claims","class","classes","classic","classifieds","clean","clear","cli","click","client","clients","clinical","close","closed","clothing","club","clubs","cnet","cnt","coast","code","codes","coffee","col","cold","collection","college","color","colorado","columbia","column","come","comes","coming","command","comment","comments","commerce","commercial","commission","committee","common","communication","communications","communities","community","companies","company","compare","compared","comparison","competition","complete","completed","complex","compliance","component","components","comprehensive","computer","computers","computing","condition","conditions","conference","configuration","congress","connect","connection","consider","considered","construction","consumer","contact","contacts","contains","content","contents","context","continue","continued","contract","control","cool","copy","copyright","core","corner","corporate","corporation","correct","cost","costs","could","council","count","counter","countries
","country","county","couple","course","courses","court","cover","coverage","covered","cpu","create","created","creating","creative","credit","creek","crime","critical","cross","crud","css","csv","cultural","culture","currency","current","currently","custom","customer","customers","daily","damage","dance","dark","data","database","date","dates","dating","david","days","db","dead","deal","deals","death","debt","december","decision","deep","default","defense","define","defined","definition","degree","delivery","demand","department","described","description","design","designated","designed","desktop","detail","detailed","details","determine","determined","dev","develop","developed","developer","developing","development","device","devices","diamond","dictionary","died","diet","difference","different","difficult","digital","dir","direct","directions","directly","director","directory","disclaimer","discount","discuss","discussion","disease","disp","display","distance","distribution","district","division","dlg","dns","doctor","document","documentation","documents","does","doing","dollar","dollars","domain","domestic","done","door","double","down","download","downloads","draft","drive","driver","driving","drop","drug","drugs","dst","during","dvds","each","early","earth","easily","east","eastern","easy","ebay","economic","economy","edge","edit","edition","editor","education","educational","effect","effective","effects","effort","efforts","either","election","electric","electronic","electronics","element","elements","else","email","emergency","emit","employee","employees","employment","enable","ending","energy","engine","engineering","england","english","enjoy","enough","ensure","enter","enterprise","entertainment","entire","entries","entry","env","environment","environmental","equal","equipment","err","error","errors","especially","essential","established","estate","europe","european","evaluation","even","event","events","ever","every","everyone","everything","evidence","evt
","example","examples","excellent","except","exchange","executive","exercise","existing","expect","expected","experience","expert","express","ext","extended","extension","external","extra","eyes","face","facilities","facility","fact","factor","factors","facts","faculty","failure","fair","faith","fall","families","family","fantasy","farm","fashion","fast","father","favorite","feat","feature","featured","features","february","federal","feed","feedback","feel","fees","feet","female","fiction","field","fields","figure","file","files","fill","film","films","filter","final","finally","finance","financial","find","finding","fine","fire","firm","first","fish","fishing","fitness","five","fixed","fixme","flag","flash","flat","flight","floor","florida","flow","flowers","focus","follow","following","follows","font","food","foot","football","force","ford","foreign","forest","form","format","former","forms","forum","forums","forward","found","foundation","four","frame","france","francisco","free","freedom","french","fresh","friday","friend","friendly","friends","from","front","ftr","fuel","full","fully","function","functional","functions","fund","funding","funds","furniture","further","future","galleries","gallery","game","games","gamma","garden","gave","gear","general","generally","generated","generation","george","georgia","german","germany","gets","getting","gid","gift","gifts","girl","girls","git","give","given","gives","giving","glass","global","goal","goals","goes","going","gold","golden","golf","gone","good","goods","google","government","gpt","gpu","grade","graduate","grand","grant","graphics","great","greater","green","ground","group","groups","growing","growth","grp","guarantee","guest","gui","guide","guidelines","guides","guitar","guys","hack","hair","half","hall","hand","hands","happy","hard","hardware","have","having","hdr","head","headlines","health","hear","heard","hearing","heart","heat","heavy","held","help","helpful","here","high","higher","highest","highly","hi
ll","himself","hire","historical","history","hits","hold","holiday","holidays","home","homepage","homes","hook","hope","horse","hospital","host","hosting","hotel","hotels","hour","hours","house","housing","houston","however","html","huge","human","icon","idea","ideas","identify","idx","illinois","image","images","img","immediately","impact","implementation","important","improve","improvement","inch","include","included","includes","including","income","increase","increased","independent","index","india","indian","individual","individuals","industrial","industry","info","information","informed","initial","input","inside","install","installation","instead","institute","institutions","instructions","instruments","insurance","int","integrated","intended","interactive","interest","interested","interesting","interests","interface","internal","international","internet","into","introduction","investment","involved","ipod","iraq","ireland","isbn","island","islands","israel","issue","issues","italian","italy","item","items","itself","jack","jackson","james","january","japan","japanese","java","jersey","jesus","jewelry","jobs","john","johnson","join","joined","joint","jones","journal","json","july","jump","june","just","justice","kansas","keep","key","keyword","keywords","kids","kind","kinds","king","kingdom","kitchen","know","knowledge","known","kong","label","labor","lake","lan","land","language","languages","large","larger","largest","last","late","later","latest","latin","laws","lead","leader","leaders","leadership","leading","league","learn","learning","least","leather","leave","left","legal","len","length","lesbian","less","letter","letters","level","levels","lib","library","license","life","light","like","likely","limit","limited","line","lines","link","links","linux","list","listed","listen","listing","listings","lists","literature","little","live","lives","living","llm","load","loan","loans","local","located","location","locations","login","logo","london","long","long
er","look","looking","looks","lord","loss","lost","lots","louis","love","lower","lowest","lyrics","mac","machine","machines","made","magazine","magazines","magic","mail","mailing","main","maintenance","major","make","makes","making","male","manage","management","manager","manual","manufacturer","manufacturing","many","maps","march","marine","mark","market","marketing","markets","martin","mary","mass","master","match","matching","material","materials","matter","mature","max","maximum","maybe","mean","means","measures","media","medical","medicine","medium","meet","meeting","meetings","mega","member","members","membership","memory","mental","menu","merchant","message","messages","metal","method","methods","mexico","michael","michigan","micro","microsoft","middle","might","mike","miles","military","million","min","mind","mini","minimum","minister","minnesota","minute","minutes","miss","missing","mission","mobile","mock","mod","mode","model","models","modern","modified","module","moment","monday","money","monitor","monitoring","month","monthly","months","more","morning","mortgage","most","mother","motion","motor","motorola","mount","mountain","move","moved","movement","movie","movies","moving","msg","much","multi","multimedia","multiple","museum","music","musical","must","myself","naked","name","names","nano","nation","national","native","natural","nature","nav","navigation","near","necessary","need","needed","needs","net","network","networking","networks","never","news","newsletter","next","nice","night","nlp","nokia","none","normal","north","northern","note","notes","nothing","notice","november","npm","num","number","numbers","nursing","oauth","object","october","offer","offered","offering","offers","office","officer","official","often","ohio","older","once","ones","online","only","ontario","open","opening","operating","operation","operations","opinion","opportunities","opportunity","ops","option","optional","options","oral","orange","order","orders","oregon","organiza
tion","organizations","original","orm","oss","other","others","otherwise","outdoor","output","outside","over","overall","overview","owned","owner","owners","pacific","pack","package","packages","page","pages","paid","pain","palm","panel","paper","paperback","papers","parent","parents","paris","park","parking","part","particular","particularly","parties","partner","partners","parts","party","pass","password","past","patch","path","patient","patients","paul","payment","paypal","peace","pennsylvania","people","percent","perfect","performance","perhaps","period","perm","permission","person","personal","persons","peter","phase","phentermine","phone","phones","photo","photography","photos","physical","pick","pics","picture","pictures","pid","piece","pink","pip","pipe","pkg","place","placed","places","plan","planning","plans","plant","plants","plastic","platform","play","played","player","players","playing","please","plus","pocket","point","points","poker","pol","police","policies","policy","political","politics","pool","poor","pop","popular","population","port","pos","position","positive","possible","post","posted","poster","posters","posts","potential","power","powered","practice","practices","premium","present","presentation","presented","president","press","pressure","pretty","prev","prevent","previous","price","prices","pricing","primary","prime","print","printer","printing","prior","privacy","private","pro","probably","problem","problems","procedure","procedures","process","processes","processing","prod","produce","produced","product","production","products","professional","professor","profile","profit","program","programme","programming","programs","progress","project","projects","properties","property","proposed","protect","protection","protein","provide","provided","provider","providers","provides","providing","ptr","public","publication","publications","published","publisher","publishing","purchase","purpose","purposes","quality","quantity","quarter","question","
questions","quick","quickly","quite","quote","quotes","race","racing","radio","ram","random","range","rank","rate","rated","rates","rather","rating","ratings","reach","read","reader","readers","reading","ready","real","really","reason","reasons","receive","received","recent","recently","recipes","recommend","recommendations","recommended","record","records","recovery","reduce","ref","reference","references","regarding","region","regional","register","registered","registration","regular","regulations","related","relations","relationship","release","released","releases","relevant","religion","religious","remember","remote","remove","rent","rental","rentals","repair","replies","reply","report","reported","reporting","reports","republic","req","request","requests","require","required","requirements","requires","res","research","reserve","reserved","resolution","resort","resource","resources","respect","respective","response","responsibility","responsible","rest","restaurant","restaurants","result","results","retail","return","returns","rev","review","reviews","rich","richard","right","rights","ring","ringtones","risk","river","road","robert","rock","rol","role","room","rooms","root","rose","round","row","royal","rsa","rule","rules","running","russia","russian","safe","safety","said","saint","sale","sales","same","sample","samsung","santa","satellite","saturday","save","saying","says","scale","schedule","school","schools","science","sciences","scientific","score","scott","screen","sdk","search","searches","season","seattle","second","seconds","secretary","section","sections","sector","secure","security","seem","seems","seen","select","selected","selection","self","sell","seller","sellers","selling","send","senior","sense","sent","separate","september","sequence","series","serious","serve","server","servers","service","services","session","sets","setting","settings","seven","several","sha","shall","share","sheet","ship","shipping","ships","shirt","shirts","shoes","shop","
shopping","shops","short","shot","should","show","showing","shown","shows","sid","side","sign","signed","significant","silver","similar","simple","simply","since","single","site","sitemap","sites","situation","size","skills","skin","skip","small","smart","smith","snow","social","society","soft","software","sold","solid","solution","solutions","some","someone","something","sometimes","song","songs","sony","soon","sorry","sort","sorted","sound","source","sources","south","southern","space","spain","spanish","special","species","specific","specified","speed","spirit","sponsored","sport","sports","spring","sql","square","src","sre","ssd","ssh","ssl","staff","stage","stand","standard","standards","star","stars","start","started","starting","state","statement","statements","states","station","statistics","status","stay","steel","step","steps","steve","still","stock","stone","stop","storage","store","stores","stories","story","str","strategies","strategy","stream","street","string","strong","structure","stub","student","students","studies","studio","study","stuff","style","subject","subjects","submit","submitted","subs","subscribe","success","successful","such","suggest","suite","sum","summary","summer","sunday","super","supplies","supply","support","supported","sure","surface","surgery","survey","switch","system","systems","tab","table","tables","tag","tags","take","taken","takes","taking","talk","talking","target","task","tcp","teacher","teachers","teaching","team","tech","technical","techniques","technologies","technology","teen","teens","telephone","television","tell","temp","temperature","term","terms","test","testing","tests","texas","text","than","thank","thanks","that","their","them","theme","themselves","then","theory","therapy","there","therefore","these","they","thing","things","think","thinking","third","this","thomas","those","though","thought","thoughts","thousands","thread","three","through","throughout","thursday","thus","tickets","tid","time","times","tip"
,"tips","title","titles","tls","tmp","today","todo","together","told","took","tool","tools","topic","topics","total","touch","tour","tours","towards","town","toys","track","trade","trademarks","trading","traditional","traffic","training","transfer","transport","transportation","travel","treatment","tree","trial","trip","true","trust","truth","trying","tuesday","turn","type","types","udp","uid","under","understand","understanding","union","unique","unit","united","units","universal","university","unknown","unless","until","update","updated","updates","upgrade","upon","upper","urban","url","used","useful","user","username","users","uses","using","usr","usually","vacation","val","valid","valley","value","values","variable","variety","various","vegas","vehicle","vehicles","ver","version","very","video","videos","view","viewed","views","village","virginia","virtual","virus","vision","visit","visitors","visual","voice","volume","vote","vpn","wait","walk","wall","wan","want","wanted","warning","washington","waste","watch","watches","water","ways","weather","website","websites","wedding","wednesday","week","weekend","weekly","weeks","weight","welcome","well","went","were","west","western","what","when","where","whether","which","while","white","whole","wholesale","whose","wide","wife","wild","will","william","williams","wind","window","windows","wine","winter","wireless","wish","with","within","without","woman","women","wood","word","words","work","worked","workers","working","works","workshop","world","worldwide","worth","would","write","writing","written","wrong","wrote","xbox","xml","yahoo","yaml","year","years","yellow","yesterday","york","young","your","yourself","youth","zealand","zone"]`)
|
|
135
|
+
);
|
|
136
|
+
|
|
137
|
+
/**
 * Dict ratio: share of the token covered by dictionary words.
 *
 * The token is reduced to lower-cased runs of ASCII letters (runs shorter
 * than 3 characters are ignored). Each run is scanned left to right: at every
 * position the longest `DICT_WORDS` entry starting there is consumed, and
 * when nothing matches the scan advances by one character.
 * A high ratio suggests English text or an identifier rather than a secret.
 *
 * @param token - Token to analyse.
 * @returns Matched character count divided by `token.length`; 0 when the
 *          token has no letter run of at least 3 characters.
 */
export function computeDictRatio(token: string): number {
  // Collect lower-cased letter runs of length >= 3.
  const runs: string[] = [];
  let run = "";
  for (const ch of token) {
    const kind = charClass(ch);
    if (kind === "L" || kind === "U") {
      run += ch.toLowerCase();
      continue;
    }
    if (run.length >= 3) runs.push(run);
    run = "";
  }
  if (run.length >= 3) runs.push(run);

  if (runs.length === 0) return 0;

  // Greedy longest-match cover of each run.
  let covered = 0;
  for (const letters of runs) {
    let start = 0;
    while (start < letters.length) {
      let matched = 0;
      // Try candidates from longest to shortest; stop at the first hit.
      for (let size = letters.length - start; size > 0 && matched === 0; size--) {
        if (DICT_WORDS.has(letters.substring(start, start + size))) matched = size;
      }
      covered += matched;
      start += matched > 0 ? matched : 1;
    }
  }

  // A non-empty run list implies a non-empty token, so the division is safe.
  return covered / token.length;
}
|
|
181
|
+
|
|
182
|
+
// ─── Adjusted entropy ─────────────────────────────────────────────────────
|
|
183
|
+
|
|
184
|
+
/** A token is flagged by `isHighEntropy` when its adjusted entropy is strictly greater than this. */
export const ENTROPY_THRESHOLD = 5.5;
/** Tokens shorter than this many UTF-16 code units are never flagged. */
export const MIN_ENTROPY_TOKEN_LENGTH = 32;
// Weight applied to the maximum segment trigram density (added to the score).
const W1_DENSITY = 3.0;
// Weight applied to the word ratio (subtracted from the score).
const W2_WORD = 3.0;
// Weight applied to the dictionary ratio (subtracted from the score).
const W3_DICT = 4.0;
// Flat amount subtracted for dash-containing, mostly-hex tokens.
const HEX_PENALTY = 2.5;
// The hex ratio must be strictly above this for HEX_PENALTY to apply.
const HEX_RATIO_THRESHOLD = 0.9;
|
|
191
|
+
|
|
192
|
+
/**
 * Adjusted entropy of a token:
 *
 *   shannon + maxSegmentDensity×W1 − wordRatio×W2 − dictRatio×W3 − hexPenalty
 *
 * The hex penalty is subtracted only for hyphenated, UUID-like tokens:
 * the hex ratio must exceed `HEX_RATIO_THRESHOLD` and the token must
 * contain '-'.
 *
 * @param data - Token to score.
 * @returns The adjusted entropy score.
 */
export function calculateAdjustedEntropy(data: string): number {
  const looksLikeUuid =
    computeHexRatio(data) > HEX_RATIO_THRESHOLD && data.includes("-");

  // Terms are applied in the same left-to-right order as the formula above.
  let score = shannonEntropy(data);
  score += maxSegmentDensity(data) * W1_DENSITY;
  score -= computeWordRatio(data) * W2_WORD;
  score -= computeDictRatio(data) * W3_DICT;
  score -= looksLikeUuid ? HEX_PENALTY : 0;
  return score;
}
|
|
212
|
+
|
|
213
|
+
/**
 * Decide whether a token should be treated as high-entropy.
 *
 * A token qualifies when it is at least `MIN_ENTROPY_TOKEN_LENGTH` long,
 * is not recognised by `isSafeContent`, and its adjusted entropy is strictly
 * greater than `ENTROPY_THRESHOLD`.
 *
 * @param data - Token to test.
 * @returns `true` when all three conditions hold.
 */
export function isHighEntropy(data: string): boolean {
  const eligible =
    data.length >= MIN_ENTROPY_TOKEN_LENGTH && !isSafeContent(data);
  return eligible && calculateAdjustedEntropy(data) > ENTROPY_THRESHOLD;
}
|
|
218
|
+
|
|
219
|
+
/**
 * Tokenize content and return the tokens that look like high-entropy secrets.
 *
 * Tokens are separated by runs of whitespace or any of these delimiters:
 * `[ ] { } " ' , / \ | ( ) & # @ ! < > ?`.
 * As a consequence JSON values, URL path/query components, and the parts of
 * a connection string around `/`, `@` and `?` are examined as separate
 * tokens rather than as one unit.
 *
 * @param content - Arbitrary text to scan.
 * @returns Tokens for which `isHighEntropy` is true, in order of appearance
 *          (duplicates are kept).
 */
export function findHighEntropyTokens(content: string): string[] {
  const delimiters = /[\s\[\]{}"',\/\\|()&#@!<>?]+/;
  // isHighEntropy already rejects tokens shorter than MIN_ENTROPY_TOKEN_LENGTH
  // (including the empty strings split() yields at the edges), so no separate
  // length pre-filter is needed here.
  return content.split(delimiters).filter((token) => isHighEntropy(token));
}
|
|
@@ -1,11 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Safety — Pi 集成层
|
|
3
3
|
*
|
|
4
|
-
* -
|
|
5
|
-
* - Redirect Guard: bash 覆盖写入提示确认
|
|
6
|
-
* - Protected Paths: write/edit/patch/read 保护路径提示确认
|
|
7
|
-
* - Write Guard: 覆盖非空文件禁止 write (提示使用 patch)
|
|
8
|
-
* - Secret Redact: API Key / Token 自动掩码
|
|
4
|
+
* - Secret Redact: API Key / Token 自动掩码
|
|
9
5
|
*/
|
|
10
6
|
|
|
11
7
|
import type {
|
|
@@ -13,12 +9,7 @@ import type {
|
|
|
13
9
|
ExtensionContext,
|
|
14
10
|
ToolResultEvent,
|
|
15
11
|
} from "@earendil-works/pi-coding-agent";
|
|
16
|
-
import * as fs from "node:fs";
|
|
17
|
-
import { resolve } from "node:path";
|
|
18
12
|
import {
|
|
19
|
-
checkProtectedPath,
|
|
20
|
-
collectBashDangers,
|
|
21
|
-
formatBashDangers,
|
|
22
13
|
detectSecrets,
|
|
23
14
|
maskSecret,
|
|
24
15
|
} from "./detect.js";
|
|
@@ -48,89 +39,6 @@ function formatRedactionContext(event: ToolResultEvent): string {
|
|
|
48
39
|
// ─── Setup ──────────────────────────────────────────────────────────────────
|
|
49
40
|
|
|
50
41
|
export function setupSafety(pi: ExtensionAPI) {
|
|
51
|
-
// ── Command Guard + Protected Paths + Write Guard (tool_call) ─────────
|
|
52
|
-
|
|
53
|
-
pi.on("tool_call", async (event, ctx) => {
|
|
54
|
-
|
|
55
|
-
// Gate 1: 危险命令 + 覆盖写入 + 读取保护路径
|
|
56
|
-
if (event.toolName === "bash") {
|
|
57
|
-
const command = (event.input as { command?: string }).command;
|
|
58
|
-
if (command) {
|
|
59
|
-
const dangers = collectBashDangers(command, ctx.cwd);
|
|
60
|
-
if (dangers.length > 0) {
|
|
61
|
-
const message = formatBashDangers(dangers)!;
|
|
62
|
-
if (!ctx.hasUI) {
|
|
63
|
-
return { block: true, reason: `⚠ ${message} (non-interactive)` };
|
|
64
|
-
}
|
|
65
|
-
const choice = await ctx.ui.select(
|
|
66
|
-
`⚠️ ${message}\n\nAllow execution?`,
|
|
67
|
-
["Block", "Allow once"],
|
|
68
|
-
);
|
|
69
|
-
if (!choice || choice === "Block") {
|
|
70
|
-
return { block: true, reason: `⚠ ${message}` };
|
|
71
|
-
}
|
|
72
|
-
}
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
// Gate 2: write/edit/patch 写入保护路径
|
|
77
|
-
if (event.toolName === "write" || event.toolName === "edit" || event.toolName === "patch") {
|
|
78
|
-
// For write/edit, path is a single field; for patch, check all patches[].path
|
|
79
|
-
const filePaths: string[] = event.toolName === "patch"
|
|
80
|
-
? (event.input as any).patches?.filter((p: any) => p?.path).map((p: any) => p.path) ?? []
|
|
81
|
-
: [(event.input as any).path ?? (event.input as any).file ?? (event.input as any).file_path].filter(Boolean);
|
|
82
|
-
for (const filePath of filePaths) {
|
|
83
|
-
const danger = checkProtectedPath(filePath);
|
|
84
|
-
if (danger) {
|
|
85
|
-
if (!ctx.hasUI) {
|
|
86
|
-
return { block: true, reason: `🔒 ${danger}\nmay contain sensitive information` };
|
|
87
|
-
}
|
|
88
|
-
const choice = await ctx.ui.select(
|
|
89
|
-
`🔒 ${danger}\nmay contain sensitive information\n\nProceed?`,
|
|
90
|
-
["Block", "Allow once"],
|
|
91
|
-
);
|
|
92
|
-
if (!choice || choice === "Block") {
|
|
93
|
-
return { block: true, reason: `🔒 ${danger}\nmay contain sensitive information` };
|
|
94
|
-
}
|
|
95
|
-
break; // User approved — skip remaining paths
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
// Gate 3: 写保护(已有内容的文件禁止 write,直接返回信息给 agent)
|
|
101
|
-
if (event.toolName === "write") {
|
|
102
|
-
const filePath = (event.input as any).path ?? (event.input as any).file ?? (event.input as any).file_path;
|
|
103
|
-
if (filePath) {
|
|
104
|
-
try {
|
|
105
|
-
const abs = resolve(ctx.cwd, filePath);
|
|
106
|
-
if (fs.existsSync(abs) && fs.readFileSync(abs, "utf8").length > 0) {
|
|
107
|
-
return { block: true, reason: "Overwriting a non-empty file is dangerous, use the patch tool instead!" };
|
|
108
|
-
}
|
|
109
|
-
} catch { /* file doesn't exist */ }
|
|
110
|
-
}
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
// Gate 4: read 工具读取保护路径(bash 读取已在 Gate 1 处理)
|
|
114
|
-
if (event.toolName === "read") {
|
|
115
|
-
const filePath = (event.input as any).path ?? (event.input as any).file ?? (event.input as any).file_path;
|
|
116
|
-
if (filePath) {
|
|
117
|
-
const danger = checkProtectedPath(filePath);
|
|
118
|
-
if (danger) {
|
|
119
|
-
if (!ctx.hasUI) {
|
|
120
|
-
return { block: true, reason: `🔒 Reading protected file: ${danger}\nmay contain sensitive information` };
|
|
121
|
-
}
|
|
122
|
-
const choice = await ctx.ui.select(
|
|
123
|
-
`🔒 Reading protected file: ${danger}\nmay contain sensitive information\n\nProceed?`,
|
|
124
|
-
["Block", "Allow once"],
|
|
125
|
-
);
|
|
126
|
-
if (!choice || choice === "Block") {
|
|
127
|
-
return { block: true, reason: `🔒 Reading protected file: ${danger}\nmay contain sensitive information` };
|
|
128
|
-
}
|
|
129
|
-
}
|
|
130
|
-
}
|
|
131
|
-
}
|
|
132
|
-
});
|
|
133
|
-
|
|
134
42
|
// ── Secret Redact (tool_result) ────────────────────────────────────────
|
|
135
43
|
|
|
136
44
|
const handleToolResult = async (
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Safety Detection — known secret patterns and safe-pattern exclusions
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { basename, extname } from "node:path";
|
|
6
|
+
import {
|
|
7
|
+
type SecretPattern,
|
|
8
|
+
type ConfigStringEntry,
|
|
9
|
+
CONFIG_FILE_EXTENSIONS,
|
|
10
|
+
CONFIG_BASENAME_REGEX,
|
|
11
|
+
SENSITIVE_CONFIG_KEY_REGEX,
|
|
12
|
+
PLACEHOLDER_VALUE_REGEX,
|
|
13
|
+
CONFIG_VALUE_MIN_LENGTH,
|
|
14
|
+
} from "./types.js";
|
|
15
|
+
|
|
16
|
+
// ─── High-confidence Secret Patterns (40+ known formats) ─────────────────
|
|
17
|
+
|
|
18
|
+
/**
 * Table of regular expressions for recognisable secret formats, grouped by
 * provider.
 *
 * Every entry up to and including "Password in URL" is marked
 * `highConfidence: true`; the generic header/assignment patterns at the end
 * are marked `highConfidence: false`.
 *
 * NOTE(review): the "OpenAI API Key (New)" expression allows "-" in its body,
 * so it also matches strings that start with "sk-ant-api…". If the consumer
 * reports the first matching entry, Anthropic keys would be labelled as
 * OpenAI keys — confirm against the matching logic in detect.ts.
 */
export const SECRET_PATTERNS: SecretPattern[] = [
  // AWS
  { name: "AWS Access Key ID", pattern: /AKIA[0-9A-Z]{16}/, minLength: 16, allowsSpaces: false, highConfidence: true },
  { name: "AWS Secret Access Key", pattern: /(?:aws)?_?(?:secret)?_?(?:access)?_?key['"\s:=]+['"]?[0-9a-zA-Z/+]{40}['"]?/i, minLength: 30, allowsSpaces: false, highConfidence: true },
  // GitHub
  { name: "GitHub OAuth Token", pattern: /gho_[0-9a-zA-Z]{36}/, minLength: 36, allowsSpaces: false, highConfidence: true },
  { name: "GitHub App Token", pattern: /(?:ghu|ghs)_[0-9a-zA-Z]{36}/, minLength: 36, allowsSpaces: false, highConfidence: true },
  { name: "GitHub PAT", pattern: /ghp_[0-9a-zA-Z]{36}/, minLength: 36, allowsSpaces: false, highConfidence: true },
  { name: "GitHub Fine-Grained Token", pattern: /github_pat_[0-9a-zA-Z_]{22,}/, minLength: 26, allowsSpaces: false, highConfidence: true },
  // GitLab
  { name: "GitLab PAT", pattern: /glpat-[0-9a-zA-Z\-_]{20,}/, minLength: 20, allowsSpaces: false, highConfidence: true },
  { name: "GitLab Runner Token", pattern: /glrt-[0-9a-zA-Z_\-]{20,}/, minLength: 20, allowsSpaces: false, highConfidence: true },
  // Slack
  { name: "Slack Token", pattern: /xox[baprs]-[0-9a-zA-Z\-]{10,48}/, minLength: 15, allowsSpaces: false, highConfidence: true },
  { name: "Slack Webhook URL", pattern: /https:\/\/hooks\.slack\.com\/services\/T[a-zA-Z0-9_]{8,}\/B[a-zA-Z0-9_]{8,}\/[a-zA-Z0-9_]{24}/, minLength: 60, allowsSpaces: false, highConfidence: true },
  // JWT
  { name: "JSON Web Token", pattern: /eyJ[a-zA-Z0-9_-]{10,}\.eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}/, minLength: 36, allowsSpaces: false, highConfidence: true },
  // Google
  { name: "Google API Key", pattern: /AIza[0-9A-Za-z\-_]{35}/, minLength: 35, allowsSpaces: false, highConfidence: true },
  { name: "Google OAuth Token", pattern: /ya29\.[0-9A-Za-z\-_]+/, minLength: 10, allowsSpaces: false, highConfidence: true },
  // Stripe
  { name: "Stripe Secret Key", pattern: /sk_live_[0-9a-zA-Z]{24,}/, minLength: 24, allowsSpaces: false, highConfidence: true },
  { name: "Stripe Restricted Key", pattern: /rk_live_[0-9a-zA-Z]{24,}/, minLength: 24, allowsSpaces: false, highConfidence: true },
  // Twilio / SendGrid / Discord
  { name: "Twilio API Key", pattern: /SK[a-z0-9]{32}/, minLength: 30, allowsSpaces: false, highConfidence: true },
  { name: "SendGrid API Key", pattern: /SG\.[a-zA-Z0-9_-]{22,}\.[a-zA-Z0-9_-]{40,}/, minLength: 40, allowsSpaces: false, highConfidence: true },
  { name: "Discord Bot Token", pattern: /[MN][A-Za-z\d]{23,}\.[\w-]{6}\.[\w-]{27,}/, minLength: 40, allowsSpaces: false, highConfidence: true },
  // OpenAI / Anthropic / Volcengine Ark
  { name: "OpenAI API Key", pattern: /sk-[a-zA-Z0-9]{20,}T3BlbkFJ[a-zA-Z0-9]{20,}/, minLength: 40, allowsSpaces: false, highConfidence: true },
  { name: "OpenAI API Key (New)", pattern: /sk-(?:proj-)?[a-zA-Z0-9\-_]{40,}/, minLength: 40, allowsSpaces: false, highConfidence: true },
  { name: "Anthropic API Key", pattern: /sk-ant-api[0-9]{2}-[a-zA-Z0-9\-_]{80,}/, minLength: 80, allowsSpaces: false, highConfidence: true },
  { name: "Volcengine Ark API Key", pattern: /ark-[a-zA-Z0-9\-_]{20,}/, minLength: 20, allowsSpaces: false, highConfidence: true },
  // NPM / PyPI
  { name: "NPM Token", pattern: /npm_[a-zA-Z0-9]{36}/, minLength: 36, allowsSpaces: false, highConfidence: true },
  { name: "PyPI Token", pattern: /pypi-[a-zA-Z0-9_\-]{50,}/, minLength: 50, allowsSpaces: false, highConfidence: true },
  // Private Keys
  { name: "RSA Private Key", pattern: /-----BEGIN RSA PRIVATE KEY-----\r?\n(?:[A-Za-z0-9+/=]+\r?\n)+-----END RSA PRIVATE KEY-----/, minLength: 40, allowsSpaces: true, highConfidence: true },
  { name: "OpenSSH Private Key", pattern: /-----BEGIN OPENSSH PRIVATE KEY-----\r?\n(?:[A-Za-z0-9+/=]+\r?\n)+-----END OPENSSH PRIVATE KEY-----/, minLength: 40, allowsSpaces: true, highConfidence: true },
  { name: "EC Private Key", pattern: /-----BEGIN EC PRIVATE KEY-----\r?\n(?:[A-Za-z0-9+/=]+\r?\n)+-----END EC PRIVATE KEY-----/, minLength: 40, allowsSpaces: true, highConfidence: true },
  { name: "PGP Private Key", pattern: /-----BEGIN PGP PRIVATE KEY BLOCK-----\r?\n(?:[A-Za-z0-9+/=]+\r?\n)+-----END PGP PRIVATE KEY BLOCK-----/, minLength: 40, allowsSpaces: true, highConfidence: true },
  // The \1 back-reference makes the END marker repeat "ENCRYPTED " only when the BEGIN marker had it.
  { name: "Generic Private Key", pattern: /-----BEGIN (ENCRYPTED )?PRIVATE KEY-----\r?\n(?:[A-Za-z0-9+/=]+\r?\n)+-----END \1PRIVATE KEY-----/, minLength: 40, allowsSpaces: true, highConfidence: true },
  // Database URIs
  { name: "MongoDB Connection String", pattern: /mongodb(?:\+srv)?:\/\/[^\s'"]+:[^\s'"]+@[^\s'"]+/, minLength: 20, allowsSpaces: false, highConfidence: true },
  { name: "PostgreSQL Connection String", pattern: /postgres(?:ql)?:\/\/[^\s'"]+:[^\s'"]+@[^\s'"]+/, minLength: 20, allowsSpaces: false, highConfidence: true },
  { name: "MySQL Connection String", pattern: /mysql:\/\/[^\s'"]+:[^\s'"]+@[^\s'"]+/, minLength: 20, allowsSpaces: false, highConfidence: true },
  { name: "Redis Connection String", pattern: /redis:\/\/[^\s'"]*:[^\s'"]+@[^\s'"]+/, minLength: 15, allowsSpaces: false, highConfidence: true },
  // URL-embedded passwords
  { name: "Password in URL", pattern: /[a-zA-Z]{3,10}:\/\/[^/\s:@]{3,20}:[^/\s:@]{3,20}@[^\s'"]+/, minLength: 15, allowsSpaces: false, highConfidence: true },
  // Generic assignments (lower confidence — checked against SAFE_PATTERNS)
  { name: "Bearer Token", pattern: /[Bb]earer\s+[a-zA-Z0-9\-._~+/]+=*/, minLength: 15, allowsSpaces: false, highConfidence: false },
  { name: "Basic Auth Header", pattern: /[Bb]asic\s+[a-zA-Z0-9+/]{20,}={0,2}/, minLength: 20, allowsSpaces: false, highConfidence: false },
  { name: "API Key Assignment", pattern: /(?:api[_-]?key|apikey|api[_-]?secret)['"\s:=]+['"]?[a-zA-Z0-9\-._]{20,}['"]?/i, minLength: 20, allowsSpaces: false, highConfidence: false },
  { name: "Secret Assignment", pattern: /(?:secret|token|password|passwd|pwd)['"\s:=]+['"]?[a-zA-Z0-9\-._!@#$%^&*]{8,}['"]?/i, minLength: 12, allowsSpaces: false, highConfidence: false },
];
|
|
72
|
+
|
|
73
|
+
// ─── Safe Patterns (false-positive exclusion) ────────────────────────────
|
|
74
|
+
|
|
75
|
+
/**
 * Shapes that are treated as benign by isSafeContent.
 *
 * All entries are anchored at the start of the string; all but the
 * placeholder entry are also anchored at the end, so they must match the
 * whole candidate.
 *
 * NOTE(review): the placeholder entry has no `$` anchor, so ANY string that
 * merely begins with "xxx", "your_key", "placeholder", "example", "test",
 * "demo" or "sample" is considered safe, however long its remainder is.
 * Confirm this prefix-only behaviour is intended.
 */
export const SAFE_PATTERNS: RegExp[] = [
  /^https?:\/\/[a-zA-Z0-9.-]+(?:\/[a-zA-Z0-9.\/_\-?&=#%]*)?$/, // URLs without credentials
  /^\.\.?\/[a-zA-Z0-9_\-./]+$/, // Relative file paths
  /^\/[a-zA-Z0-9_\-./]+$/, // Absolute Unix paths
  /^[a-zA-Z]:\\[a-zA-Z0-9_\-\\./]+$/, // Windows paths
  /^[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}$/, // Email addresses
  /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/, // UUIDs
  /^v?\d+\.\d+\.\d+(?:-[a-zA-Z0-9.]+)?(?:\+[a-zA-Z0-9.]+)?$/, // Semver
  /^(?:xxx+|your[_-]?(?:api[_-]?)?key|placeholder|example|test|demo|sample)/i, // Placeholders
  /^[0-9a-f]{40}$/i, // Git SHA-1
  /^[0-9a-f]{64}$/i, // SHA-256
  /^@[a-z0-9-]+\/[a-z0-9-]+$/, // npm scoped packages
];
|
|
88
|
+
|
|
89
|
+
/**
 * Report whether `content` has one of the known-benign shapes.
 *
 * @param content - Candidate string (token or config value).
 * @returns true as soon as any expression in SAFE_PATTERNS matches, else false.
 */
export function isSafeContent(content: string): boolean {
  return SAFE_PATTERNS.some((safeShape) => safeShape.test(content));
}
|
|
95
|
+
|
|
96
|
+
// ─── Config-file detection ───────────────────────────────────────────────
|
|
97
|
+
|
|
98
|
+
/**
 * Decide from the path alone whether a file should be treated as configuration.
 *
 * A file qualifies when its base name matches CONFIG_BASENAME_REGEX or when
 * its lower-cased extension is a member of CONFIG_FILE_EXTENSIONS.
 *
 * @param filePath - Path to classify; a missing or empty path is never config-like.
 * @returns true when either rule accepts the file name.
 */
export function isConfigLikeFile(filePath?: string): boolean {
  if (!filePath) {
    return false;
  }
  const fileName = basename(filePath);
  const extension = extname(fileName).toLowerCase();
  return CONFIG_BASENAME_REGEX.test(fileName) || CONFIG_FILE_EXTENSIONS.has(extension);
}
|
|
104
|
+
|
|
105
|
+
/**
 * Global regexes that capture a `key` and a `value` named group from
 * config-style text. extractConfigStringEntries runs each of them over the
 * whole input with matchAll.
 */
const CONFIG_STRING_PATTERNS: RegExp[] = [
  // key (double-quoted, single-quoted, or bare) followed by ":" or "=" and a double-quoted value; backslash escapes are allowed inside the value
  /(?<key>"[^"\r\n]+"|'[^'\r\n]+'|[A-Za-z0-9_.-]+)\s*[:=]\s*"(?<value>(?:\\.|[^"\\])*)"/g,
  // same key forms, single-quoted value
  /(?<key>"[^"\r\n]+"|'[^'\r\n]+'|[A-Za-z0-9_.-]+)\s*[:=]\s*'(?<value>(?:\\.|[^'\\])*)'/g,
  // bare key, "=", then an unquoted value running to the end of the line or the first "#" / ";"
  /(?<key>[A-Za-z0-9_.-]+)\s*=\s*(?<value>[^\r\n#;]+)/g,
];
|
|
110
|
+
|
|
111
|
+
/**
 * Convert a config key to lower snake_case.
 *
 * Steps, in order: trim; drop one leading and one trailing quote character;
 * insert "_" at camelCase / acronym boundaries; lower-case; turn runs of
 * ".", "-" and whitespace into "_"; collapse repeated "_"; strip "_" from
 * both ends.
 *
 * @param key - Key text as it appears in the file (may be quoted).
 * @returns The normalized key, e.g. `"apiKey"` becomes `api_key`.
 */
export function normalizeConfigKey(key: string): string {
  const unquoted = key.trim().replace(/^['"]|['"]$/g, "");

  // Word boundaries must be marked before lower-casing erases them.
  const withBoundaries = unquoted
    .replace(/([A-Z]+)([A-Z][a-z])/g, "$1_$2")
    .replace(/([a-z0-9])([A-Z])/g, "$1_$2");

  const snake = withBoundaries.toLowerCase().replace(/[.\-\s]+/g, "_");
  const collapsed = snake.replace(/_+/g, "_");
  return collapsed.replace(/^_+|_+$/g, "");
}
|
|
122
|
+
|
|
123
|
+
/**
 * Heuristic: could this config value plausibly be a secret?
 *
 * After trimming, the value is rejected when it is empty, matches
 * PLACEHOLDER_VALUE_REGEX, is accepted by isSafeContent, is one of the
 * literals true/false/null (case-insensitive), or is a plain signed decimal
 * number. Whatever survives is accepted only if it is at least
 * CONFIG_VALUE_MIN_LENGTH characters long.
 *
 * @param value - Raw value text taken from a config file.
 * @returns true when the value passes every exclusion and the length floor.
 */
export function looksLikeSensitiveConfigValue(value: string): boolean {
  const candidate = value.trim();

  // Evaluated left to right with short-circuiting, same order as the exclusions listed above.
  const isBenign =
    candidate.length === 0 ||
    PLACEHOLDER_VALUE_REGEX.test(candidate) ||
    isSafeContent(candidate) ||
    /^(?:true|false|null)$/i.test(candidate) ||
    /^[+-]?\d+(?:\.\d+)?$/.test(candidate);

  return !isBenign && candidate.length >= CONFIG_VALUE_MIN_LENGTH;
}
|
|
132
|
+
|
|
133
|
+
/**
 * Collect every key/value pair that CONFIG_STRING_PATTERNS finds in `content`.
 *
 * Each pattern is run over the whole input. For every match the captured
 * `key` and `value` groups are recorded together with the offsets of the
 * value inside `content`. Two matches that resolve to the same value span
 * are reported once (the first pattern to find it wins).
 *
 * @param content - Config-style text to scan.
 * @returns Entries in discovery order: all matches of the first pattern,
 *          then the second, and so on.
 */
export function extractConfigStringEntries(content: string): ConfigStringEntry[] {
  const entries: ConfigStringEntry[] = [];
  const seen = new Set<string>();

  for (const pattern of CONFIG_STRING_PATTERNS) {
    for (const match of content.matchAll(pattern)) {
      const key = match.groups?.key;
      const value = match.groups?.value;
      if (!key || value === undefined || match.index === undefined) continue;

      const full = match[0] ?? "";
      // In every pattern the value is the tail of the match, followed by at
      // most one closing quote. Searching from the END therefore finds the
      // value itself; searching from the start could hit the same text inside
      // the key (e.g. `token: "token"` or `password = pass`) and report the
      // key's offsets instead. An empty value can only come from a quoted
      // form, where it sits immediately before the closing quote.
      const rel = value === "" ? full.length - 1 : full.lastIndexOf(value);
      if (rel < 0) continue;

      const start = match.index + rel;
      const end = start + value.length;
      const dedupeKey = `${start}-${end}`;
      if (seen.has(dedupeKey)) continue;
      seen.add(dedupeKey);
      entries.push({ key, normalizedKey: normalizeConfigKey(key), value, start, end });
    }
  }

  return entries;
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Safety Detection — shared types and constants
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
// ─── Match types ──────────────────────────────────────────────────────────
|
|
6
|
+
|
|
7
|
+
/**
 * Tag recording which detector produced a match.
 * NOTE(review): the exact meaning of "pattern" vs "regex" is decided by the
 * producer (not visible in this file) — confirm in detect.ts.
 */
export type SecretMatchSource = "pattern" | "regex" | "entropy";

/** One detected secret occurrence. */
export interface SecretMatch {
  /** Label for the kind of secret (presumably a SecretPattern name or similar — confirm). */
  name: string;
  /** Start of the match; presumably an offset into the scanned text — confirm. */
  start: number;
  /** End of the match; presumably exclusive — confirm against the producer. */
  end: number;
  /** The matched text itself. */
  original: string;
  /** Which detector reported this match. */
  source: SecretMatchSource;
}
|
|
16
|
+
|
|
17
|
+
/** One row of the known-secret pattern table. */
export interface SecretPattern {
  /** Human-readable label for the secret format. */
  name: string;
  /** Expression that recognises the format. */
  pattern: RegExp;
  /** Length floor associated with the pattern; how it is enforced is up to the consumer — confirm. */
  minLength: number;
  /** Whether a match may contain whitespace (set for multi-line private-key blocks in the table). */
  allowsSpaces: boolean;
  /** If true, skip safe-pattern exclusion (unambiguous prefix) */
  highConfidence: boolean;
}
|
|
25
|
+
|
|
26
|
+
/** Optional context supplied to secret detection. */
export interface DetectSecretsOptions {
  /**
   * Path of the file the content came from, when known.
   * NOTE(review): presumably used to enable config-file heuristics — confirm in detect.ts.
   */
  filePath?: string;
}
|
|
29
|
+
|
|
30
|
+
// ─── Internal types ──────────────────────────────────────────────────────
|
|
31
|
+
|
|
32
|
+
/** One key/value pair located in config-style text. */
export interface ConfigStringEntry {
  /** Key text exactly as captured; quoted keys keep their surrounding quotes. */
  key: string;
  /** The key after snake_case normalization. */
  normalizedKey: string;
  /** Captured value text; for quoted forms the enclosing quotes are not included. */
  value: string;
  /** Offset into the scanned content recorded for the start of `value`. */
  start: number;
  /** `start` plus the length of `value`. */
  end: number;
}
|
|
39
|
+
|
|
40
|
+
// ─── Constants ────────────────────────────────────────────────────────────
|
|
41
|
+
|
|
42
|
+
/** Minimum length used by the scanner; presumably inputs shorter than this are not scanned — confirm at the call site. */
export const MIN_SCAN_LENGTH = 10;
/** Minimum trimmed length a config value must have to be considered potentially sensitive. */
export const CONFIG_VALUE_MIN_LENGTH = 32;
/** Lower-case file extensions (with leading dot) that mark a file as configuration. */
export const CONFIG_FILE_EXTENSIONS = new Set([
  ".json", ".jsonc", ".env", ".toml", ".yaml", ".yml",
  ".ini", ".cfg", ".conf", ".properties",
]);
/** Matches a base name of exactly ".env" or ".env.<anything>", case-insensitively. */
export const CONFIG_BASENAME_REGEX = /^\.env(?:\..+)?$/i;
/**
 * Matches a key containing a sensitive word (api key, token, secret,
 * password, …) as a whole "_"-delimited segment.
 * NOTE(review): the "_" delimiters suggest it is meant for keys already
 * normalized to snake_case — confirm at the call site.
 */
export const SENSITIVE_CONFIG_KEY_REGEX = /(?:^|_)(?:apikey|api_(?:key|secret|token)|access_(?:key|token)|refresh_token|client_secret|secret(?:_key)?|private_key|bearer_token|auth(?:orization|_token)?|pass(?:word|wd)?|pwd|token|webhook_secret)(?:_|$)/i;
/**
 * Matches a value that is, in its entirety, a template reference
 * (`${…}`, `{{…}}`, `<…>`) or a well-known placeholder word such as
 * "changeme" or "your_api_key_here". Case-insensitive.
 */
export const PLACEHOLDER_VALUE_REGEX = /^(?:\$\{[^}]+\}|\{\{[^}]+\}\}|<[^>]+>|xxx+|placeholder|example|sample|demo|test|changeme|your[_-]?(?:api[_-]?)?key(?:[_-]?here)?)$/i;
|