decorated-pi 0.2.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +82 -74
- package/extensions/file-times.ts +124 -0
- package/extensions/guidance.ts +5 -3
- package/extensions/index.ts +6 -2
- package/extensions/io.ts +587 -0
- package/extensions/lsp/client.ts +181 -428
- package/extensions/lsp/env.ts +45 -12
- package/extensions/lsp/format.ts +102 -237
- package/extensions/lsp/index.ts +8 -11
- package/extensions/lsp/manager.ts +249 -0
- package/extensions/lsp/prompt.ts +3 -42
- package/extensions/lsp/protocol.ts +219 -0
- package/extensions/lsp/servers.ts +80 -160
- package/extensions/lsp/tools.ts +175 -510
- package/extensions/lsp/types.ts +42 -0
- package/extensions/mcp/builtin.ts +126 -0
- package/extensions/mcp/client.ts +106 -0
- package/extensions/mcp/index.ts +123 -0
- package/extensions/{extend-model.ts → model-integration.ts} +127 -4
- package/extensions/patch.ts +842 -0
- package/extensions/providers/ark-coding.ts +2 -0
- package/extensions/safety/detect.ts +78 -707
- package/extensions/safety/entropy.ts +226 -0
- package/extensions/safety/index.ts +44 -97
- package/extensions/safety/patterns.ts +155 -0
- package/extensions/safety/types.ts +50 -0
- package/extensions/settings.ts +10 -0
- package/extensions/slash.ts +165 -9
- package/extensions/smart-at.ts +339 -111
- package/extensions/subdir-agents.ts +43 -13
- package/package.json +3 -4
- package/tsconfig.json +16 -0
- package/extensions/lsp/server-manager.ts +0 -309
- package/extensions/lsp/trust.ts +0 -45
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Safety Detection — Shannon entropy and adjusted entropy analysis
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { isSafeContent } from "./patterns.js";
|
|
6
|
+
|
|
7
|
+
// ─── Character classification ────────────────────────────────────────────
|
|
8
|
+
|
|
9
|
+
/**
 * Classify a character by the UTF-16 code unit at its first position.
 *
 * @param c - The character to classify.
 * @returns "U" for ASCII uppercase, "L" for ASCII lowercase, "D" for an ASCII
 *   digit, "S" when `c` is exactly "-", and "X" for everything else
 *   (including the empty string and non-ASCII characters).
 */
export function charClass(c: string): "U" | "L" | "D" | "S" | "X" {
  const unit = c.charCodeAt(0);
  // NaN (empty input) fails every comparison and falls through to "X".
  const within = (lo: number, hi: number): boolean => unit >= lo && unit <= hi;

  if (within(0x41, 0x5a)) return "U"; // 'A'..'Z'
  if (within(0x61, 0x7a)) return "L"; // 'a'..'z'
  if (within(0x30, 0x39)) return "D"; // '0'..'9'
  return c === "-" ? "S" : "X";
}
|
|
18
|
+
|
|
19
|
+
// ─── Shannon entropy ──────────────────────────────────────────────────────
|
|
20
|
+
|
|
21
|
+
/**
 * Shannon entropy of a string, in bits per symbol: H(X) = -Σ p(x) · log₂(p(x)).
 *
 * Symbols are Unicode code points. The total used for the probabilities is
 * the number of code points, not `data.length`: a character outside the BMP
 * occupies two UTF-16 code units, and dividing by `length` would make the
 * probabilities sum to less than 1.
 *
 * @param data - The text to measure.
 * @returns The entropy in bits; 0 for the empty string.
 */
export function shannonEntropy(data: string): number {
  const freq = new Map<string, number>();
  let total = 0;
  for (const symbol of data) {
    freq.set(symbol, (freq.get(symbol) ?? 0) + 1);
    total++;
  }
  if (total === 0) return 0;

  let entropy = 0;
  for (const count of freq.values()) {
    const p = count / total;
    entropy -= p * Math.log2(p);
  }
  return entropy;
}
|
|
34
|
+
|
|
35
|
+
// ─── Trigram density ──────────────────────────────────────────────────────
|
|
36
|
+
|
|
37
|
+
/**
 * Score one 3-character window by how its character classes mix.
 *
 * Rules are checked in this order; the first that applies wins:
 * - any "X"-class character           → 0
 * - three digits                      → 0
 * - a '-' plus ≥3 distinct classes    → 1.0
 * - digit first, with a letter after  → 1.0 (e.g. `4Vi`)
 * - ≥2 uppercase and ≥1 lowercase     → 0.8
 * - anything else                     → 0
 */
export function trigramScore(c1: string, c2: string, c3: string): number {
  const lead = charClass(c1);
  const kinds = [lead, charClass(c2), charClass(c3)];
  const count = (kind: string): number => kinds.filter(k => k === kind).length;

  if (count("X") > 0) return 0;

  const digits = count("D");
  if (digits === 3) return 0;

  const uppers = count("U");
  const lowers = count("L");
  const dashes = count("S");
  // With "X" ruled out, these four tallies cover every class present.
  const distinct = [uppers, lowers, digits, dashes].filter(n => n > 0).length;

  if (dashes > 0 && distinct >= 3) return 1.0;
  if (lead === "D" && uppers + lowers > 0) return 1.0;
  return uppers >= 2 && lowers >= 1 ? 0.8 : 0;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Break a token into runs of non-"X" characters, using every "X"-class
 * character as a separator. Runs shorter than 3 characters are dropped.
 */
export function splitByXClass(token: string): string[] {
  const kept: string[] = [];
  let run = "";
  const flush = (): void => {
    if (run.length >= 3) kept.push(run);
    run = "";
  };

  for (const ch of token) {
    if (charClass(ch) !== "X") {
      run += ch;
      continue;
    }
    flush();
  }
  flush(); // the trailing run has no separator after it

  return kept;
}
|
|
74
|
+
|
|
75
|
+
/**
 * Mean `trigramScore` over every 3-character sliding window of a segment.
 *
 * @returns 0 when the segment is shorter than 3 characters.
 */
export function segmentDensity(segment: string): number {
  const windows = segment.length - 2;
  if (windows < 1) return 0;

  let sum = 0;
  for (let start = 0; start < windows; start++) {
    sum += trigramScore(
      segment.charAt(start),
      segment.charAt(start + 1),
      segment.charAt(start + 2),
    );
  }
  return sum / windows;
}
|
|
84
|
+
|
|
85
|
+
/**
 * Highest `segmentDensity` among the segments produced by `splitByXClass`.
 *
 * @returns 0 when the token yields no segments.
 */
export function maxSegmentDensity(token: string): number {
  return splitByXClass(token).reduce((best, part) => {
    const density = segmentDensity(part);
    return density > best ? density : best;
  }, 0);
}
|
|
96
|
+
|
|
97
|
+
// ─── Word / dictionary / hex ratios ───────────────────────────────────────
|
|
98
|
+
|
|
99
|
+
/**
 * Word ratio: the share of the token taken up by "word-like" fragments.
 *
 * A fragment is a run of ASCII letters at least 3 long that contains a vowel
 * (a, e, i, o, u; case-insensitive). The summed length of those fragments is
 * divided by `token.length`. More natural-language words means the token is
 * less likely to be a secret.
 *
 * @returns 0 when no such fragment exists.
 */
export function computeWordRatio(token: string): number {
  const hasVowel = /[aeiou]/;
  const runs: string[] = [];
  let run = "";
  const closeRun = (): void => {
    if (run.length >= 3) runs.push(run);
    run = "";
  };

  for (const ch of token) {
    const kind = charClass(ch);
    if (kind !== "L" && kind !== "U") {
      closeRun();
      continue;
    }
    run += ch.toLowerCase();
  }
  closeRun();

  let wordCount = 0;
  let wordChars = 0;
  for (const candidate of runs) {
    if (!hasVowel.test(candidate)) continue;
    wordCount++;
    wordChars += candidate.length;
  }
  return wordCount > 0 ? wordChars / token.length : 0;
}
|
|
120
|
+
|
|
121
|
+
/**
 * Fraction of the token's UTF-16 code units that are hexadecimal digits
 * (0-9, a-f, A-F) or a '-'.
 *
 * @returns 0 for the empty string.
 */
export function computeHexRatio(token: string): number {
  if (token.length === 0) return 0;
  const hexLike = token.match(/[0-9a-fA-F-]/g);
  return (hexLike?.length ?? 0) / token.length;
}
|
|
130
|
+
|
|
131
|
+
/** 2121 English + tech words for dictionary coverage check */
|
|
132
|
+
const DICT_WORDS: ReadonlySet<string> = new Set(
|
|
133
|
+
// prettier-ignore
|
|
134
|
+
JSON.parse(`["ability","able","about","above","abstract","abuse","academic","accept","acceptance","accepted","access","accessories","accommodation","according","account","accounting","accounts","across","action","actions","active","activities","activity","actual","actually","added","addition","additional","address","adm","admin","administration","administrative","adult","advance","advanced","adventure","advertise","advertisement","advertising","advice","aes","affairs","affiliate","affiliates","africa","african","after","again","against","agencies","agency","agent","agents","agree","agreement","airport","album","allow","allowed","allows","almost","alone","along","already","also","alternative","although","always","amateur","amazon","america","american","among","amount","analysis","angeles","animal","animals","announcements","annual","another","answer","answers","anti","anyone","anything","apartments","api","apparel","appear","apple","application","applications","applied","apply","approach","appropriate","approval","approved","approximately","april","architecture","archive","archives","area","areas","argument","arizona","army","around","article","articles","artist","artists","arts","asia","asian","asked","assessment","assistance","assistant","associated","associates","association","attack","attention","attorney","auction","auctions","audio","august","australia","australian","auth","author","authority","authors","auto","automatically","automotive","availability","available","avenue","average","avg","avoid","award","awards","away","baby","back","background","balance","ball","band","bank","base","baseball","based","basic","basis","basket","battery","beach","beautiful","beauty","became","because","become","been","before","began","begin","beginning","behind","being","believe","below","benefit","benefits","best","better","between","beyond","bible","bill","birth","black","block","blog","blogs","blood","blue","board","boards","body","book","books","born","boston","both","botto
m","boys","branch","brand","brands","break","breakfast","breast","bridge","bring","british","brought","brown","browse","browser","btn","budget","buf","build","building","built","bush","business","businesses","button","buyer","buying","cable","calendar","california","call","called","calls","came","camera","cameras","camp","campaign","campus","canada","canadian","cancer","canon","capacity","capital","card","cards","care","career","careers","carolina","cars","cart","case","cases","cash","casino","catalog","categories","category","cause","cb","cell","cells","center","centers","central","centre","century","certain","certificate","certified","cfg","chain","chair","challenge","chance","change","changed","changes","channel","chapter","character","characters","charge","charges","charles","chart","chat","cheap","check","chemical","chicago","chief","child","children","china","chinese","choice","choose","chris","christian","christmas","church","cities","city","civil","claim","claims","class","classes","classic","classifieds","clean","clear","cli","click","client","clients","clinical","close","closed","clothing","club","clubs","cnet","cnt","coast","code","codes","coffee","col","cold","collection","college","color","colorado","columbia","column","come","comes","coming","command","comment","comments","commerce","commercial","commission","committee","common","communication","communications","communities","community","companies","company","compare","compared","comparison","competition","complete","completed","complex","compliance","component","components","comprehensive","computer","computers","computing","condition","conditions","conference","configuration","congress","connect","connection","consider","considered","construction","consumer","contact","contacts","contains","content","contents","context","continue","continued","contract","control","cool","copy","copyright","core","corner","corporate","corporation","correct","cost","costs","could","council","count","counter","countries
","country","county","couple","course","courses","court","cover","coverage","covered","cpu","create","created","creating","creative","credit","creek","crime","critical","cross","crud","css","csv","cultural","culture","currency","current","currently","custom","customer","customers","daily","damage","dance","dark","data","database","date","dates","dating","david","days","db","dead","deal","deals","death","debt","december","decision","deep","default","defense","define","defined","definition","degree","delivery","demand","department","described","description","design","designated","designed","desktop","detail","detailed","details","determine","determined","dev","develop","developed","developer","developing","development","device","devices","diamond","dictionary","died","diet","difference","different","difficult","digital","dir","direct","directions","directly","director","directory","disclaimer","discount","discuss","discussion","disease","disp","display","distance","distribution","district","division","dlg","dns","doctor","document","documentation","documents","does","doing","dollar","dollars","domain","domestic","done","door","double","down","download","downloads","draft","drive","driver","driving","drop","drug","drugs","dst","during","dvds","each","early","earth","easily","east","eastern","easy","ebay","economic","economy","edge","edit","edition","editor","education","educational","effect","effective","effects","effort","efforts","either","election","electric","electronic","electronics","element","elements","else","email","emergency","emit","employee","employees","employment","enable","ending","energy","engine","engineering","england","english","enjoy","enough","ensure","enter","enterprise","entertainment","entire","entries","entry","env","environment","environmental","equal","equipment","err","error","errors","especially","essential","established","estate","europe","european","evaluation","even","event","events","ever","every","everyone","everything","evidence","evt
","example","examples","excellent","except","exchange","executive","exercise","existing","expect","expected","experience","expert","express","ext","extended","extension","external","extra","eyes","face","facilities","facility","fact","factor","factors","facts","faculty","failure","fair","faith","fall","families","family","fantasy","farm","fashion","fast","father","favorite","feat","feature","featured","features","february","federal","feed","feedback","feel","fees","feet","female","fiction","field","fields","figure","file","files","fill","film","films","filter","final","finally","finance","financial","find","finding","fine","fire","firm","first","fish","fishing","fitness","five","fixed","fixme","flag","flash","flat","flight","floor","florida","flow","flowers","focus","follow","following","follows","font","food","foot","football","force","ford","foreign","forest","form","format","former","forms","forum","forums","forward","found","foundation","four","frame","france","francisco","free","freedom","french","fresh","friday","friend","friendly","friends","from","front","ftr","fuel","full","fully","function","functional","functions","fund","funding","funds","furniture","further","future","galleries","gallery","game","games","gamma","garden","gave","gear","general","generally","generated","generation","george","georgia","german","germany","gets","getting","gid","gift","gifts","girl","girls","git","give","given","gives","giving","glass","global","goal","goals","goes","going","gold","golden","golf","gone","good","goods","google","government","gpt","gpu","grade","graduate","grand","grant","graphics","great","greater","green","ground","group","groups","growing","growth","grp","guarantee","guest","gui","guide","guidelines","guides","guitar","guys","hack","hair","half","hall","hand","hands","happy","hard","hardware","have","having","hdr","head","headlines","health","hear","heard","hearing","heart","heat","heavy","held","help","helpful","here","high","higher","highest","highly","hi
ll","himself","hire","historical","history","hits","hold","holiday","holidays","home","homepage","homes","hook","hope","horse","hospital","host","hosting","hotel","hotels","hour","hours","house","housing","houston","however","html","huge","human","icon","idea","ideas","identify","idx","illinois","image","images","img","immediately","impact","implementation","important","improve","improvement","inch","include","included","includes","including","income","increase","increased","independent","index","india","indian","individual","individuals","industrial","industry","info","information","informed","initial","input","inside","install","installation","instead","institute","institutions","instructions","instruments","insurance","int","integrated","intended","interactive","interest","interested","interesting","interests","interface","internal","international","internet","into","introduction","investment","involved","ipod","iraq","ireland","isbn","island","islands","israel","issue","issues","italian","italy","item","items","itself","jack","jackson","james","january","japan","japanese","java","jersey","jesus","jewelry","jobs","john","johnson","join","joined","joint","jones","journal","json","july","jump","june","just","justice","kansas","keep","key","keyword","keywords","kids","kind","kinds","king","kingdom","kitchen","know","knowledge","known","kong","label","labor","lake","lan","land","language","languages","large","larger","largest","last","late","later","latest","latin","laws","lead","leader","leaders","leadership","leading","league","learn","learning","least","leather","leave","left","legal","len","length","lesbian","less","letter","letters","level","levels","lib","library","license","life","light","like","likely","limit","limited","line","lines","link","links","linux","list","listed","listen","listing","listings","lists","literature","little","live","lives","living","llm","load","loan","loans","local","located","location","locations","login","logo","london","long","long
er","look","looking","looks","lord","loss","lost","lots","louis","love","lower","lowest","lyrics","mac","machine","machines","made","magazine","magazines","magic","mail","mailing","main","maintenance","major","make","makes","making","male","manage","management","manager","manual","manufacturer","manufacturing","many","maps","march","marine","mark","market","marketing","markets","martin","mary","mass","master","match","matching","material","materials","matter","mature","max","maximum","maybe","mean","means","measures","media","medical","medicine","medium","meet","meeting","meetings","mega","member","members","membership","memory","mental","menu","merchant","message","messages","metal","method","methods","mexico","michael","michigan","micro","microsoft","middle","might","mike","miles","military","million","min","mind","mini","minimum","minister","minnesota","minute","minutes","miss","missing","mission","mobile","mock","mod","mode","model","models","modern","modified","module","moment","monday","money","monitor","monitoring","month","monthly","months","more","morning","mortgage","most","mother","motion","motor","motorola","mount","mountain","move","moved","movement","movie","movies","moving","msg","much","multi","multimedia","multiple","museum","music","musical","must","myself","naked","name","names","nano","nation","national","native","natural","nature","nav","navigation","near","necessary","need","needed","needs","net","network","networking","networks","never","news","newsletter","next","nice","night","nlp","nokia","none","normal","north","northern","note","notes","nothing","notice","november","npm","num","number","numbers","nursing","oauth","object","october","offer","offered","offering","offers","office","officer","official","often","ohio","older","once","ones","online","only","ontario","open","opening","operating","operation","operations","opinion","opportunities","opportunity","ops","option","optional","options","oral","orange","order","orders","oregon","organiza
tion","organizations","original","orm","oss","other","others","otherwise","outdoor","output","outside","over","overall","overview","owned","owner","owners","pacific","pack","package","packages","page","pages","paid","pain","palm","panel","paper","paperback","papers","parent","parents","paris","park","parking","part","particular","particularly","parties","partner","partners","parts","party","pass","password","past","patch","path","patient","patients","paul","payment","paypal","peace","pennsylvania","people","percent","perfect","performance","perhaps","period","perm","permission","person","personal","persons","peter","phase","phentermine","phone","phones","photo","photography","photos","physical","pick","pics","picture","pictures","pid","piece","pink","pip","pipe","pkg","place","placed","places","plan","planning","plans","plant","plants","plastic","platform","play","played","player","players","playing","please","plus","pocket","point","points","poker","pol","police","policies","policy","political","politics","pool","poor","pop","popular","population","port","pos","position","positive","possible","post","posted","poster","posters","posts","potential","power","powered","practice","practices","premium","present","presentation","presented","president","press","pressure","pretty","prev","prevent","previous","price","prices","pricing","primary","prime","print","printer","printing","prior","privacy","private","pro","probably","problem","problems","procedure","procedures","process","processes","processing","prod","produce","produced","product","production","products","professional","professor","profile","profit","program","programme","programming","programs","progress","project","projects","properties","property","proposed","protect","protection","protein","provide","provided","provider","providers","provides","providing","ptr","public","publication","publications","published","publisher","publishing","purchase","purpose","purposes","quality","quantity","quarter","question","
questions","quick","quickly","quite","quote","quotes","race","racing","radio","ram","random","range","rank","rate","rated","rates","rather","rating","ratings","reach","read","reader","readers","reading","ready","real","really","reason","reasons","receive","received","recent","recently","recipes","recommend","recommendations","recommended","record","records","recovery","reduce","ref","reference","references","regarding","region","regional","register","registered","registration","regular","regulations","related","relations","relationship","release","released","releases","relevant","religion","religious","remember","remote","remove","rent","rental","rentals","repair","replies","reply","report","reported","reporting","reports","republic","req","request","requests","require","required","requirements","requires","res","research","reserve","reserved","resolution","resort","resource","resources","respect","respective","response","responsibility","responsible","rest","restaurant","restaurants","result","results","retail","return","returns","rev","review","reviews","rich","richard","right","rights","ring","ringtones","risk","river","road","robert","rock","rol","role","room","rooms","root","rose","round","row","royal","rsa","rule","rules","running","russia","russian","safe","safety","said","saint","sale","sales","same","sample","samsung","santa","satellite","saturday","save","saying","says","scale","schedule","school","schools","science","sciences","scientific","score","scott","screen","sdk","search","searches","season","seattle","second","seconds","secretary","section","sections","sector","secure","security","seem","seems","seen","select","selected","selection","self","sell","seller","sellers","selling","send","senior","sense","sent","separate","september","sequence","series","serious","serve","server","servers","service","services","session","sets","setting","settings","seven","several","sha","shall","share","sheet","ship","shipping","ships","shirt","shirts","shoes","shop","
shopping","shops","short","shot","should","show","showing","shown","shows","sid","side","sign","signed","significant","silver","similar","simple","simply","since","single","site","sitemap","sites","situation","size","skills","skin","skip","small","smart","smith","snow","social","society","soft","software","sold","solid","solution","solutions","some","someone","something","sometimes","song","songs","sony","soon","sorry","sort","sorted","sound","source","sources","south","southern","space","spain","spanish","special","species","specific","specified","speed","spirit","sponsored","sport","sports","spring","sql","square","src","sre","ssd","ssh","ssl","staff","stage","stand","standard","standards","star","stars","start","started","starting","state","statement","statements","states","station","statistics","status","stay","steel","step","steps","steve","still","stock","stone","stop","storage","store","stores","stories","story","str","strategies","strategy","stream","street","string","strong","structure","stub","student","students","studies","studio","study","stuff","style","subject","subjects","submit","submitted","subs","subscribe","success","successful","such","suggest","suite","sum","summary","summer","sunday","super","supplies","supply","support","supported","sure","surface","surgery","survey","switch","system","systems","tab","table","tables","tag","tags","take","taken","takes","taking","talk","talking","target","task","tcp","teacher","teachers","teaching","team","tech","technical","techniques","technologies","technology","teen","teens","telephone","television","tell","temp","temperature","term","terms","test","testing","tests","texas","text","than","thank","thanks","that","their","them","theme","themselves","then","theory","therapy","there","therefore","these","they","thing","things","think","thinking","third","this","thomas","those","though","thought","thoughts","thousands","thread","three","through","throughout","thursday","thus","tickets","tid","time","times","tip"
,"tips","title","titles","tls","tmp","today","todo","together","told","took","tool","tools","topic","topics","total","touch","tour","tours","towards","town","toys","track","trade","trademarks","trading","traditional","traffic","training","transfer","transport","transportation","travel","treatment","tree","trial","trip","true","trust","truth","trying","tuesday","turn","type","types","udp","uid","under","understand","understanding","union","unique","unit","united","units","universal","university","unknown","unless","until","update","updated","updates","upgrade","upon","upper","urban","url","used","useful","user","username","users","uses","using","usr","usually","vacation","val","valid","valley","value","values","variable","variety","various","vegas","vehicle","vehicles","ver","version","very","video","videos","view","viewed","views","village","virginia","virtual","virus","vision","visit","visitors","visual","voice","volume","vote","vpn","wait","walk","wall","wan","want","wanted","warning","washington","waste","watch","watches","water","ways","weather","website","websites","wedding","wednesday","week","weekend","weekly","weeks","weight","welcome","well","went","were","west","western","what","when","where","whether","which","while","white","whole","wholesale","whose","wide","wife","wild","will","william","williams","wind","window","windows","wine","winter","wireless","wish","with","within","without","woman","women","wood","word","words","work","worked","workers","working","works","workshop","world","worldwide","worth","would","write","writing","written","wrong","wrote","xbox","xml","yahoo","yaml","year","years","yellow","yesterday","york","young","your","yourself","youth","zealand","zone"]`)
|
|
135
|
+
);
|
|
136
|
+
|
|
137
|
+
/** Length of the longest entry in DICT_WORDS; no longer slice can match. */
const MAX_DICT_WORD_LENGTH: number = (() => {
  let longest = 0;
  for (const word of DICT_WORDS) longest = Math.max(longest, word.length);
  return longest;
})();

/**
 * Dict ratio: fraction of the token covered by dictionary words.
 * High dict ratio → likely English text / identifier, not a secret.
 *
 * The token is reduced to lower-cased runs of ASCII letters (≥3 long). Each
 * run is scanned left to right: at every position the longest DICT_WORDS
 * entry starting there is consumed, otherwise the scan advances one
 * character. The matched character count is divided by `token.length`.
 *
 * Candidate slices are capped at the longest dictionary word. Without the
 * cap, a long all-letter run costs one slice for every (start, end) pair,
 * which becomes prohibitively slow on large inputs. The cap does not change
 * the result.
 *
 * @returns 0 when the token has no letter run of length ≥3.
 */
export function computeDictRatio(token: string): number {
  // Extract alphabetic sequences (>= 3 chars), case-insensitive
  const lowerSeqs: string[] = [];
  let current = "";
  for (const c of token) {
    const cls = charClass(c);
    if (cls === "L" || cls === "U") {
      current += c.toLowerCase();
    } else {
      if (current.length >= 3) lowerSeqs.push(current);
      current = "";
    }
  }
  if (current.length >= 3) lowerSeqs.push(current);

  if (lowerSeqs.length === 0) return 0;

  // Greedy match: find longest word at each position, then skip past it
  let matchedChars = 0;
  for (const seq of lowerSeqs) {
    let pos = 0;
    while (pos < seq.length) {
      let longestMatch = 0;
      const furthest = Math.min(seq.length, pos + MAX_DICT_WORD_LENGTH);
      for (let end = furthest; end > pos; end--) {
        if (DICT_WORDS.has(seq.slice(pos, end))) {
          longestMatch = end - pos;
          break;
        }
      }
      pos += longestMatch > 0 ? longestMatch : 1;
      matchedChars += longestMatch;
    }
  }

  // A non-empty lowerSeqs implies token.length > 0, so the division is safe.
  return matchedChars / token.length;
}
|
|
181
|
+
|
|
182
|
+
// ─── Adjusted entropy ─────────────────────────────────────────────────────
|
|
183
|
+
|
|
184
|
+
/** Adjusted-entropy score a token must exceed to count as high entropy. */
export const ENTROPY_THRESHOLD = 5.5;
/** Tokens shorter than this are never reported as high entropy. */
export const MIN_ENTROPY_TOKEN_LENGTH = 32;
/** Weight of the trigram-density boost. */
const W1_DENSITY = 3.0;
/** Weight of the word-ratio penalty. */
const W2_WORD = 3.0;
/** Weight of the dictionary-ratio penalty. */
const W3_DICT = 4.0;
/** Flat penalty subtracted for hyphenated, almost-all-hex tokens. */
const HEX_PENALTY = 2.5;
/** Hex ratio a token must exceed before HEX_PENALTY can apply. */
const HEX_RATIO_THRESHOLD = 0.9;

/**
 * Adjusted entropy of a token:
 *
 *   shannon + maxSegmentDensity×W1 − wordRatio×W2 − dictRatio×W3 − hexPenalty
 *
 * The hex penalty is applied only to hyphenated UUID-like tokens: the hex
 * ratio must exceed HEX_RATIO_THRESHOLD and the token must contain a '-'.
 */
export function calculateAdjustedEntropy(data: string): number {
  let score = shannonEntropy(data);
  score += maxSegmentDensity(data) * W1_DENSITY;
  score -= computeWordRatio(data) * W2_WORD;
  score -= computeDictRatio(data) * W3_DICT;

  const uuidLike = computeHexRatio(data) > HEX_RATIO_THRESHOLD && data.includes("-");
  if (uuidLike) score -= HEX_PENALTY;

  return score;
}
|
|
212
|
+
|
|
213
|
+
/**
 * Decide whether a token should be treated as high entropy.
 *
 * A token qualifies only if it is at least MIN_ENTROPY_TOKEN_LENGTH long,
 * is not accepted by `isSafeContent`, and its adjusted entropy is strictly
 * above ENTROPY_THRESHOLD. The checks short-circuit in that order.
 */
export function isHighEntropy(data: string): boolean {
  return (
    data.length >= MIN_ENTROPY_TOKEN_LENGTH &&
    !isSafeContent(data) &&
    calculateAdjustedEntropy(data) > ENTROPY_THRESHOLD
  );
}
|
|
218
|
+
|
|
219
|
+
/**
 * Tokenize `content` and return the tokens that `isHighEntropy` accepts.
 *
 * Tokens are split on runs of whitespace and of these delimiters:
 * `[ ] { } " ' , / \ | ( ) & # @ ! < > ?`. JSON punctuation, URL path
 * separators and the `@` in connection strings therefore all break tokens
 * apart. Characters such as `:`, `=`, `.`, `-`, `_` and `+` are not
 * delimiters and stay inside a token.
 *
 * @param content - Arbitrary text to scan.
 * @returns Matching tokens in order of appearance (duplicates are kept).
 */
export function findHighEntropyTokens(content: string): string[] {
  const tokens = content.split(/[\s\[\]{}"',\/\\|()&#@!<>?]+/);
  // isHighEntropy already rejects tokens below MIN_ENTROPY_TOKEN_LENGTH.
  return tokens.filter(token => isHighEntropy(token));
}
|
|
@@ -1,11 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Safety — Pi 集成层
|
|
3
3
|
*
|
|
4
|
-
* -
|
|
5
|
-
* - Redirect Guard: bash 覆盖写入提示确认
|
|
6
|
-
* - Protected Paths: write/edit/read 保护路径提示确认
|
|
7
|
-
* - Write Guard: 覆盖非空文件禁止 write
|
|
8
|
-
* - Secret Redact: API Key / Token 自动掩码
|
|
4
|
+
* - Secret Redact: API Key / Token 自动掩码
|
|
9
5
|
*/
|
|
10
6
|
|
|
11
7
|
import type {
|
|
@@ -13,100 +9,36 @@ import type {
|
|
|
13
9
|
ExtensionContext,
|
|
14
10
|
ToolResultEvent,
|
|
15
11
|
} from "@earendil-works/pi-coding-agent";
|
|
16
|
-
import * as fs from "node:fs";
|
|
17
|
-
import { resolve } from "node:path";
|
|
18
12
|
import {
|
|
19
|
-
checkProtectedPath,
|
|
20
|
-
collectBashDangers,
|
|
21
|
-
formatBashDangers,
|
|
22
13
|
detectSecrets,
|
|
23
14
|
maskSecret,
|
|
24
15
|
} from "./detect.js";
|
|
25
16
|
|
|
26
17
|
/** The `{ type: "text" }` member of the element type of a tool result's non-null `content` array. */
type ToolTextContent = Extract<NonNullable<ToolResultEvent["content"]>[number], { type: "text" }>;
|
|
27
18
|
|
|
19
|
+
/**
 * Collapse a shell command onto one line and shorten it for display.
 *
 * Every run of whitespace becomes a single space and the ends are trimmed.
 * If the result is longer than `maxLength` UTF-16 code units, it is cut and
 * an ellipsis is appended so the total length does not exceed `maxLength`.
 *
 * @param command - The raw command text; may span several lines.
 * @param maxLength - Maximum length of the returned string (default 48).
 * @returns The summarized command; "" when it does not fit in a
 *   non-positive `maxLength`.
 */
function summarizeCommand(command: string, maxLength = 48): string {
  const singleLine = command.replace(/\s+/g, " ").trim();
  if (singleLine.length <= maxLength) return singleLine;
  // A negative slice end would count from the end of the string and return
  // almost all of it, so a non-positive budget is handled up front.
  if (maxLength <= 0) return "";

  let keep = maxLength - 1;
  // Do not cut a surrogate pair in half: drop a trailing high surrogate
  // instead of leaving it dangling in front of the ellipsis.
  const lastUnit = singleLine.charCodeAt(keep - 1);
  if (keep > 0 && lastUnit >= 0xd800 && lastUnit <= 0xdbff) keep--;

  return `${singleLine.slice(0, keep)}…`;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Build the short label that identifies which tool call had output redacted.
 *
 * - `read`: "read <target>", where the target is the first non-nullish value
 *   among the input's `path`, `file` and `file_path`; plain "read" when that
 *   value is missing or falsy.
 * - `bash`: "bash <summary>", where the summary is the command passed through
 *   `summarizeCommand`; plain "bash" when the command is not a string or is
 *   blank.
 * - any other tool: the tool name unchanged.
 *
 * @param event - The tool result event being reported on.
 * @returns The label text for the redaction notice.
 */
function formatRedactionContext(event: ToolResultEvent): string {
  // The input shape depends on the tool, so it is read loosely here.
  const input = event.input as any;

  switch (event.toolName) {
    case "read": {
      const target = input?.path ?? input?.file ?? input?.file_path;
      if (!target) return "read";
      return `read ${target}`;
    }
    case "bash": {
      const cmd = input?.command;
      if (typeof cmd !== "string" || cmd.trim().length === 0) return "bash";
      return `bash ${summarizeCommand(cmd)}`;
    }
    default:
      return event.toolName;
  }
}
|
|
38
|
+
|
|
28
39
|
// ─── Setup ──────────────────────────────────────────────────────────────────
|
|
29
40
|
|
|
30
41
|
export function setupSafety(pi: ExtensionAPI) {
|
|
31
|
-
// ── Command Guard + Protected Paths + Write Guard (tool_call) ─────────
|
|
32
|
-
|
|
33
|
-
pi.on("tool_call", async (event, ctx) => {
|
|
34
|
-
|
|
35
|
-
// Gate 1: 危险命令 + 覆盖写入 + 读取保护路径
|
|
36
|
-
if (event.toolName === "bash") {
|
|
37
|
-
const command = (event.input as { command?: string }).command;
|
|
38
|
-
if (command) {
|
|
39
|
-
const dangers = collectBashDangers(command, ctx.cwd);
|
|
40
|
-
if (dangers.length > 0) {
|
|
41
|
-
const message = formatBashDangers(dangers)!;
|
|
42
|
-
if (!ctx.hasUI) {
|
|
43
|
-
return { block: true, reason: `⚠ ${message} (non-interactive)` };
|
|
44
|
-
}
|
|
45
|
-
const choice = await ctx.ui.select(
|
|
46
|
-
`⚠️ ${message}\n\nAllow execution?`,
|
|
47
|
-
["Block", "Allow once"],
|
|
48
|
-
);
|
|
49
|
-
if (!choice || choice === "Block") {
|
|
50
|
-
return { block: true, reason: `⚠ ${message}` };
|
|
51
|
-
}
|
|
52
|
-
}
|
|
53
|
-
}
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
// Gate 2: write/edit 写入保护路径
|
|
57
|
-
if (event.toolName === "write" || event.toolName === "edit") {
|
|
58
|
-
const filePath = (event.input as any).path ?? (event.input as any).file ?? (event.input as any).file_path;
|
|
59
|
-
if (filePath) {
|
|
60
|
-
const danger = checkProtectedPath(filePath);
|
|
61
|
-
if (danger) {
|
|
62
|
-
if (!ctx.hasUI) {
|
|
63
|
-
return { block: true, reason: `🔒 ${danger}\nmay contain sensitive information` };
|
|
64
|
-
}
|
|
65
|
-
const choice = await ctx.ui.select(
|
|
66
|
-
`🔒 ${danger}\nmay contain sensitive information\n\nProceed?`,
|
|
67
|
-
["Block", "Allow once"],
|
|
68
|
-
);
|
|
69
|
-
if (!choice || choice === "Block") {
|
|
70
|
-
return { block: true, reason: `🔒 ${danger}\nmay contain sensitive information` };
|
|
71
|
-
}
|
|
72
|
-
}
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
// Gate 3: 写保护(已有内容的文件禁止 write,直接返回信息给 agent)
|
|
77
|
-
if (event.toolName === "write") {
|
|
78
|
-
const filePath = (event.input as any).path ?? (event.input as any).file ?? (event.input as any).file_path;
|
|
79
|
-
if (filePath) {
|
|
80
|
-
try {
|
|
81
|
-
const abs = resolve(ctx.cwd, filePath);
|
|
82
|
-
if (fs.existsSync(abs) && fs.readFileSync(abs, "utf8").length > 0) {
|
|
83
|
-
return { block: true, reason: "Overwriting a non-empty file is dangerous, use the edit tool instead!" };
|
|
84
|
-
}
|
|
85
|
-
} catch { /* file doesn't exist */ }
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
// Gate 4: read 工具读取保护路径(bash 读取已在 Gate 1 处理)
|
|
90
|
-
if (event.toolName === "read") {
|
|
91
|
-
const filePath = (event.input as any).path ?? (event.input as any).file ?? (event.input as any).file_path;
|
|
92
|
-
if (filePath) {
|
|
93
|
-
const danger = checkProtectedPath(filePath);
|
|
94
|
-
if (danger) {
|
|
95
|
-
if (!ctx.hasUI) {
|
|
96
|
-
return { block: true, reason: `🔒 Reading protected file: ${danger}\nmay contain sensitive information` };
|
|
97
|
-
}
|
|
98
|
-
const choice = await ctx.ui.select(
|
|
99
|
-
`🔒 Reading protected file: ${danger}\nmay contain sensitive information\n\nProceed?`,
|
|
100
|
-
["Block", "Allow once"],
|
|
101
|
-
);
|
|
102
|
-
if (!choice || choice === "Block") {
|
|
103
|
-
return { block: true, reason: `🔒 Reading protected file: ${danger}\nmay contain sensitive information` };
|
|
104
|
-
}
|
|
105
|
-
}
|
|
106
|
-
}
|
|
107
|
-
}
|
|
108
|
-
});
|
|
109
|
-
|
|
110
42
|
// ── Secret Redact (tool_result) ────────────────────────────────────────
|
|
111
43
|
|
|
112
44
|
const handleToolResult = async (
|
|
@@ -115,9 +47,10 @@ export function setupSafety(pi: ExtensionAPI) {
|
|
|
115
47
|
): Promise<{ content?: NonNullable<ToolResultEvent["content"]> } | void> => {
|
|
116
48
|
if (!event.content || !Array.isArray(event.content)) return;
|
|
117
49
|
|
|
118
|
-
//
|
|
119
|
-
//
|
|
120
|
-
|
|
50
|
+
// Scan read + bash tool output. Skip write/edit/patch because they mainly
|
|
51
|
+
// produce diffs or generated file bodies, which are handled elsewhere and are
|
|
52
|
+
// more prone to noisy false positives.
|
|
53
|
+
if (event.toolName !== "read" && event.toolName !== "bash") return;
|
|
121
54
|
|
|
122
55
|
const textParts: Array<{ index: number; text: string; item: ToolTextContent }> = [];
|
|
123
56
|
for (let i = 0; i < event.content.length; i++) {
|
|
@@ -129,17 +62,25 @@ export function setupSafety(pi: ExtensionAPI) {
|
|
|
129
62
|
if (textParts.length === 0) return;
|
|
130
63
|
|
|
131
64
|
let totalCount = 0;
|
|
65
|
+
const counts: Record<"pattern" | "regex" | "entropy", number> = {
|
|
66
|
+
pattern: 0,
|
|
67
|
+
regex: 0,
|
|
68
|
+
entropy: 0,
|
|
69
|
+
};
|
|
132
70
|
const newContent = [...event.content];
|
|
133
71
|
|
|
72
|
+
const filePath = (event.input as any)?.path ?? (event.input as any)?.file ?? (event.input as any)?.file_path;
|
|
73
|
+
|
|
134
74
|
for (const { index, text, item } of textParts) {
|
|
135
|
-
const matches = detectSecrets(text);
|
|
75
|
+
const matches = detectSecrets(text, { filePath });
|
|
136
76
|
if (matches.length === 0) continue;
|
|
137
77
|
|
|
138
78
|
totalCount += matches.length;
|
|
139
79
|
let redacted = text;
|
|
140
|
-
for (const { start, end } of matches) {
|
|
80
|
+
for (const { start, end, source } of matches) {
|
|
81
|
+
counts[source] += 1;
|
|
141
82
|
const original = redacted.slice(start, end);
|
|
142
|
-
redacted = redacted.slice(0, start) + maskSecret(original) + redacted.slice(end);
|
|
83
|
+
redacted = redacted.slice(0, start) + maskSecret(original, source) + redacted.slice(end);
|
|
143
84
|
}
|
|
144
85
|
const updatedItem: ToolTextContent = { ...item, text: redacted };
|
|
145
86
|
newContent[index] = updatedItem;
|
|
@@ -147,9 +88,15 @@ export function setupSafety(pi: ExtensionAPI) {
|
|
|
147
88
|
|
|
148
89
|
if (totalCount === 0) return;
|
|
149
90
|
const label = totalCount === 1 ? "1 secret" : `${totalCount} secrets`;
|
|
150
|
-
|
|
91
|
+
const breakdown: string[] = [];
|
|
92
|
+
if (counts.pattern > 0) breakdown.push(`*:pattern=${counts.pattern}`);
|
|
93
|
+
if (counts.regex > 0) breakdown.push(`#:regex=${counts.regex}`);
|
|
94
|
+
if (counts.entropy > 0) breakdown.push(`?:entropy=${counts.entropy}`);
|
|
95
|
+
const suffix = breakdown.length > 0 ? ` · ${breakdown.join(" ")}` : "";
|
|
96
|
+
const contextLabel = formatRedactionContext(event);
|
|
97
|
+
ctx.ui.notify(`🔒 [${contextLabel}] Redacted ${label}${suffix}`, "warning");
|
|
151
98
|
return { content: newContent };
|
|
152
99
|
};
|
|
153
100
|
|
|
154
101
|
pi.on("tool_result", handleToolResult);
|
|
155
|
-
}
|
|
102
|
+
}
|