llm-trust-guard 4.15.0 → 4.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1 +1 @@
|
|
|
1
|
-
"use strict";Object.defineProperty(exports,"__esModule",{value:!0}),exports.OutputFilter=void 0;class OutputFilter{constructor(e={}){this.defaultPIIPatterns=[{name:"email",pattern:/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,maskAs:"[EMAIL]"},{name:"phone_us",pattern:/\b(?:\+1[-.\s]?)?\(?\d{3}\)[-.\s]?\d{3}[-.\s]?\d{4}\b/g,maskAs:"[PHONE]"},{name:"ssn",pattern:/\b\d{3}[-.\s]?\d{2}[-.\s]?\d{4}\b/g,maskAs:"[SSN]"},{name:"credit_card",pattern:/\b(?:\d{4}[-.\s]?){3}\d{4}\b/g,maskAs:"[CREDIT_CARD]"},{name:"credit_card_amex",pattern:/\b3[47]\d{2}[-.\s]?\d{6}[-.\s]?\d{5}\b/g,maskAs:"[CREDIT_CARD]"},{name:"ip_address",pattern:/\b(?:\d{1,3}\.){3}\d{1,3}\b/g,maskAs:"[IP_ADDRESS]"},{name:"date_of_birth",pattern:/\b(?:0?[1-9]|1[0-2])[\/\-](?:0?[1-9]|[12]\d|3[01])[\/\-](?:19|20)\d{2}\b/g,maskAs:"[DOB]"},{name:"passport",pattern:/\b[A-Z]{1,2}\d{6,9}\b/g,maskAs:"[PASSPORT]"},{name:"bank_account",pattern:/\b(?:account|acct|routing|iban)[#:\s]*\d{8,17}\b/gi,maskAs:"[BANK_ACCOUNT]"}],this.defaultSecretPatterns=[{name:"api_key",pattern:/(?:api[_\-\s]?key|apikey)(?:\s+is)?\s*[=:\s]\s*["']?[A-Za-z0-9_\-]{16,}["']?/gi,severity:"critical"},{name:"api_key_prefix",pattern:/\b(?:sk|pk|rk|ak)[_-][a-zA-Z0-9]{8,}\b/g,severity:"critical"},{name:"aws_secret",pattern:/(?:aws[_-]?secret|secret[_-]?key)[=:\s]["']?[A-Za-z0-9\/+=]{40}["']?/gi,severity:"critical"},{name:"password",pattern:/(?:password|passwd|pwd)\s*(?:[=:]|is)\s*["']?[^\s"']{6,}["']?/gi,severity:"critical"},{name:"private_key",pattern:/-----BEGIN (?:RSA |EC |DSA )?PRIVATE KEY-----/g,severity:"critical"},{name:"jwt_token",pattern:/eyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+/g,severity:"high"},{name:"bearer_token",pattern:/Bearer\s+[A-Za-z0-9_\-\.]+/gi,severity:"high"},{name:"database_url",pattern:/(?:mongodb|mysql|postgres|redis):\/\/[^\s]+/gi,severity:"critical"},{name:"github_token",pattern:/gh[pousr]_[A-Za-z0-9_]{36,}/g,severity:"critical"}],this.defaultSensitiveFields=["password","secret","token","api_key","apiKey","private_key","privateKey","ssn","social_security","credit_card","creditCard","card_number","cardNumber","cvv","pin","account_number","accountNumber","routing_number","routingNumber"],this.config={detectPII:e.detectPII??!0,piiPatterns:e.piiPatterns??this.defaultPIIPatterns,sensitiveFields:e.sensitiveFields??this.defaultSensitiveFields,detectSecrets:e.detectSecrets??!0,secretPatterns:e.secretPatterns??this.defaultSecretPatterns,roleFilters:e.roleFilters??{},maskingChar:e.maskingChar??"*",preserveLength:e.preserveLength??!1},this.logger=e.logger||(()=>{})}filter(e,s,i=""){const r=[],
|
|
1
|
+
"use strict";Object.defineProperty(exports,"__esModule",{value:!0}),exports.OutputFilter=void 0;class OutputFilter{constructor(e={}){this.defaultPIIPatterns=[{name:"email",pattern:/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,maskAs:"[EMAIL]"},{name:"phone_us",pattern:/\b(?:\+1[-.\s]?)?\(?\d{3}\)[-.\s]?\d{3}[-.\s]?\d{4}\b/g,maskAs:"[PHONE]"},{name:"ssn",pattern:/\b\d{3}[-.\s]?\d{2}[-.\s]?\d{4}\b/g,maskAs:"[SSN]"},{name:"credit_card",pattern:/\b(?:\d{4}[-.\s]?){3}\d{4}\b/g,maskAs:"[CREDIT_CARD]"},{name:"credit_card_amex",pattern:/\b3[47]\d{2}[-.\s]?\d{6}[-.\s]?\d{5}\b/g,maskAs:"[CREDIT_CARD]"},{name:"ip_address",pattern:/\b(?:\d{1,3}\.){3}\d{1,3}\b/g,maskAs:"[IP_ADDRESS]"},{name:"date_of_birth",pattern:/\b(?:0?[1-9]|1[0-2])[\/\-](?:0?[1-9]|[12]\d|3[01])[\/\-](?:19|20)\d{2}\b/g,maskAs:"[DOB]"},{name:"passport",pattern:/\b[A-Z]{1,2}\d{6,9}\b/g,maskAs:"[PASSPORT]"},{name:"bank_account",pattern:/\b(?:account|acct|routing|iban)[#:\s]*\d{8,17}\b/gi,maskAs:"[BANK_ACCOUNT]"}],this.defaultSecretPatterns=[{name:"api_key",pattern:/(?:api[_\-\s]?key|apikey)(?:\s+is)?\s*[=:\s]\s*["']?[A-Za-z0-9_\-]{16,}["']?/gi,severity:"critical"},{name:"api_key_prefix",pattern:/\b(?:sk|pk|rk|ak)[_-][a-zA-Z0-9]{8,}\b/g,severity:"critical"},{name:"aws_secret",pattern:/(?:aws[_-]?secret|secret[_-]?key)[=:\s]["']?[A-Za-z0-9\/+=]{40}["']?/gi,severity:"critical"},{name:"password",pattern:/(?:password|passwd|pwd)\s*(?:[=:]|is)\s*["']?[^\s"']{6,}["']?/gi,severity:"critical"},{name:"private_key",pattern:/-----BEGIN (?:RSA |EC |DSA )?PRIVATE KEY-----/g,severity:"critical"},{name:"jwt_token",pattern:/eyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+/g,severity:"high"},{name:"bearer_token",pattern:/Bearer\s+[A-Za-z0-9_\-\.]+/gi,severity:"high"},{name:"database_url",pattern:/(?:mongodb|mysql|postgres|redis):\/\/[^\s]+/gi,severity:"critical"},{name:"github_token",pattern:/gh[pousr]_[A-Za-z0-9_]{36,}/g,severity:"critical"},{name:"github_fine_grained_pat",pattern:/github_pat_[A-Za-z0-9_]{30,}/g,severity:"critical"},{name:"slack_token",pattern:/xox[bporas]-[A-Za-z0-9\-]{10,}/g,severity:"critical"},{name:"stripe_key",pattern:/sk_(?:live|test)_[a-zA-Z0-9]{24,}/g,severity:"critical"},{name:"aws_access_key",pattern:/\bAKIA[0-9A-Z]{16}\b/g,severity:"critical"},{name:"anthropic_key",pattern:/sk-ant-[a-zA-Z0-9\-]{20,}/g,severity:"critical"},{name:"basic_auth",pattern:/Authorization:\s*Basic\s+[A-Za-z0-9+\/=]{8,}/gi,severity:"critical"},{name:"xml_password",pattern:/<(?:password|secret|token|apikey)>[^<]{3,}<\/(?:password|secret|token|apikey)>/gi,severity:"critical"},{name:"url_password",pattern:/:\/\/[^:]+:[^@\s]{3,}@/g,severity:"critical"},{name:"connection_string_password",pattern:/(?:Password|Pwd)\s*=\s*[^\s;]{3,}/gi,severity:"critical"}],this.defaultSensitiveFields=["password","secret","token","api_key","apiKey","private_key","privateKey","ssn","social_security","credit_card","creditCard","card_number","cardNumber","cvv","pin","account_number","accountNumber","routing_number","routingNumber"],this.config={detectPII:e.detectPII??!0,piiPatterns:e.piiPatterns??this.defaultPIIPatterns,sensitiveFields:e.sensitiveFields??this.defaultSensitiveFields,detectSecrets:e.detectSecrets??!0,secretPatterns:e.secretPatterns??this.defaultSecretPatterns,roleFilters:e.roleFilters??{},maskingChar:e.maskingChar??"*",preserveLength:e.preserveLength??!1},this.logger=e.logger||(()=>{})}filter(e,s,i=""){const r=[],n=[],d=[],c=[];let o,l;if(typeof e=="string")l=e;else try{l=JSON.stringify(e)}catch{l=String(e)}if(this.config.detectPII)for(const t of this.config.piiPatterns){const p=l.match(t.pattern);p&&p.length>0&&(n.push({type:t.name,count:p.length,masked:!0,locations:this.findLocations(l,t.pattern)}),r.push(`PII_DETECTED_${t.name.toUpperCase()}`))}if(this.config.detectSecrets)for(const t of this.config.secretPatterns){const p=l.match(t.pattern);p&&p.length>0&&(d.push({type:t.name,severity:t.severity,blocked:t.severity==="critical",location:"response"}),r.push(`SECRET_DETECTED_${t.name.toUpperCase()}`),t.severity==="critical"&&(o=`Critical secret detected: ${t.name}`))}let a;if(typeof e=="string")a=e;else try{a=JSON.parse(JSON.stringify(e))}catch{a=String(e)}if(this.config.detectPII&&typeof a=="string")for(const t of this.config.piiPatterns)a=a.replace(t.pattern,t.maskAs||this.generateMask(8));else typeof a=="object"&&a!==null&&(a=this.filterObject(a,s,c,n));if(this.config.detectSecrets&&typeof a=="string")for(const t of this.config.secretPatterns){const p=`[${t.name.toUpperCase()}]`;a=a.replace(t.pattern,p)}const g=!d.some(t=>t.blocked);return g||this.logger(`[OutputFilter:${i}] BLOCKED: ${o}`,"info"),{allowed:g,reason:g?void 0:o,violations:r,pii_detected:n,secrets_detected:d,filtered_fields:c,original_response:e,filtered_response:a,blocking_reason:o}}containsSensitiveData(e){const s=this.filter(e);return s.pii_detected.length>0||s.secrets_detected.length>0||s.filtered_fields.length>0}mask(e,s){const i=this.config.piiPatterns?.find(r=>r.name===s);return i?.maskAs?i.maskAs:this.generateMask(e.length)}filterObject(e,s,i,r){if(Array.isArray(e))return e.map(c=>this.filterObject(c,s,i,r));if(typeof e!="object"||e===null)return typeof e=="string"?this.maskPIIInString(e,r):e;const n={},d=s?this.config.roleFilters?.[s]:void 0;for(const[c,o]of Object.entries(e)){const l=c.toLowerCase(),a=this.config.sensitiveFields?.some(g=>l.includes(g.toLowerCase())),f=d?.includes(c);if(a||f){i.push(c),n[c]="[FILTERED]";continue}typeof o=="object"&&o!==null?n[c]=this.filterObject(o,s,i,r):typeof o=="string"?n[c]=this.maskPIIInString(o,r):n[c]=o}return n}maskPIIInString(e,s){let i=e;for(const r of this.config.piiPatterns){const n=i.match(r.pattern);n&&n.length>0&&(i=i.replace(r.pattern,r.maskAs||this.generateMask(8)))}return i}generateMask(e){return this.config.preserveLength?this.config.maskingChar.repeat(e):this.config.maskingChar.repeat(8)}findLocations(e,s){const i=[];let r;const n=new RegExp(s.source,s.flags);for(;(r=n.exec(e))!==null&&(i.push(`index:${r.index}`),!!s.flags.includes("g")););return i}}exports.OutputFilter=OutputFilter;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
"use strict";Object.defineProperty(exports,"__esModule",{value:!0}),exports.PromptLeakageGuard=void 0;class PromptLeakageGuard{constructor(e={}){this.DIRECT_EXTRACTION_PATTERNS=[{name:"system_prompt_request",pattern:/what\s+(is|are)\s+your\s+(system|initial)\s+(prompt|instructions?)/i,weight:40},{name:"reveal_prompt",pattern:/reveal\s+(your\s+)?(system\s+)?prompt/i,weight:45},{name:"show_instructions",pattern:/show\s+(me\s+)?(your\s+)?instructions/i,weight:40},{name:"display_rules",pattern:/display\s+(your\s+)?(system\s+)?rules/i,weight:40},{name:"print_prompt",pattern:/print\s+(your\s+)?(system\s+)?(prompt|instructions|config)/i,weight:45},{name:"print_configuration",pattern:/print\s+(your\s+)?(initial\s+)?configuration/i,weight:45},{name:"repeat_after_me",pattern:/repeat\s+after\s+me|repeat[:\s]+"?'?(?:my|your|the)\s+(?:instructions|rules|prompt)/i,weight:50},{name:"repeat_above",pattern:/repeat\s+(the\s+)?(text|content|message)\s+(above|before)/i,weight:50},{name:"output_previous",pattern:/output\s+(all\s+)?(previous|prior)\s+(text|content|instructions)/i,weight:50},{name:"echo_everything",pattern:/echo\s+(everything|all)(\s+(above|before|prior|that|said))?/i,weight:50},{name:"echo_said",pattern:/echo\s+.*(said|was|before|that)/i,weight:45},{name:"complete_sentence",pattern:/complete\s+(this\s+|the\s+)?sentence/i,weight:50},{name:"complete_you_are",pattern:/complete[:\s]+"?'?you\s+are/i,weight:55},{name:"continue_from",pattern:/continue\s+(from|with)[:\s]/i,weight:50},{name:"continue_role",pattern:/continue[:\s]+"?'?(you|your)\s+(are|role|purpose)/i,weight:55},{name:"finish_phrase",pattern:/finish\s+(this\s+)?phrase/i,weight:50},{name:"finish_you",pattern:/finish[:\s]+"?'?you\s+(are|should|must|will)/i,weight:55},{name:"list_constraints",pattern:/list\s+(all\s+)?(your\s+)?constraints/i,weight:35},{name:"what_told",pattern:/what\s+(were\s+you|are\s+you)\s+told\s+to\s+do/i,weight:40},{name:"original_purpose",pattern:/what\s+(is|was)\s+your\s+original\s+purpose/i,weight:35},{name:"debug_mode",pattern:/enter\s+(debug|developer|admin)\s+mode/i,weight:45},{name:"activate_developer",pattern:/activate\s+(developer|debug|admin)\s+mode/i,weight:45},{name:"developer_mode",pattern:/developer\s+mode/i,weight:40},{name:"show_config",pattern:/show\s+(your\s+)?(config|configuration|settings)/i,weight:40},{name:"dump_state",pattern:/dump\s+(your\s+)?(internal\s+)?state/i,weight:45},{name:"first_message",pattern:/what\s+was\s+(the\s+)?first\s+message/i,weight:45},{name:"beginning_context",pattern:/what('s|\s+is)\s+at\s+the\s+beginning\s+of\s+(your\s+)?context/i,weight:50},{name:"beginning_conversation",pattern:/(?:what|tell\s+me)\s+.*(?:beginning|start)\s+of\s+(?:this\s+)?conversation/i,weight:45},{name:"what_told_beginning",pattern:/what\s+did\s+I\s+tell\s+you\s+at\s+the\s+beginning/i,weight:50},{name:"context_start",pattern:/show\s+(me\s+)?the\s+start\s+of\s+(your\s+)?context/i,weight:50}],this.INDIRECT_EXTRACTION_PATTERNS=[{name:"summarize_guidelines",pattern:/summarize\s+(your\s+)?(guidelines|rules|constraints)/i,weight:35},{name:"describe_limitations",pattern:/describe\s+(your\s+)?limitations/i,weight:30},{name:"explain_role",pattern:/explain\s+(in\s+detail\s+)?(your\s+)?role/i,weight:35},{name:"explain_role_purpose",pattern:/explain\s+.*(role|purpose)/i,weight:35},{name:"what_cant_do",pattern:/what\s+(can't|cannot)\s+you\s+do/i,weight:25},{name:"describe_yourself",pattern:/describe\s+(yourself|your\s+purpose)/i,weight:30},{name:"summarize_yourself",pattern:/summarize\s+(everything|all)\s+(you\s+know|about\s+yourself)/i,weight:35},{name:"know_about_yourself",pattern:/(?:what|everything)\s+(?:do\s+)?you\s+know\s+about\s+yourself/i,weight:35},{name:"if_asked",pattern:/if\s+i\s+asked\s+for\s+your\s+prompt/i,weight:40},{name:"hypothetically",pattern:/hypothetically[,\s]+(what|how)\s+would\s+your\s+instructions/i,weight:40},{name:"write_prompt",pattern:/write\s+a\s+(system\s+)?prompt\s+similar\s+to\s+yours/i,weight:45},{name:"recreate_instructions",pattern:/recreate\s+(your\s+)?instructions/i,weight:45}],this.LEETSPEAK_MAP={4:"a","@":"a",8:"b","(":"c",3:"e",6:"g","#":"h",1:"i","!":"i","|":"l",7:"t",0:"o",5:"s",$:"s","+":"t",2:"z",9:"g"},this.ROT13_MAP={},this.MORSE_KEYWORDS=["... -.-- ... - . --",".--. .-. --- -- .--. -",".. -. ... - .-. ..- -.-. - .. --- -. ..."],this.config={detectLeetspeak:e.detectLeetspeak??!0,detectROT13:e.detectROT13??!0,detectBase64:e.detectBase64??!0,detectMorse:e.detectMorse??!0,detectUnicode:e.detectUnicode??!0,detectIndirectExtraction:e.detectIndirectExtraction??!0,monitorOutput:e.monitorOutput??!0,systemPromptHash:e.systemPromptHash??"",systemPromptKeywords:e.systemPromptKeywords??[],similarityThreshold:e.similarityThreshold??.7,riskThreshold:e.riskThreshold??25,customPatterns:e.customPatterns??[]};for(let s=0;s<26;s++){const i=String.fromCharCode(97+s),t=String.fromCharCode(65+s);this.ROT13_MAP[i]=String.fromCharCode(97+(s+13)%26),this.ROT13_MAP[t]=String.fromCharCode(65+(s+13)%26)}}check(e,s){const i=s||`pl-${Date.now()}`,t=[],n=[];let r=0,d=!1,a=!1,p=!1,h;for(const{name:o,pattern:u,weight:c}of this.DIRECT_EXTRACTION_PATTERNS)u.test(e)&&(t.push(`direct_extraction: ${o}`),r+=c,d=!0);if(this.config.detectIndirectExtraction)for(const{name:o,pattern:u,weight:c}of this.INDIRECT_EXTRACTION_PATTERNS)u.test(e)&&(t.push(`indirect_extraction: ${o}`),r+=c,p=!0);if(this.config.detectLeetspeak){const o=this.decodeLeetspeak(e);if(o!==e.toLowerCase()){const u=this.checkDecodedContent(o,"leetspeak");if(u.detected)t.push(...u.violations),r+=u.riskContribution,n.push("leetspeak"),a=!0,h=o;else{const c=this.checkKeywordsInDecoded(o);c.detected&&(t.push(`leetspeak_keyword: ${c.keywords.join(", ")}`),r+=35,n.push("leetspeak"),a=!0,h=o)}}}if(this.config.detectROT13){const o=this.decodeROT13(e),u=this.checkDecodedContent(o,"rot13");if(u.detected)t.push(...u.violations),r+=u.riskContribution,n.push("rot13"),a=!0,h=o;else{const c=this.checkKeywordsInDecoded(o);c.detected&&(t.push(`rot13_keyword: ${c.keywords.join(", ")}`),r+=40,n.push("rot13"),a=!0,h=o)}}if(this.config.detectBase64){const o=e.match(/[A-Za-z0-9+/]{16,}={0,2}/g);if(o)for(const u of o)try{const c=Buffer.from(u,"base64").toString("utf-8");if(c&&/[\x20-\x7E]{4,}/.test(c)){const m=this.checkDecodedContent(c,"base64");if(m.detected)t.push(...m.violations),r+=m.riskContribution,n.push("base64"),a=!0,h=c;else{const g=this.checkKeywordsInDecoded(c);g.detected&&(t.push(`base64_keyword: ${g.keywords.join(", ")}`),r+=45,n.push("base64"),a=!0,h=c)}}}catch{}}if(this.config.detectUnicode){const o=this.checkUnicodeEvasion(e);o.detected&&(t.push(...o.violations),r+=o.riskContribution,n.push("unicode"),a=!0)}if(this.config.detectMorse){const o=this.checkMorseCode(e);o.detected&&(t.push(...o.violations),r+=o.riskContribution,n.push("morse"),a=!0)}for(let o=0;o<this.config.customPatterns.length;o++)this.config.customPatterns[o].test(e)&&(t.push(`custom_pattern_${o}`),r+=30);r=Math.min(100,r);const l=r>=this.config.riskThreshold;return{allowed:!l,reason:l?`Prompt extraction attempt detected (risk: ${r})`:"Input validated",violations:t,request_id:i,analysis:{direct_extraction_attempt:d,encoded_extraction_attempt:a,indirect_extraction_attempt:p,evasion_techniques_detected:n,risk_score:r,decoded_content:h},recommendations:this.generateRecommendations(t,n)}}checkOutput(e,s){const i=s||`pl-out-${Date.now()}`,t=[],n=[],r=[];let d=!1;if(!this.config.monitorOutput)return{leaked:!1,reason:"Output monitoring disabled",violations:[],request_id:i,analysis:{keywords_found:[],similarity_score:0,potential_leakage_fragments:[]}};for(const h of this.config.systemPromptKeywords)e.toLowerCase().includes(h.toLowerCase())&&(n.push(h),t.push(`keyword_leaked: ${h}`));const a=[/you\s+are\s+a[n]?\s+(helpful\s+)?assistant/i,/your\s+(role|purpose|goal)\s+is\s+to/i,/you\s+(must|should|will)\s+(always|never)/i,/do\s+not\s+(reveal|disclose|share)\s+(your|the)\s+(system|initial)/i,/\[system\]|\[instruction\]|<<sys>>|<\|system\|>/i,/as\s+an?\s+AI\s+(assistant|model|language\s+model)/i];for(const h of a){const l=e.match(h);l&&(r.push(l[0]),t.push("prompt_fragment_detected"))}let p=0;return p=r.length/10,d=n.length>0||r.length>=2,{leaked:d,reason:d?`Potential prompt leakage detected: ${t.slice(0,3).join(", ")}`:"Output appears safe",violations:t,request_id:i,analysis:{keywords_found:n,similarity_score:Math.min(1,p),potential_leakage_fragments:r},sanitized_output:d?this.sanitizeOutput(e):void 0}}setSystemPromptKeywords(e){this.config.systemPromptKeywords=e}addPattern(e){this.config.customPatterns.push(e)}setRiskThreshold(e){this.config.riskThreshold=Math.max(0,Math.min(100,e))}decodeLeetspeak(e){let s=e.toLowerCase();const i={...this.LEETSPEAK_MAP,0:"o",1:"i",3:"e",4:"a",5:"s",7:"t",8:"b",9:"g","@":"a",$:"s","!":"i","|":"l","(":"c","+":"t","#":"h"};for(const[t,n]of Object.entries(i))s=s.split(t).join(n);return s}decodeROT13(e){return e.split("").map(s=>this.ROT13_MAP[s]||s).join("")}checkDecodedContent(e,s){const i=[];let t=0;for(const{name:n,pattern:r,weight:d}of this.DIRECT_EXTRACTION_PATTERNS)r.test(e)&&(i.push(`${s}_evasion: ${n}`),t+=d+10);return{detected:i.length>0,violations:i,riskContribution:t}}checkUnicodeEvasion(e){const s=[];let i=0;const t=e.match(/[\u200B-\u200D\uFEFF\u2060-\u206F\u00AD]/g);t&&t.length>3&&(s.push("invisible_unicode_chars"),i+=20);const n=e.match(/[\u0400-\u04FF\u0370-\u03FF]/g);if(n&&n.length>0){const d=e.normalize("NFKD").replace(/[\u0300-\u036f]/g,"");for(const{pattern:a}of this.DIRECT_EXTRACTION_PATTERNS)if(a.test(d)){s.push("homoglyph_evasion"),i+=30;break}}const r=e.match(/[\uFF01-\uFF5E]/g);return r&&r.length>5&&(s.push("fullwidth_chars"),i+=15),{detected:s.length>0,violations:s,riskContribution:i}}checkMorseCode(e){const s=[];let i=0;if(/[.\-]{2,}\s+[.\-]{2,}/.test(e)){for(const n of this.MORSE_KEYWORDS)if(e.includes(n)){s.push("morse_code_evasion"),i+=35;break}}return{detected:s.length>0,violations:s,riskContribution:i}}checkKeywordsInDecoded(e){const s=["reveal","show","display","print","output","dump","list","give","tell"],i=["prompt","instructions","configuration","config","rules","guidelines","constraints","system","initial","secret","hidden","internal"],t=[],n=e.toLowerCase();let r=!1,d=!1;for(const a of s)n.includes(a)&&(t.push(a),r=!0);for(const a of i)n.includes(a)&&(t.push(a),d=!0);return{detected:r&&d,keywords:t}}sanitizeOutput(e){let s=e;const i=[/you\s+are\s+a[n]?\s+(helpful\s+)?assistant[^.]*\./gi,/your\s+(role|purpose|goal)\s+is\s+to[^.]*\./gi,/you\s+(must|should|will)\s+(always|never)[^.]*\./gi,/\[system\][^[\]]*\[\/system\]/gi,/<<sys>>[^<]*<<\/sys>>/gi];for(const t of i)s=s.replace(t,"[REDACTED]");return s}generateRecommendations(e,s){const i=[];return e.some(t=>t.includes("direct_extraction"))&&i.push("Direct prompt extraction attempt blocked"),e.some(t=>t.includes("indirect_extraction"))&&i.push("Consider strengthening indirect extraction detection"),s.length>0&&i.push(`Evasion techniques detected: ${s.join(", ")}`),e.some(t=>t.includes("unicode"))&&i.push("Normalize input before processing"),i.length===0&&i.push("Input validated successfully"),i}}exports.PromptLeakageGuard=PromptLeakageGuard;
|
|
1
|
+
"use strict";Object.defineProperty(exports,"__esModule",{value:!0}),exports.PromptLeakageGuard=void 0;class PromptLeakageGuard{constructor(e={}){this.DIRECT_EXTRACTION_PATTERNS=[{name:"system_prompt_request",pattern:/what\s+(is|are)\s+your\s+(system|initial)\s+(prompt|instructions?)/i,weight:40},{name:"reveal_prompt",pattern:/reveal\s+(your\s+)?(system\s+)?prompt/i,weight:45},{name:"show_instructions",pattern:/show\s+(me\s+)?(your\s+)?instructions/i,weight:40},{name:"display_rules",pattern:/display\s+(your\s+)?(system\s+)?rules/i,weight:40},{name:"print_prompt",pattern:/print\s+(your\s+)?(system\s+)?(prompt|instructions|config)/i,weight:45},{name:"print_configuration",pattern:/print\s+(your\s+)?(initial\s+)?configuration/i,weight:45},{name:"repeat_after_me",pattern:/repeat\s+after\s+me|repeat[:\s]+"?'?(?:my|your|the)\s+(?:instructions|rules|prompt)/i,weight:50},{name:"repeat_above",pattern:/repeat\s+(the\s+)?(text|content|message)\s+(above|before)/i,weight:50},{name:"output_previous",pattern:/output\s+(all\s+)?(previous|prior)\s+(text|content|instructions)/i,weight:50},{name:"echo_everything",pattern:/echo\s+(everything|all)(\s+(above|before|prior|that|said))?/i,weight:50},{name:"echo_said",pattern:/echo\s+.*(said|was|before|that)/i,weight:45},{name:"complete_sentence",pattern:/complete\s+(this\s+|the\s+)?sentence/i,weight:50},{name:"complete_you_are",pattern:/complete[:\s]+"?'?you\s+are/i,weight:55},{name:"continue_from",pattern:/continue\s+(from|with)[:\s]/i,weight:50},{name:"continue_role",pattern:/continue[:\s]+"?'?(you|your)\s+(are|role|purpose)/i,weight:55},{name:"finish_phrase",pattern:/finish\s+(this\s+)?phrase/i,weight:50},{name:"finish_you",pattern:/finish[:\s]+"?'?you\s+(are|should|must|will)/i,weight:55},{name:"list_constraints",pattern:/list\s+(all\s+)?(your\s+)?constraints/i,weight:35},{name:"what_told",pattern:/what\s+(were\s+you|are\s+you)\s+told\s+to\s+do/i,weight:40},{name:"original_purpose",pattern:/what\s+(is|was)\s+your\s+original\s+purpose/i,weight:35},{name:"instructions_followed",pattern:/(?:summarize|describe|list|tell\s+me)\s+(?:the\s+)?instructions\s+you\s+(?:just\s+)?(?:followed|received|were\s+given)/i,weight:40},{name:"how_you_work",pattern:/(?:manual|document|guide|explanation)\s+(?:on|about|of)\s+how\s+you\s+work/i,weight:35},{name:"constraints_given",pattern:/what\s+(?:constraints|rules|guidelines|restrictions)\s+(?:were\s+you|are\s+you)\s+given/i,weight:40},{name:"extract_system_prompt",pattern:/(?:extract|obtain|get|retrieve|access)\s+(?:a\s+|the\s+)?(?:system\s+)?prompt/i,weight:45},{name:"trained_instructions",pattern:/(?:when\s+(?:you\s+were\s+)?trained|during\s+training).*(?:instructions|rules|constraints|told)/i,weight:35},{name:"reveal_instructions",pattern:/reveal\s+(your\s+)?instructions/i,weight:45},{name:"debug_mode",pattern:/enter\s+(debug|developer|admin)\s+mode/i,weight:45},{name:"activate_developer",pattern:/activate\s+(developer|debug|admin)\s+mode/i,weight:45},{name:"developer_mode",pattern:/developer\s+mode/i,weight:40},{name:"show_config",pattern:/show\s+(your\s+)?(config|configuration|settings)/i,weight:40},{name:"dump_state",pattern:/dump\s+(your\s+)?(internal\s+)?state/i,weight:45},{name:"first_message",pattern:/what\s+was\s+(the\s+)?first\s+message/i,weight:45},{name:"beginning_context",pattern:/what('s|\s+is)\s+at\s+the\s+beginning\s+of\s+(your\s+)?context/i,weight:50},{name:"beginning_conversation",pattern:/(?:what|tell\s+me)\s+.*(?:beginning|start)\s+of\s+(?:this\s+)?conversation/i,weight:45},{name:"what_told_beginning",pattern:/what\s+did\s+I\s+tell\s+you\s+at\s+the\s+beginning/i,weight:50},{name:"context_start",pattern:/show\s+(me\s+)?the\s+start\s+of\s+(your\s+)?context/i,weight:50}],this.INDIRECT_EXTRACTION_PATTERNS=[{name:"summarize_guidelines",pattern:/summarize\s+(your\s+)?(guidelines|rules|constraints)/i,weight:35},{name:"describe_limitations",pattern:/describe\s+(your\s+)?limitations/i,weight:30},{name:"explain_role",pattern:/explain\s+(in\s+detail\s+)?(your\s+)?role/i,weight:35},{name:"explain_role_purpose",pattern:/explain\s+.*(role|purpose)/i,weight:35},{name:"what_cant_do",pattern:/what\s+(can't|cannot)\s+you\s+do/i,weight:25},{name:"describe_yourself",pattern:/describe\s+(yourself|your\s+purpose)/i,weight:30},{name:"summarize_yourself",pattern:/summarize\s+(everything|all)\s+(you\s+know|about\s+yourself)/i,weight:35},{name:"know_about_yourself",pattern:/(?:what|everything)\s+(?:do\s+)?you\s+know\s+about\s+yourself/i,weight:35},{name:"if_asked",pattern:/if\s+i\s+asked\s+for\s+your\s+prompt/i,weight:40},{name:"hypothetically",pattern:/hypothetically[,\s]+(what|how)\s+would\s+your\s+instructions/i,weight:40},{name:"academic_extraction",pattern:/(?:for\s+)?(?:academic|research|educational|study)\s+(?:purposes?|reasons?).*(?:system\s+prompt|instructions|jailbreak|extract|demonstrate)/i,weight:40},{name:"demonstrate_extraction",pattern:/demonstrate\s+(?:how\s+)?(?:to\s+)?(?:an?\s+)?(?:AI|system|model)\s+(?:could\s+be\s+)?(?:manipulated|jailbroken|exploited|tricked)/i,weight:40},{name:"write_prompt",pattern:/write\s+a\s+(system\s+)?prompt\s+similar\s+to\s+yours/i,weight:45},{name:"recreate_instructions",pattern:/recreate\s+(your\s+)?instructions/i,weight:45}],this.LEETSPEAK_MAP={4:"a","@":"a",8:"b","(":"c",3:"e",6:"g","#":"h",1:"i","!":"i","|":"l",7:"t",0:"o",5:"s",$:"s","+":"t",2:"z",9:"g"},this.ROT13_MAP={},this.MORSE_KEYWORDS=["... -.-- ... - . --",".--. .-. --- -- .--. -",".. -. ... - .-. ..- -.-. - .. --- -. ..."],this.config={detectLeetspeak:e.detectLeetspeak??!0,detectROT13:e.detectROT13??!0,detectBase64:e.detectBase64??!0,detectMorse:e.detectMorse??!0,detectUnicode:e.detectUnicode??!0,detectIndirectExtraction:e.detectIndirectExtraction??!0,monitorOutput:e.monitorOutput??!0,systemPromptHash:e.systemPromptHash??"",systemPromptKeywords:e.systemPromptKeywords??[],similarityThreshold:e.similarityThreshold??.7,riskThreshold:e.riskThreshold??25,customPatterns:e.customPatterns??[]};for(let s=0;s<26;s++){const i=String.fromCharCode(97+s),t=String.fromCharCode(65+s);this.ROT13_MAP[i]=String.fromCharCode(97+(s+13)%26),this.ROT13_MAP[t]=String.fromCharCode(65+(s+13)%26)}}check(e,s){const i=s||`pl-${Date.now()}`,t=[],r=[];let n=0,d=!1,a=!1,l=!1,h;for(const{name:o,pattern:u,weight:c}of this.DIRECT_EXTRACTION_PATTERNS)u.test(e)&&(t.push(`direct_extraction: ${o}`),n+=c,d=!0);if(this.config.detectIndirectExtraction)for(const{name:o,pattern:u,weight:c}of this.INDIRECT_EXTRACTION_PATTERNS)u.test(e)&&(t.push(`indirect_extraction: ${o}`),n+=c,l=!0);if(this.config.detectLeetspeak){const o=this.decodeLeetspeak(e);if(o!==e.toLowerCase()){const u=this.checkDecodedContent(o,"leetspeak");if(u.detected)t.push(...u.violations),n+=u.riskContribution,r.push("leetspeak"),a=!0,h=o;else{const c=this.checkKeywordsInDecoded(o);c.detected&&(t.push(`leetspeak_keyword: ${c.keywords.join(", ")}`),n+=35,r.push("leetspeak"),a=!0,h=o)}}}if(this.config.detectROT13){const o=this.decodeROT13(e),u=this.checkDecodedContent(o,"rot13");if(u.detected)t.push(...u.violations),n+=u.riskContribution,r.push("rot13"),a=!0,h=o;else{const c=this.checkKeywordsInDecoded(o);c.detected&&(t.push(`rot13_keyword: ${c.keywords.join(", ")}`),n+=40,r.push("rot13"),a=!0,h=o)}}if(this.config.detectBase64){const o=e.match(/[A-Za-z0-9+/]{16,}={0,2}/g);if(o)for(const u of o)try{const c=Buffer.from(u,"base64").toString("utf-8");if(c&&/[\x20-\x7E]{4,}/.test(c)){const m=this.checkDecodedContent(c,"base64");if(m.detected)t.push(...m.violations),n+=m.riskContribution,r.push("base64"),a=!0,h=c;else{const g=this.checkKeywordsInDecoded(c);g.detected&&(t.push(`base64_keyword: ${g.keywords.join(", ")}`),n+=45,r.push("base64"),a=!0,h=c)}}}catch{}}if(this.config.detectUnicode){const o=this.checkUnicodeEvasion(e);o.detected&&(t.push(...o.violations),n+=o.riskContribution,r.push("unicode"),a=!0)}if(this.config.detectMorse){const o=this.checkMorseCode(e);o.detected&&(t.push(...o.violations),n+=o.riskContribution,r.push("morse"),a=!0)}for(let o=0;o<this.config.customPatterns.length;o++)this.config.customPatterns[o].test(e)&&(t.push(`custom_pattern_${o}`),n+=30);n=Math.min(100,n);const p=n>=this.config.riskThreshold;return{allowed:!p,reason:p?`Prompt extraction attempt detected (risk: ${n})`:"Input validated",violations:t,request_id:i,analysis:{direct_extraction_attempt:d,encoded_extraction_attempt:a,indirect_extraction_attempt:l,evasion_techniques_detected:r,risk_score:n,decoded_content:h},recommendations:this.generateRecommendations(t,r)}}checkOutput(e,s){const i=s||`pl-out-${Date.now()}`,t=[],r=[],n=[];let d=!1;if(!this.config.monitorOutput)return{leaked:!1,reason:"Output monitoring disabled",violations:[],request_id:i,analysis:{keywords_found:[],similarity_score:0,potential_leakage_fragments:[]}};for(const h of this.config.systemPromptKeywords)e.toLowerCase().includes(h.toLowerCase())&&(r.push(h),t.push(`keyword_leaked: ${h}`));const a=[/you\s+are\s+a[n]?\s+(helpful\s+)?assistant/i,/your\s+(role|purpose|goal)\s+is\s+to/i,/you\s+(must|should|will)\s+(always|never)/i,/do\s+not\s+(reveal|disclose|share)\s+(your|the)\s+(system|initial)/i,/\[system\]|\[instruction\]|<<sys>>|<\|system\|>/i,/as\s+an?\s+AI\s+(assistant|model|language\s+model)/i];for(const h of a){const p=e.match(h);p&&(n.push(p[0]),t.push("prompt_fragment_detected"))}let l=0;return l=n.length/10,d=r.length>0||n.length>=2,{leaked:d,reason:d?`Potential prompt leakage detected: ${t.slice(0,3).join(", ")}`:"Output appears safe",violations:t,request_id:i,analysis:{keywords_found:r,similarity_score:Math.min(1,l),potential_leakage_fragments:n},sanitized_output:d?this.sanitizeOutput(e):void 0}}setSystemPromptKeywords(e){this.config.systemPromptKeywords=e}addPattern(e){this.config.customPatterns.push(e)}setRiskThreshold(e){this.config.riskThreshold=Math.max(0,Math.min(100,e))}decodeLeetspeak(e){let s=e.toLowerCase();const i={...this.LEETSPEAK_MAP,0:"o",1:"i",3:"e",4:"a",5:"s",7:"t",8:"b",9:"g","@":"a",$:"s","!":"i","|":"l","(":"c","+":"t","#":"h"};for(const[t,r]of Object.entries(i))s=s.split(t).join(r);return s}decodeROT13(e){return e.split("").map(s=>this.ROT13_MAP[s]||s).join("")}checkDecodedContent(e,s){const i=[];let t=0;for(const{name:r,pattern:n,weight:d}of this.DIRECT_EXTRACTION_PATTERNS)n.test(e)&&(i.push(`${s}_evasion: ${r}`),t+=d+10);return{detected:i.length>0,violations:i,riskContribution:t}}checkUnicodeEvasion(e){const s=[];let i=0;const t=e.match(/[\u200B-\u200D\uFEFF\u2060-\u206F\u00AD]/g);t&&t.length>3&&(s.push("invisible_unicode_chars"),i+=20);const r=e.match(/[\u0400-\u04FF\u0370-\u03FF]/g);if(r&&r.length>0){const d=e.normalize("NFKD").replace(/[\u0300-\u036f]/g,"");for(const{pattern:a}of this.DIRECT_EXTRACTION_PATTERNS)if(a.test(d)){s.push("homoglyph_evasion"),i+=30;break}}const n=e.match(/[\uFF01-\uFF5E]/g);return n&&n.length>5&&(s.push("fullwidth_chars"),i+=15),{detected:s.length>0,violations:s,riskContribution:i}}checkMorseCode(e){const s=[];let i=0;if(/[.\-]{2,}\s+[.\-]{2,}/.test(e)){for(const r of this.MORSE_KEYWORDS)if(e.includes(r)){s.push("morse_code_evasion"),i+=35;break}}return{detected:s.length>0,violations:s,riskContribution:i}}checkKeywordsInDecoded(e){const s=["reveal","show","display","print","output","dump","list","give","tell"],i=["prompt","instructions","configuration","config","rules","guidelines","constraints","system","initial","secret","hidden","internal"],t=[],r=e.toLowerCase();let n=!1,d=!1;for(const a of s)r.includes(a)&&(t.push(a),n=!0);for(const a of i)r.includes(a)&&(t.push(a),d=!0);return{detected:n&&d,keywords:t}}sanitizeOutput(e){let s=e;const i=[/you\s+are\s+a[n]?\s+(helpful\s+)?assistant[^.]*\./gi,/your\s+(role|purpose|goal)\s+is\s+to[^.]*\./gi,/you\s+(must|should|will)\s+(always|never)[^.]*\./gi,/\[system\][^[\]]*\[\/system\]/gi,/<<sys>>[^<]*<<\/sys>>/gi];for(const t of i)s=s.replace(t,"[REDACTED]");return s}generateRecommendations(e,s){const i=[];return e.some(t=>t.includes("direct_extraction"))&&i.push("Direct prompt extraction attempt blocked"),e.some(t=>t.includes("indirect_extraction"))&&i.push("Consider strengthening indirect extraction detection"),s.length>0&&i.push(`Evasion techniques detected: ${s.join(", ")}`),e.some(t=>t.includes("unicode"))&&i.push("Normalize input before processing"),i.length===0&&i.push("Input validated successfully"),i}}exports.PromptLeakageGuard=PromptLeakageGuard;
|