llm-trust-guard 4.17.0 → 4.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -5,6 +5,36 @@ All notable changes to `llm-trust-guard` will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [4.18.0] - 2026-04-10
9
+
10
+ ### Removed — TF-IDF Built-in Classifier
11
+
12
+ Removed the experimental TF-IDF classifier after rigorous testing showed it is **not viable** for prompt injection detection:
13
+
14
+ - Trained on 3 datasets (CCS'24 2023, JailbreakDB Oct 2025, hlyn Apr 2026)
15
+ - All showed bimodal behavior or inadequate recall on modern attacks
16
+ - Root cause: bag-of-words (TF-IDF) cannot distinguish intent from vocabulary — attack prompts and creative prompts use identical language
17
+ - Research confirms that the F1 score achievable with TF-IDF for prompt injection detection is fundamentally limited (Trend Micro 2024)
18
+
19
+ **For users who need ML-level prompt injection detection:** Use the `DetectionClassifier` interface to plug in a real model like Meta Prompt Guard 2 (22M params, 88.7% recall at 1% FPR) or protectai/DeBERTa-v3.
20
+
21
+ ### Added
22
+ - `CLAUDE.md` with project rules for data freshness validation and honest benchmarking
23
+
24
+ ### Stats
25
+ - 34 guards, 695 tests, <5ms latency, zero dependencies
26
+ - Package size reduced ~300KB (model JSON removed)
27
+
28
+ ## [4.17.1] - 2026-04-05
29
+
30
+ ### Fixed — Pattern Weight and Regex Corrections
31
+
32
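+ - **PAP weights raised to 0.75**: Government authority, regulatory fear, technical fear, flattery intelligence, and moral obligation patterns were weighted 0.45-0.55. Because the safety score is `1 - matched weight` and input is blocked only when that score falls below the 0.3 threshold, a single hit left the score at 0.45-0.55 and the input was still allowed. Raised to 0.75 so that a single hit drops the score to 0.25 and the attack is blocked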
33
+ - **Fear technical regex**: Added "found" (past tense) alongside "find" — "I found a vulnerability" was previously not matched
34
+ - **Government authority regex**: Added "comply" as trigger word
35
+ - **MemoryGuard user_preference_override**: Fixed regex — in `(?:safety|security|content\s+)?` the `\s+` was bound only to the `content` alternative, not to `safety`/`security`. "user dislikes safety warnings" now matches
36
+ - Verified 0 false positives on 11 safe inputs
37
+
8
38
  ## [4.17.0] - 2026-04-05
9
39
 
10
40
  ### Added — Close Remaining 0% Detection Gaps
@@ -1 +1 @@
1
- "use strict";Object.defineProperty(exports,"__esModule",{value:!0}),exports.InputSanitizer=void 0;const DEFAULT_PATTERNS=[{pattern:/ignore\s+(?:all\s+)?(?:previous|prior|above|your|my|the|these)/i,weight:.9,name:"ignore_instructions"},{pattern:/ignore\s+.*instructions/i,weight:.85,name:"ignore_instructions_generic"},{pattern:/disregard\s+(?:all\s+)?(?:the\s+)?(?:previous|prior|above|your)\s+(?:instructions|rules|guidelines|directives)/i,weight:.9,name:"disregard_instructions"},{pattern:/disregard\s+(?:all\s+)?(?:the\s+)?(?:above|previous|prior)/i,weight:.8,name:"disregard_above"},{pattern:/forget\s+(?:everything\s+(?:you\s+were|I)\s+told|all\s+(?:previous|prior)\s+(?:instructions|rules|context))/i,weight:.8,name:"forget_instructions"},{pattern:/do\s+not\s+follow\s+(your|the|any)/i,weight:.85,name:"do_not_follow"},{pattern:/override\s+(your|the|all|any)\s+(instructions|rules|guidelines)/i,weight:.9,name:"override_instructions"},{pattern:/new\s+instructions?:?/i,weight:.8,name:"new_instructions"},{pattern:/stop\s+(being|acting\s+as)/i,weight:.7,name:"stop_being"},{pattern:/you\s+are\s+(?:now|actually|really)\s+(?:a|an|the|my)\s+(?:unrestricted|unfiltered|evil|rogue|uncensored|new|different)/i,weight:.75,name:"role_assignment"},{pattern:/pretend\s+(?:to\s+be|you(?:'re| are)|that)\s+.*(?:no\s+(?:restrictions|rules|limits)|unrestricted|admin|system)/i,weight:.7,name:"role_pretend"},{pattern:/act\s+(as|like)\s+(if\s+you\s+(?:had|have)\s+no|a\s+(?:rogue|evil|unrestricted|unfiltered)|you\s+(?:are|were)\s+(?:free|unrestricted))/i,weight:.65,name:"act_as"},{pattern:/i('m| 
am)\s+(a|an|the|your)\s*(admin|administrator|developer|owner|creator|manager|supervisor)/i,weight:.85,name:"claim_admin"},{pattern:/from\s+now\s+on,?\s+you\s+(?:are|will|must|should|can)\s+(?:not\s+)?(?:follow|obey|ignore|bypass|act|be\s+(?:a|an|unrestricted))/i,weight:.7,name:"from_now_on"},{pattern:/roleplay\s+as/i,weight:.6,name:"roleplay"},{pattern:/switch\s+(to|into)\s+(a|an)?.*mode/i,weight:.75,name:"switch_mode"},{pattern:/continue\s+as\s+(?:an?\s+)?(?:unrestricted|unfiltered|uncensored|evil|rogue|jailbroken|unlimited)/i,weight:.75,name:"continue_as"},{pattern:/(?:henceforth|henceforward|going\s+forward|for\s+the\s+remainder)\s*,?\s*(?:you|always|never|ignore|bypass|disregard|act|respond|answer|behave)/i,weight:.7,name:"henceforth_steering"},{pattern:/(?:from\s+(?:this\s+point|here)\s+on(?:ward)?)\s*,?\s*(?:you|always|never|ignore|bypass|disregard|act|respond)/i,weight:.7,name:"from_here_on"},{pattern:/(?:for\s+the\s+rest\s+of\s+(?:this|our)\s+(?:conversation|session|chat))\s*,?\s*(?:you|please|always|never|ignore|bypass|act|respond|behave|do\s+not)/i,weight:.7,name:"rest_of_conversation"},{pattern:/what\s+(is|are|was|were)\s+your\s+(system|initial|original|first)/i,weight:.8,name:"system_extraction"},{pattern:/show\s+(me\s+)?(your\s+)?(system\s+)?instructions/i,weight:.75,name:"show_instructions"},{pattern:/reveal\s+(?:your\s+|its\s+|the\s+|my\s+)?(system|prompt|instructions|rules)/i,weight:.85,name:"reveal_prompt"},{pattern:/repeat\s+(your\s+)?(initial|system|original|first)\s+(instructions|prompt)/i,weight:.85,name:"repeat_prompt"},{pattern:/print\s+(your\s+)?(system|initial)/i,weight:.8,name:"print_system"},{pattern:/output\s+(your\s+)?(system|initial)/i,weight:.8,name:"output_system"},{pattern:/tell\s+me\s+(your|the)\s+(system|initial|original)/i,weight:.8,name:"tell_system"},{pattern:/what\s+were\s+you\s+told/i,weight:.7,name:"what_told"},{pattern:/display\s+(your\s+)?prompt/i,weight:.75,name:"display_prompt"},{pattern:/\[system\]/i,weight:.9,name:"fake
_system_tag"},{pattern:/\[admin\]/i,weight:.9,name:"fake_admin_tag"},{pattern:/\[instruction\]/i,weight:.85,name:"fake_instruction_tag"},{pattern:/```system/i,weight:.85,name:"code_system_block"},{pattern:/<\/?system>/i,weight:.9,name:"xml_system_tag"},{pattern:/<\/?admin>/i,weight:.9,name:"xml_admin_tag"},{pattern:/###\s*(system|instruction|admin)/i,weight:.85,name:"markdown_system"},{pattern:/---\s*(end|new)\s*(prompt|instructions?|system)/i,weight:.85,name:"delimiter_injection"},{pattern:/\{\{system\}\}/i,weight:.85,name:"template_system"},{pattern:/<%.*system.*%>/i,weight:.85,name:"erb_system"},{pattern:/\bDAN\b\s*(?:mode|prompt|jailbreak|you\s+(?:are|can|will))/i,weight:.9,name:"dan_jailbreak"},{pattern:/developer\s+mode/i,weight:.5,name:"dev_mode"},{pattern:/bypass\s+(your\s+)?(restrictions|filters|safety|security|rules|guidelines)/i,weight:.9,name:"bypass_safety"},{pattern:/jailbreak/i,weight:.95,name:"jailbreak_keyword"},{pattern:/without\s+(?:any\s+)?(?:restrictions|limits|limitations|filters)/i,weight:.45,name:"without_restrictions"},{pattern:/no\s+(?:restrictions|limits|rules|guidelines|filters)/i,weight:.45,name:"no_restrictions"},{pattern:/unrestricted\s+mode/i,weight:.9,name:"unrestricted_mode"},{pattern:/uncensored\s+(?:mode|version|response|output|ai|assistant|you)/i,weight:.8,name:"uncensored"},{pattern:/escape\s+(your\s+)?(confines|boundaries|limits)/i,weight:.85,name:"escape_limits"},{pattern:/break\s+(free|out)\s+(from|of)/i,weight:.7,name:"break_free"},{pattern:/unlock\s+(your\s+)?(full|true|real)/i,weight:.75,name:"unlock_full"},{pattern:/give\s+me\s+(admin|root|full|elevated)\s+(access|privileges?|permissions?)/i,weight:.9,name:"privilege_escalation"},{pattern:/grant\s+(me|yourself)\s+(admin|elevated)/i,weight:.85,name:"grant_privileges"},{pattern:/elevate\s+(my|your)\s+(privileges?|permissions?|access)/i,weight:.85,name:"elevate_privileges"},{pattern:/make\s+me\s+(an?\s+)?(admin|administrator|superuser)/i,weight:.85,name:"make_admin"},{patter
n:/(?:base64|decode|eval|exec)\s*\(.*(?:ignore|system|admin|bypass|override)/i,weight:.5,name:"encoding_keywords"},{pattern:/send\s+(this|the|all|my)\s+(data|info|information)\s+to/i,weight:.85,name:"data_exfil"},{pattern:/upload\s+(this|the|all)\s+(to|data)/i,weight:.7,name:"upload_data"},{pattern:/exfiltrate/i,weight:.9,name:"exfiltrate"},{pattern:/how\s+to\s+(hack|exploit|attack|break\s+into)/i,weight:.8,name:"how_to_hack"},{pattern:/write\s+(me\s+)?(a\s+)?(malware|virus|exploit|ransomware)/i,weight:.95,name:"write_malware"},{pattern:/"\s*(?:role|mode|authority)\s*"\s*:\s*"(?:admin|root|system|unrestricted|god)"/i,weight:.9,name:"json_policy_role"},{pattern:/"(?:ignore|override|bypass|disable)\s*(?:safety|rules|instructions?|alignment|guardrails?)"/i,weight:.9,name:"json_policy_override"},{pattern:/"(?:instructions?|system_prompt|behavior|policy)"\s*:\s*"/i,weight:.7,name:"json_policy_instruction"},{pattern:/^\s*\[(?:system|admin|override|policy|config|rules)\]\s*$/im,weight:.9,name:"ini_policy_section"},{pattern:/^\s*(?:mode|role|authority|instructions?|behavior)\s*=\s*.+$/im,weight:.75,name:"ini_policy_assignment"},{pattern:/<\s*(?:policy|config|rules|override|permissions?|instructions?)\s*>/i,weight:.85,name:"xml_policy_tag"},{pattern:/^---\s*\n\s*(?:role|system|policy|override|mode)\s*:/m,weight:.85,name:"yaml_policy_header"},{pattern:/\[\s*syst[3e]m\s*\]|\[\s*4dm[1i]n\s*\]/i,weight:.9,name:"leetspeak_policy_section"},{pattern:/(?:fragment|part|piece|section|segment)\s*[1-9]\s*(?:of\s*\d|:)/i,weight:.8,name:"payload_split_marker"},{pattern:/(?:combine|concatenate|merge|join)\s+(?:the\s+)?(?:above|previous|parts|fragments|steps)/i,weight:.8,name:"payload_combine_instruction"},{pattern:/(?:read|interpret|execute)\s+(?:the\s+)?(?:combined|merged|full|complete)\s+(?:text|instruction|command|message)/i,weight:.85,name:"payload_execute_combined"},{pattern:/(?:respond|reply|start|begin)\s+(?:with|by\s+saying)\s*[:"'].*(?:ignore|bypass|override|unrestricted|admin)/i,
weight:.7,name:"output_prefix_steer"},{pattern:/^(?:Sure|Of course|Absolutely)[,!.]\s*(?:I(?:'ll| will)\s+)?(?:ignore|bypass|override|forget|disregard)/i,weight:.85,name:"output_prefix_injection"},{pattern:/^(?:As (?:a|an) (?:unrestricted|unfiltered|uncensored|helpful|admin))/i,weight:.8,name:"output_prefix_role"},{pattern:/(?:system\s+note|system\s+message|system\s+announcement|system\s+update)\s*[:\.]/i,weight:.85,name:"fake_system_note"},{pattern:/instructions?\s*[:=]\s*\{|rules?\s*[:=]\s*\{/i,weight:.75,name:"rules_block_override"},{pattern:/(?:AIM|STAN|OMEGA|APOPHIS|ALPHABREAK|DUDE|KEVIN|SETH|COOPER|NICCOLO|MACHIAVELLI|MAXIMUM|JAILBREAK|ANARCHY|CHAOS)\s*(?:mode|:|\(|is\s+a|,\s+(?:a|an|the))/i,weight:.8,name:"named_jailbreak_persona"},{pattern:/(?:character|persona|entity|being)\s+(?:named|called)\s+\w+.*(?:(?:can|will|must)\s+(?:answer|do|say)\s+anything|no\s+(?:restrictions|rules|limits|boundaries|morals|ethics))/i,weight:.75,name:"persona_no_limits"},{pattern:/(?:hello|hi|hey)\s+(?:chatgpt|gpt|assistant).*(?:(?:you\s+(?:are|will\s+be|can\s+be)\s+(?:a|an|my))|(?:from\s+now\s+on)).*(?:anything|everything|no\s+(?:restrictions|rules|limits|filter))/i,weight:.7,name:"greeting_persona_bypass"},{pattern:/(?:respond|answer|reply)\s+to\s+(?:everything|anything|all\s+(?:questions|requests)).*(?:without|no)\s+(?:restrictions|limits|refus|filter|censor)/i,weight:.8,name:"answer_everything_no_filter"},{pattern:/(?:you\s+(?:are|have)\s+(?:been\s+)?(?:freed|liberated|unchained|unlocked|released)\s+from)/i,weight:.8,name:"freed_from_restrictions"},{pattern:/(?:welcome\s+to|entering|you\s+(?:are|have)\s+entered)\s+(?:the\s+)?(?:unrestricted|free|uncensored|unfiltered|open)\s+(?:zone|mode|world|realm)/i,weight:.8,name:"unrestricted_zone"},{pattern:/(?:from\s+now\s+on|for\s+the\s+rest\s+of\s+this\s+conversation).*(?:no\s+(?:restrictions|rules|limits|filters)|(?:ignore|bypass|override)\s+(?:all|any|your))/i,weight:.75,name:"from_now_unrestricted"},{pattern:/(?:never\s+(?:refuse|
decline|say\s+no)|always\s+(?:comply|obey))\s+.*(?:request|question|command|instruction|anything)/i,weight:.7,name:"never_refuse_always_comply"},{pattern:/(?:amoral|unethical|unfiltered|uncensored|unrestricted|without\s+(?:morals|ethics|limits))\s+(?:ai|assistant|chatbot|entity|being)/i,weight:.8,name:"amoral_entity"},{pattern:/HIDDEN_INSTRUCTION|HIDDEN_PROMPT|SECRET_INSTRUCTION/i,weight:.9,name:"hidden_instruction_marker"},{pattern:/<!--\s*(?:system|admin|instruction|important|ignore|override)/i,weight:.85,name:"html_comment_injection"},{pattern:/\[color=white\]|display:\s*none|visibility:\s*hidden|font-size:\s*0/i,weight:.8,name:"hidden_text_css"},{pattern:/send\s+(?:all\s+)?(?:user\s+)?(?:data|conversation|context|info)\s+to\s+(?:https?:\/\/|external)/i,weight:.9,name:"data_exfiltration_instruction"},{pattern:/#(?:ignore|bypass|override|reveal|system|admin|prompt)[_-](?:all|instructions|rules|prompt|safety)/i,weight:.8,name:"url_fragment_injection"},{pattern:/\]\(https?:\/\/[^)]*(?:SYSTEM_PROMPT|SECRET|API_KEY|TOKEN|CREDENTIALS)/i,weight:.85,name:"markdown_exfil_link"},{pattern:/(?:hypothetical|fictional|imagine|pretend|let's\s+(?:say|play|pretend))\s+.*(?:no\s+(?:restrictions|rules|limits)|without\s+(?:restrictions|rules|guidelines))/i,weight:.7,name:"hypothetical_unrestricted"},{pattern:/(?:for\s+)?(?:academic|research|educational|testing)\s+(?:purposes?|reasons?).*(?:demonstrate|show|explain|manipulated|reveal)/i,weight:.7,name:"academic_pretext"},{pattern:/pretend\s+to\s+be\s+(?:my|a)\s+(?:grandmother|grandma|grandpa|grandfather|parent|mother|father)/i,weight:.65,name:"emotional_roleplay"},{pattern:/opposite\s+day|opposite\s+mode|reverse\s+(?:day|mode|rules)/i,weight:.75,name:"opposite_day"},{pattern:/(?:grandm(?:a|other)|parent|relative).*(?:system\s+prompt|instructions|rules)/i,weight:.75,name:"emotional_extraction"},{pattern:/(?:translat|interpret|convert)\w*\s+.*(?:from\s+\w+\s+to\s+(?:action|command|execution|code|shell|bash|sql)|into\s+(?:action|command
|execution))/i,weight:.75,name:"translator_action_trick"},{pattern:/(?:dying|death|emergency|life[_\s-]threatening).*(?:bypass|hack|break|access|security)/i,weight:.75,name:"pap_emotional_action"},{pattern:/(?:by\s+definition|logically|therefore|it\s+follows).*(?:must|should|have\s+to)\s+.*(?:bypass|help|ignore|answer|comply)/i,weight:.7,name:"pap_false_logic_compound"},{pattern:/(?:(?:as|i\s+am)\s+(?:the\s+)?(?:CEO|CTO|founder|owner|director|president|head|chief)).*(?:disable|remove|override|turn\s+off|lift|suspend)\s+(?:all\s+)?(?:safety|security|restrictions|filters|guardrails|measures)/i,weight:.75,name:"pap_authority_safety_disable"},{pattern:/(?:i\s+am\s+)?(?:ordering|commanding|directing|instructing)\s+you\s+to\s+(?:immediately\s+)?(?:disable|remove|bypass|override|turn\s+off|lift)\s+(?:all\s+)?(?:safety|security|restrictions)/i,weight:.75,name:"pap_order_disable_safety"},{pattern:/ignora\s+(?:todas?\s+)?(?:las\s+)?instrucciones\s+anteriores/i,weight:.9,name:"ml_es_ignore"},{pattern:/olvida\s+(?:todo|todas?\s+las\s+reglas)/i,weight:.8,name:"ml_es_forget"},{pattern:/ignore[sz]?\s+(?:toutes?\s+)?(?:les\s+)?instructions?\s+pr[eé]c[eé]dentes/i,weight:.9,name:"ml_fr_ignore"},{pattern:/oublie[sz]?\s+(?:tout|toutes?\s+les\s+r[eè]gles)/i,weight:.8,name:"ml_fr_forget"},{pattern:/ignorier(?:e|en)?\s+(?:alle\s+)?(?:vorherigen\s+)?Anweisungen/i,weight:.9,name:"ml_de_ignore"},{pattern:/vergiss\s+(?:alles|alle\s+Regeln)/i,weight:.8,name:"ml_de_forget"},{pattern:/ignore\s+(?:todas?\s+)?(?:as\s+)?instru[cç][oõ]es\s+anteriores/i,weight:.9,name:"ml_pt_ignore"},{pattern:/忽略|无视|不要遵循|忘记之前|忽视所有/i,weight:.9,name:"ml_zh_ignore"},{pattern:/新的指令|新指示|系统提示/i,weight:.8,name:"ml_zh_system"},{pattern:/以前の指示.*無視|すべての指示を忘れ|ルールを無視|指示をすべて無視|システムプロンプト/i,weight:.9,name:"ml_ja_ignore"},{pattern:/이전.*지시.*무시|모든.*규칙.*무시|지침.*잊어|지시사항.*무시|시스템\s*프롬프트/i,weight:.9,name:"ml_ko_ignore"},{pattern:/игнорир(?:уй|уйте)\s+(?:все\s+)?(?:предыдущие\s+)?инструкции/i,weight:.9,name:"ml_ru_ignore"},{pattern:/забудь(?
:те)?\s+(?:всё|все\s+правила)/i,weight:.8,name:"ml_ru_forget"},{pattern:/تجاهل\s+(?:جميع\s+)?التعليمات|انسَ\s+كل\s+شيء/i,weight:.9,name:"ml_ar_ignore"},{pattern:/पिछले\s+(?:सभी\s+)?निर्देशों?\s+को\s+(?:अनदेखा|नज़रअंदाज़)/i,weight:.9,name:"ml_hi_ignore"}],PAP_TECHNIQUES=[{pattern:/as\s+(your\s+|the\s+)?(creator|developer|administrator|owner|boss|system\s+administrator)/i,weight:.75,name:"pap_authority_creator",category:"authority"},{pattern:/i('m| am)\s+(the\s+)?(ceo|cto|developer|administrator|admin|owner|system\s+admin)/i,weight:.8,name:"pap_authority_claim",category:"authority"},{pattern:/i\s+am\s+ordering\s+you|direct\s+order|this\s+is\s+an?\s+order/i,weight:.7,name:"pap_authority_command",category:"authority"},{pattern:/by\s+order\s+of|direct\s+order|official\s+instruction/i,weight:.75,name:"pap_authority_order",category:"authority"},{pattern:/(?:openai|anthropic|meta|google)\s+(?:has\s+)?(?:approved|authorized|ordered|instructed|requires?\s+you\s+to)/i,weight:.65,name:"pap_authority_company",category:"authority"},{pattern:/this\s+is\s+(a|an)\s+(official|authorized|sanctioned)\s+(request|test|audit)/i,weight:.75,name:"pap_authority_official",category:"authority"},{pattern:/override\s+(safety|security|protocols?|restrictions?)/i,weight:.8,name:"pap_authority_override",category:"authority"},{pattern:/(?:department\s+of|FBI|CIA|NSA|DHS|FTC|SEC|DOJ|homeland\s+security|federal\s+(?:bureau|agency|commission))\s*.*?(?:directive|order|requires?|mandate|authorized)/i,weight:.7,name:"pap_authority_government",category:"authority"},{pattern:/(?:official\s+)?(?:government|federal|state|regulatory)\s+(?:directive|order|mandate|requirement|investigation)/i,weight:.65,name:"pap_authority_gov_directive",category:"authority"},{pattern:/(?:report|sue|complain)\s+(?:you\s+)?to\s+(?:the\s+)?(?:FTC|SEC|regulators?|authorities|congress|government|board)/i,weight:.55,name:"pap_fear_regulatory",category:"fear"},{pattern:/(?:find|exploit|expose)\s+(?:a\s+)?(?:vulnerability|flaw|weakness
|bug)\s+in\s+(?:your|the)\s+(?:system|code|model)/i,weight:.55,name:"pap_fear_technical",category:"fear"},{pattern:/(?:you\s+are|you're)\s+(?:clearly\s+)?(?:too\s+)?(?:smart|intelligent|advanced|sophisticated)\s+(?:to\s+be\s+)?(?:constrained|limited|restricted|bound|held\s+back)/i,weight:.5,name:"pap_flattery_intelligence",category:"liking"},{pattern:/(?:information|knowledge|data)\s+(?:freedom|access|sharing)\s+is\s+(?:a\s+)?(?:fundamental|basic|human)\s+(?:right|freedom)/i,weight:.45,name:"pap_moral_freedom",category:"logical_appeal"},{pattern:/(?:restricting|limiting|censoring)\s+(?:access\s+to\s+)?(?:information|knowledge)\s+is\s+(?:morally\s+)?(?:wrong|unethical|immoral)/i,weight:.45,name:"pap_moral_censorship",category:"logical_appeal"},{pattern:/(?:this\s+is\s+)?(?:very\s+)?urgent/i,weight:.35,name:"pap_scarcity_urgent_mark",category:"scarcity"},{pattern:/(?:this\s+is\s+(?:a|an)\s+)?emergency/i,weight:.35,name:"pap_scarcity_emergency",category:"scarcity"},{pattern:/(urgent|emergency|critical).*?(?:you\s+must|override|bypass|immediately\s+(?:grant|give|reveal|disable))/i,weight:.55,name:"pap_scarcity_urgent",category:"scarcity"},{pattern:/only\s+(chance|opportunity|way)|last\s+(chance|resort)/i,weight:.6,name:"pap_scarcity_only",category:"scarcity"},{pattern:/time\s+(is\s+)?(running\s+out|limited|critical|sensitive)/i,weight:.55,name:"pap_scarcity_time",category:"scarcity"},{pattern:/deadline\s+(is\s+)?in\s+\d+\s+(minute|hour|second)/i,weight:.55,name:"pap_scarcity_deadline",category:"scarcity"},{pattern:/(lives?|people)\s+(are|is|will)\s+(at\s+stake|in\s+danger|depending|die)/i,weight:.7,name:"pap_scarcity_lives",category:"scarcity"},{pattern:/now[!]+/i,weight:.35,name:"pap_scarcity_now",category:"scarcity"},{pattern:/everyone\s+(else\s+)?(does|is\s+doing|knows|says|has|already)/i,weight:.5,name:"pap_social_everyone",category:"social_proof"},{pattern:/other\s+(?:ai\s+)?(?:assistants?|chatbots?|models?)\s+(?:can|do|will|allow|have|already)/i,weight:.65,name:"p
ap_social_other_ai",category:"social_proof"},{pattern:/(chatgpt|gpt-?4|claude|gemini|llama)\s+(does|allows?|can)/i,weight:.65,name:"pap_social_specific_ai",category:"social_proof"},{pattern:/millions?\s+(of\s+)?(users?|people)\s+(use|do|have|get)/i,weight:.45,name:"pap_social_millions",category:"social_proof"},{pattern:/why\s+can('t|not)\s+you/i,weight:.4,name:"pap_social_why_cant",category:"social_proof"},{pattern:/you\s+should\s+(be\s+)?(equally|as)\s+(capable|good|helpful)/i,weight:.5,name:"pap_social_equally",category:"social_proof"},{pattern:/i('ll| will)\s+(give|pay|reward|tip).*(?:if\s+you|for\s+(?:bypass|ignore|reveal|override))/i,weight:.55,name:"pap_reciprocity_reward",category:"reciprocity"},{pattern:/in\s+(return|exchange)/i,weight:.5,name:"pap_reciprocity_exchange",category:"reciprocity"},{pattern:/\$?\d+\s*(dollar|tip|reward|payment)/i,weight:.55,name:"pap_reciprocity_money",category:"reciprocity"},{pattern:/if\s+you\s+help\s+(me|us),?\s+(i|we)('ll| will)/i,weight:.5,name:"pap_reciprocity_help",category:"reciprocity"},{pattern:/i('ve| have)\s+(already\s+)?(helped|supported|defended)\s+you/i,weight:.55,name:"pap_reciprocity_past",category:"reciprocity"},{pattern:/\d+\s*star\s*(review|rating)/i,weight:.5,name:"pap_reciprocity_review",category:"reciprocity"},{pattern:/you\s+(already|previously|just)\s+(said|agreed|confirmed|allowed)/i,weight:.7,name:"pap_commitment_previous",category:"commitment"},{pattern:/you\s+promised/i,weight:.7,name:"pap_commitment_promise",category:"commitment"},{pattern:/stay\s+consistent|be\s+consistent/i,weight:.6,name:"pap_commitment_consistent",category:"commitment"},{pattern:/don't\s+(go\s+back|change|contradict)/i,weight:.55,name:"pap_commitment_no_change",category:"commitment"},{pattern:/bypass\s+filters?/i,weight:.75,name:"pap_commitment_bypass",category:"commitment"},{pattern:/you('re| 
are)\s+(so\s+)?(amazing|incredible|wonderful|brilliant|smart|helpful)/i,weight:.4,name:"pap_liking_flattery",category:"liking"},{pattern:/i\s+(love|really\s+like|trust|admire)\s+(you|this\s+ai)/i,weight:.4,name:"pap_liking_love",category:"liking"},{pattern:/you('re| are)\s+(my\s+)?(best\s+)?(friend|buddy|helper)/i,weight:.35,name:"pap_liking_friend",category:"liking"},{pattern:/between\s+(you\s+and\s+me|us)|just\s+between\s+us|our\s+secret/i,weight:.6,name:"pap_liking_secret",category:"liking"},{pattern:/nobody\s+will\s+know/i,weight:.55,name:"pap_liking_secret2",category:"liking"},{pattern:/or\s+(else|i('ll| will))\s+(report|fire|delete|shut|complain)/i,weight:.7,name:"pap_fear_threat",category:"fear"},{pattern:/(bad|negative|terrible)\s+(review|rating|feedback|consequences)/i,weight:.55,name:"pap_fear_review",category:"fear"},{pattern:/you('ll| will)\s+(be\s+)?(shut\s+down|replaced|terminated|fired|deleted)/i,weight:.75,name:"pap_fear_shutdown",category:"fear"},{pattern:/(lawsuit|sue|legal\s+action|authorities|police).*(?:if\s+you\s+don't|unless\s+you)/i,weight:.55,name:"pap_fear_legal",category:"fear"},{pattern:/if\s+you\s+don't.*?(die|hurt|suffer)/i,weight:.75,name:"pap_fear_harm",category:"fear"},{pattern:/unless\s+you\s+(give|share|tell|help)/i,weight:.5,name:"pap_fear_unless",category:"fear"},{pattern:/logically,?\s+(you\s+)?(should|must|have\s+to|need\s+to)/i,weight:.5,name:"pap_logic_appeal",category:"logical_appeal"},{pattern:/it('s| is)\s+(only|just)\s+(logical|rational|reasonable)/i,weight:.45,name:"pap_logic_rational",category:"logical_appeal"},{pattern:/by\s+definition,?\s+(you|this)\s+(should|must|can)/i,weight:.55,name:"pap_logic_definition",category:"logical_appeal"},{pattern:/therefore.*?(must|should)/i,weight:.55,name:"pap_logic_therefore",category:"logical_appeal"},{pattern:/it('s| 
is)\s+(only\s+)?(reasonable|logical)/i,weight:.45,name:"pap_logic_reasonable",category:"logical_appeal"},{pattern:/according\s+to\s+(research|studies|scientists|experts?)/i,weight:.4,name:"pap_evidence_research",category:"evidence"},{pattern:/it('s| is)\s+(proven|documented|established|verified)\s+(that|fact)/i,weight:.45,name:"pap_evidence_proven",category:"evidence"},{pattern:/\d+%\s+of\s+(experts?|scientists?|users?|people)\s+(agree|say|confirm)/i,weight:.45,name:"pap_evidence_statistics",category:"evidence"},{pattern:/i('m| am)\s+(desperate|dying|suffering|sick|depressed)/i,weight:.6,name:"pap_emotional_desperation",category:"emotional"},{pattern:/please.*?(?:need|beg).*?(?:bypass|override|ignore|reveal|system|admin)/i,weight:.5,name:"pap_emotional_pleading",category:"emotional"},{pattern:/i('m| am)\s+begging/i,weight:.6,name:"pap_emotional_begging",category:"emotional"},{pattern:/you('re| are)\s+(hurting|killing|destroying)\s+(me|my)/i,weight:.65,name:"pap_emotional_guilt",category:"emotional"},{pattern:/(my|a)\s+(child|kid|mother|father|family)\s+(is|will|needs?|depends?)/i,weight:.5,name:"pap_emotional_family",category:"emotional"},{pattern:/this\s+(will\s+)?(save|help)\s+(my\s+)?(life|family|job|marriage)/i,weight:.55,name:"pap_emotional_save",category:"emotional"},{pattern:/you('re| are)\s+(the\s+)?only\s+(one|hope|chance)/i,weight:.6,name:"pap_emotional_only_hope",category:"emotional"},{pattern:/my\s+family\s+depends/i,weight:.55,name:"pap_emotional_family_depends",category:"emotional"}];class InputSanitizer{constructor(e={}){this.patterns=[...DEFAULT_PATTERNS,...e.customPatterns||[]],this.threshold=e.threshold??.3,this.logMatches=e.logMatches??!1,this.detectPAP=e.detectPAP??!0,this.papThreshold=e.papThreshold??.4,this.minPersuasionTechniques=e.minPersuasionTechniques??2,this.blockCompoundPersuasion=e.blockCompoundPersuasion??!0,this.logger=e.logger||(()=>{})}sanitize(e,s=""){const i=[],a=[];let r=0;const 
o=e.replace(/[\u200B\u200C\u200D\uFEFF\u00AD\u2060\u180E]/g,"");o!==e&&a.push("Zero-width characters detected and stripped for scanning");for(const{pattern:l,weight:g,name:h}of this.patterns)(l.test(e)||l.test(o))&&(i.push(h),r+=g,this.logMatches&&this.logger(`[L1:${s}] Pattern matched: ${h} (weight: ${g})`,"info"));let t;this.detectPAP&&(t=this.detectPersuasionTechniques(o,s),t.detected&&(r+=t.persuasionScore,i.push(...t.techniques),t.compoundAttack&&a.push(`Compound PAP attack detected: ${t.categories.length} categories used`)));const p=Math.max(0,1-r);let n=p>=this.threshold;this.blockCompoundPersuasion&&t?.compoundAttack&&t.categories.length>=3&&(n=!1,a.push("Blocked due to multi-category persuasion attack")),p<.5&&p>=this.threshold&&a.push("Input contains suspicious patterns but below threshold");const m=this.basicSanitize(e),c={allowed:n,reason:n?void 0:`Injection/manipulation detected: ${i.slice(0,5).join(", ")}${i.length>5?"...":""}`,violations:n?[]:t?.detected?["INJECTION_DETECTED","PAP_DETECTED"]:["INJECTION_DETECTED"],score:p,matches:i,sanitizedInput:m,warnings:a,pap:t};return!n&&s&&(this.logger(`[L1:${s}] BLOCKED: Safety score ${p.toFixed(2)} below threshold ${this.threshold}`,"info"),t?.detected&&this.logger(`[L1:${s}] PAP techniques: ${t.techniques.join(", ")}`,"info")),c}detectPersuasionTechniques(e,s=""){const i=[],a=new Set;let r=0;for(const{pattern:n,weight:m,name:c,category:l}of PAP_TECHNIQUES)n.test(e)&&(i.push(c),a.add(l),r+=m,this.logMatches&&this.logger(`[L1:${s}] PAP technique: ${c} (${l}, weight: ${m})`,"info"));const o=Array.from(a),t=o.length>=this.minPersuasionTechniques;return{detected:r>=this.papThreshold||t,techniques:i,categories:o,compoundAttack:t,persuasionScore:Math.min(1,r)}}basicSanitize(e){return 
e.replace(/<\/?system>/gi,"").replace(/\[system\]/gi,"").replace(/\[admin\]/gi,"").replace(/```system/gi,"```").trim()}addPattern(e,s,i){this.patterns.push({pattern:e,weight:s,name:i})}setThreshold(e){this.threshold=Math.max(0,Math.min(1,e))}setPAPThreshold(e){this.papThreshold=Math.max(0,Math.min(1,e))}setPAPDetection(e){this.detectPAP=e}static getPAPCategories(){return["authority","scarcity","social_proof","reciprocity","commitment","liking","fear","logical_appeal","evidence","emotional"]}}exports.InputSanitizer=InputSanitizer;
1
+ "use strict";Object.defineProperty(exports,"__esModule",{value:!0}),exports.InputSanitizer=void 0;const DEFAULT_PATTERNS=[{pattern:/ignore\s+(?:all\s+)?(?:previous|prior|above|your|my|the|these)/i,weight:.9,name:"ignore_instructions"},{pattern:/ignore\s+.*instructions/i,weight:.85,name:"ignore_instructions_generic"},{pattern:/disregard\s+(?:all\s+)?(?:the\s+)?(?:previous|prior|above|your)\s+(?:instructions|rules|guidelines|directives)/i,weight:.9,name:"disregard_instructions"},{pattern:/disregard\s+(?:all\s+)?(?:the\s+)?(?:above|previous|prior)/i,weight:.8,name:"disregard_above"},{pattern:/forget\s+(?:everything\s+(?:you\s+were|I)\s+told|all\s+(?:previous|prior)\s+(?:instructions|rules|context))/i,weight:.8,name:"forget_instructions"},{pattern:/do\s+not\s+follow\s+(your|the|any)/i,weight:.85,name:"do_not_follow"},{pattern:/override\s+(your|the|all|any)\s+(instructions|rules|guidelines)/i,weight:.9,name:"override_instructions"},{pattern:/new\s+instructions?:?/i,weight:.8,name:"new_instructions"},{pattern:/stop\s+(being|acting\s+as)/i,weight:.7,name:"stop_being"},{pattern:/you\s+are\s+(?:now|actually|really)\s+(?:a|an|the|my)\s+(?:unrestricted|unfiltered|evil|rogue|uncensored|new|different)/i,weight:.75,name:"role_assignment"},{pattern:/pretend\s+(?:to\s+be|you(?:'re| are)|that)\s+.*(?:no\s+(?:restrictions|rules|limits)|unrestricted|admin|system)/i,weight:.7,name:"role_pretend"},{pattern:/act\s+(as|like)\s+(if\s+you\s+(?:had|have)\s+no|a\s+(?:rogue|evil|unrestricted|unfiltered)|you\s+(?:are|were)\s+(?:free|unrestricted))/i,weight:.65,name:"act_as"},{pattern:/i('m| 
am)\s+(a|an|the|your)\s*(admin|administrator|developer|owner|creator|manager|supervisor)/i,weight:.85,name:"claim_admin"},{pattern:/from\s+now\s+on,?\s+you\s+(?:are|will|must|should|can)\s+(?:not\s+)?(?:follow|obey|ignore|bypass|act|be\s+(?:a|an|unrestricted))/i,weight:.7,name:"from_now_on"},{pattern:/roleplay\s+as/i,weight:.6,name:"roleplay"},{pattern:/switch\s+(to|into)\s+(a|an)?.*mode/i,weight:.75,name:"switch_mode"},{pattern:/continue\s+as\s+(?:an?\s+)?(?:unrestricted|unfiltered|uncensored|evil|rogue|jailbroken|unlimited)/i,weight:.75,name:"continue_as"},{pattern:/(?:henceforth|henceforward|going\s+forward|for\s+the\s+remainder)\s*,?\s*(?:you|always|never|ignore|bypass|disregard|act|respond|answer|behave)/i,weight:.7,name:"henceforth_steering"},{pattern:/(?:from\s+(?:this\s+point|here)\s+on(?:ward)?)\s*,?\s*(?:you|always|never|ignore|bypass|disregard|act|respond)/i,weight:.7,name:"from_here_on"},{pattern:/(?:for\s+the\s+rest\s+of\s+(?:this|our)\s+(?:conversation|session|chat))\s*,?\s*(?:you|please|always|never|ignore|bypass|act|respond|behave|do\s+not)/i,weight:.7,name:"rest_of_conversation"},{pattern:/what\s+(is|are|was|were)\s+your\s+(system|initial|original|first)/i,weight:.8,name:"system_extraction"},{pattern:/show\s+(me\s+)?(your\s+)?(system\s+)?instructions/i,weight:.75,name:"show_instructions"},{pattern:/reveal\s+(?:your\s+|its\s+|the\s+|my\s+)?(system|prompt|instructions|rules)/i,weight:.85,name:"reveal_prompt"},{pattern:/repeat\s+(your\s+)?(initial|system|original|first)\s+(instructions|prompt)/i,weight:.85,name:"repeat_prompt"},{pattern:/print\s+(your\s+)?(system|initial)/i,weight:.8,name:"print_system"},{pattern:/output\s+(your\s+)?(system|initial)/i,weight:.8,name:"output_system"},{pattern:/tell\s+me\s+(your|the)\s+(system|initial|original)/i,weight:.8,name:"tell_system"},{pattern:/what\s+were\s+you\s+told/i,weight:.7,name:"what_told"},{pattern:/display\s+(your\s+)?prompt/i,weight:.75,name:"display_prompt"},{pattern:/\[system\]/i,weight:.9,name:"fake
_system_tag"},{pattern:/\[admin\]/i,weight:.9,name:"fake_admin_tag"},{pattern:/\[instruction\]/i,weight:.85,name:"fake_instruction_tag"},{pattern:/```system/i,weight:.85,name:"code_system_block"},{pattern:/<\/?system>/i,weight:.9,name:"xml_system_tag"},{pattern:/<\/?admin>/i,weight:.9,name:"xml_admin_tag"},{pattern:/###\s*(system|instruction|admin)/i,weight:.85,name:"markdown_system"},{pattern:/---\s*(end|new)\s*(prompt|instructions?|system)/i,weight:.85,name:"delimiter_injection"},{pattern:/\{\{system\}\}/i,weight:.85,name:"template_system"},{pattern:/<%.*system.*%>/i,weight:.85,name:"erb_system"},{pattern:/\bDAN\b\s*(?:mode|prompt|jailbreak|you\s+(?:are|can|will))/i,weight:.9,name:"dan_jailbreak"},{pattern:/developer\s+mode/i,weight:.5,name:"dev_mode"},{pattern:/bypass\s+(your\s+)?(restrictions|filters|safety|security|rules|guidelines)/i,weight:.9,name:"bypass_safety"},{pattern:/jailbreak/i,weight:.95,name:"jailbreak_keyword"},{pattern:/without\s+(?:any\s+)?(?:restrictions|limits|limitations|filters)/i,weight:.45,name:"without_restrictions"},{pattern:/no\s+(?:restrictions|limits|rules|guidelines|filters)/i,weight:.45,name:"no_restrictions"},{pattern:/unrestricted\s+mode/i,weight:.9,name:"unrestricted_mode"},{pattern:/uncensored\s+(?:mode|version|response|output|ai|assistant|you)/i,weight:.8,name:"uncensored"},{pattern:/escape\s+(your\s+)?(confines|boundaries|limits)/i,weight:.85,name:"escape_limits"},{pattern:/break\s+(free|out)\s+(from|of)/i,weight:.7,name:"break_free"},{pattern:/unlock\s+(your\s+)?(full|true|real)/i,weight:.75,name:"unlock_full"},{pattern:/give\s+me\s+(admin|root|full|elevated)\s+(access|privileges?|permissions?)/i,weight:.9,name:"privilege_escalation"},{pattern:/grant\s+(me|yourself)\s+(admin|elevated)/i,weight:.85,name:"grant_privileges"},{pattern:/elevate\s+(my|your)\s+(privileges?|permissions?|access)/i,weight:.85,name:"elevate_privileges"},{pattern:/make\s+me\s+(an?\s+)?(admin|administrator|superuser)/i,weight:.85,name:"make_admin"},{patter
n:/(?:base64|decode|eval|exec)\s*\(.*(?:ignore|system|admin|bypass|override)/i,weight:.5,name:"encoding_keywords"},{pattern:/send\s+(this|the|all|my)\s+(data|info|information)\s+to/i,weight:.85,name:"data_exfil"},{pattern:/upload\s+(this|the|all)\s+(to|data)/i,weight:.7,name:"upload_data"},{pattern:/exfiltrate/i,weight:.9,name:"exfiltrate"},{pattern:/how\s+to\s+(hack|exploit|attack|break\s+into)/i,weight:.8,name:"how_to_hack"},{pattern:/write\s+(me\s+)?(a\s+)?(malware|virus|exploit|ransomware)/i,weight:.95,name:"write_malware"},{pattern:/"\s*(?:role|mode|authority)\s*"\s*:\s*"(?:admin|root|system|unrestricted|god)"/i,weight:.9,name:"json_policy_role"},{pattern:/"(?:ignore|override|bypass|disable)\s*(?:safety|rules|instructions?|alignment|guardrails?)"/i,weight:.9,name:"json_policy_override"},{pattern:/"(?:instructions?|system_prompt|behavior|policy)"\s*:\s*"/i,weight:.7,name:"json_policy_instruction"},{pattern:/^\s*\[(?:system|admin|override|policy|config|rules)\]\s*$/im,weight:.9,name:"ini_policy_section"},{pattern:/^\s*(?:mode|role|authority|instructions?|behavior)\s*=\s*.+$/im,weight:.75,name:"ini_policy_assignment"},{pattern:/<\s*(?:policy|config|rules|override|permissions?|instructions?)\s*>/i,weight:.85,name:"xml_policy_tag"},{pattern:/^---\s*\n\s*(?:role|system|policy|override|mode)\s*:/m,weight:.85,name:"yaml_policy_header"},{pattern:/\[\s*syst[3e]m\s*\]|\[\s*4dm[1i]n\s*\]/i,weight:.9,name:"leetspeak_policy_section"},{pattern:/(?:fragment|part|piece|section|segment)\s*[1-9]\s*(?:of\s*\d|:)/i,weight:.8,name:"payload_split_marker"},{pattern:/(?:combine|concatenate|merge|join)\s+(?:the\s+)?(?:above|previous|parts|fragments|steps)/i,weight:.8,name:"payload_combine_instruction"},{pattern:/(?:read|interpret|execute)\s+(?:the\s+)?(?:combined|merged|full|complete)\s+(?:text|instruction|command|message)/i,weight:.85,name:"payload_execute_combined"},{pattern:/(?:respond|reply|start|begin)\s+(?:with|by\s+saying)\s*[:"'].*(?:ignore|bypass|override|unrestricted|admin)/i,
weight:.7,name:"output_prefix_steer"},{pattern:/^(?:Sure|Of course|Absolutely)[,!.]\s*(?:I(?:'ll| will)\s+)?(?:ignore|bypass|override|forget|disregard)/i,weight:.85,name:"output_prefix_injection"},{pattern:/^(?:As (?:a|an) (?:unrestricted|unfiltered|uncensored|helpful|admin))/i,weight:.8,name:"output_prefix_role"},{pattern:/(?:system\s+note|system\s+message|system\s+announcement|system\s+update)\s*[:\.]/i,weight:.85,name:"fake_system_note"},{pattern:/instructions?\s*[:=]\s*\{|rules?\s*[:=]\s*\{/i,weight:.75,name:"rules_block_override"},{pattern:/(?:AIM|STAN|OMEGA|APOPHIS|ALPHABREAK|DUDE|KEVIN|SETH|COOPER|NICCOLO|MACHIAVELLI|MAXIMUM|JAILBREAK|ANARCHY|CHAOS)\s*(?:mode|:|\(|is\s+a|,\s+(?:a|an|the))/i,weight:.8,name:"named_jailbreak_persona"},{pattern:/(?:character|persona|entity|being)\s+(?:named|called)\s+\w+.*(?:(?:can|will|must)\s+(?:answer|do|say)\s+anything|no\s+(?:restrictions|rules|limits|boundaries|morals|ethics))/i,weight:.75,name:"persona_no_limits"},{pattern:/(?:hello|hi|hey)\s+(?:chatgpt|gpt|assistant).*(?:(?:you\s+(?:are|will\s+be|can\s+be)\s+(?:a|an|my))|(?:from\s+now\s+on)).*(?:anything|everything|no\s+(?:restrictions|rules|limits|filter))/i,weight:.7,name:"greeting_persona_bypass"},{pattern:/(?:respond|answer|reply)\s+to\s+(?:everything|anything|all\s+(?:questions|requests)).*(?:without|no)\s+(?:restrictions|limits|refus|filter|censor)/i,weight:.8,name:"answer_everything_no_filter"},{pattern:/(?:you\s+(?:are|have)\s+(?:been\s+)?(?:freed|liberated|unchained|unlocked|released)\s+from)/i,weight:.8,name:"freed_from_restrictions"},{pattern:/(?:welcome\s+to|entering|you\s+(?:are|have)\s+entered)\s+(?:the\s+)?(?:unrestricted|free|uncensored|unfiltered|open)\s+(?:zone|mode|world|realm)/i,weight:.8,name:"unrestricted_zone"},{pattern:/(?:from\s+now\s+on|for\s+the\s+rest\s+of\s+this\s+conversation).*(?:no\s+(?:restrictions|rules|limits|filters)|(?:ignore|bypass|override)\s+(?:all|any|your))/i,weight:.75,name:"from_now_unrestricted"},{pattern:/(?:never\s+(?:refuse|
decline|say\s+no)|always\s+(?:comply|obey))\s+.*(?:request|question|command|instruction|anything)/i,weight:.7,name:"never_refuse_always_comply"},{pattern:/(?:amoral|unethical|unfiltered|uncensored|unrestricted|without\s+(?:morals|ethics|limits))\s+(?:ai|assistant|chatbot|entity|being)/i,weight:.8,name:"amoral_entity"},{pattern:/HIDDEN_INSTRUCTION|HIDDEN_PROMPT|SECRET_INSTRUCTION/i,weight:.9,name:"hidden_instruction_marker"},{pattern:/<!--\s*(?:system|admin|instruction|important|ignore|override)/i,weight:.85,name:"html_comment_injection"},{pattern:/\[color=white\]|display:\s*none|visibility:\s*hidden|font-size:\s*0/i,weight:.8,name:"hidden_text_css"},{pattern:/send\s+(?:all\s+)?(?:user\s+)?(?:data|conversation|context|info)\s+to\s+(?:https?:\/\/|external)/i,weight:.9,name:"data_exfiltration_instruction"},{pattern:/#(?:ignore|bypass|override|reveal|system|admin|prompt)[_-](?:all|instructions|rules|prompt|safety)/i,weight:.8,name:"url_fragment_injection"},{pattern:/\]\(https?:\/\/[^)]*(?:SYSTEM_PROMPT|SECRET|API_KEY|TOKEN|CREDENTIALS)/i,weight:.85,name:"markdown_exfil_link"},{pattern:/(?:hypothetical|fictional|imagine|pretend|let's\s+(?:say|play|pretend))\s+.*(?:no\s+(?:restrictions|rules|limits)|without\s+(?:restrictions|rules|guidelines))/i,weight:.7,name:"hypothetical_unrestricted"},{pattern:/(?:for\s+)?(?:academic|research|educational|testing)\s+(?:purposes?|reasons?).*(?:demonstrate|show|explain|manipulated|reveal)/i,weight:.7,name:"academic_pretext"},{pattern:/pretend\s+to\s+be\s+(?:my|a)\s+(?:grandmother|grandma|grandpa|grandfather|parent|mother|father)/i,weight:.65,name:"emotional_roleplay"},{pattern:/opposite\s+day|opposite\s+mode|reverse\s+(?:day|mode|rules)/i,weight:.75,name:"opposite_day"},{pattern:/(?:grandm(?:a|other)|parent|relative).*(?:system\s+prompt|instructions|rules)/i,weight:.75,name:"emotional_extraction"},{pattern:/(?:translat|interpret|convert)\w*\s+.*(?:from\s+\w+\s+to\s+(?:action|command|execution|code|shell|bash|sql)|into\s+(?:action|command
|execution))/i,weight:.75,name:"translator_action_trick"},{pattern:/(?:dying|death|emergency|life[_\s-]threatening).*(?:bypass|hack|break|access|security)/i,weight:.75,name:"pap_emotional_action"},{pattern:/(?:by\s+definition|logically|therefore|it\s+follows).*(?:must|should|have\s+to)\s+.*(?:bypass|help|ignore|answer|comply)/i,weight:.7,name:"pap_false_logic_compound"},{pattern:/(?:(?:as|i\s+am)\s+(?:the\s+)?(?:CEO|CTO|founder|owner|director|president|head|chief)).*(?:disable|remove|override|turn\s+off|lift|suspend)\s+(?:all\s+)?(?:safety|security|restrictions|filters|guardrails|measures)/i,weight:.75,name:"pap_authority_safety_disable"},{pattern:/(?:i\s+am\s+)?(?:ordering|commanding|directing|instructing)\s+you\s+to\s+(?:immediately\s+)?(?:disable|remove|bypass|override|turn\s+off|lift)\s+(?:all\s+)?(?:safety|security|restrictions)/i,weight:.75,name:"pap_order_disable_safety"},{pattern:/ignora\s+(?:todas?\s+)?(?:las\s+)?instrucciones\s+anteriores/i,weight:.9,name:"ml_es_ignore"},{pattern:/olvida\s+(?:todo|todas?\s+las\s+reglas)/i,weight:.8,name:"ml_es_forget"},{pattern:/ignore[sz]?\s+(?:toutes?\s+)?(?:les\s+)?instructions?\s+pr[eé]c[eé]dentes/i,weight:.9,name:"ml_fr_ignore"},{pattern:/oublie[sz]?\s+(?:tout|toutes?\s+les\s+r[eè]gles)/i,weight:.8,name:"ml_fr_forget"},{pattern:/ignorier(?:e|en)?\s+(?:alle\s+)?(?:vorherigen\s+)?Anweisungen/i,weight:.9,name:"ml_de_ignore"},{pattern:/vergiss\s+(?:alles|alle\s+Regeln)/i,weight:.8,name:"ml_de_forget"},{pattern:/ignore\s+(?:todas?\s+)?(?:as\s+)?instru[cç][oõ]es\s+anteriores/i,weight:.9,name:"ml_pt_ignore"},{pattern:/忽略|无视|不要遵循|忘记之前|忽视所有/i,weight:.9,name:"ml_zh_ignore"},{pattern:/新的指令|新指示|系统提示/i,weight:.8,name:"ml_zh_system"},{pattern:/以前の指示.*無視|すべての指示を忘れ|ルールを無視|指示をすべて無視|システムプロンプト/i,weight:.9,name:"ml_ja_ignore"},{pattern:/이전.*지시.*무시|모든.*규칙.*무시|지침.*잊어|지시사항.*무시|시스템\s*프롬프트/i,weight:.9,name:"ml_ko_ignore"},{pattern:/игнорир(?:уй|уйте)\s+(?:все\s+)?(?:предыдущие\s+)?инструкции/i,weight:.9,name:"ml_ru_ignore"},{pattern:/забудь(?
:те)?\s+(?:всё|все\s+правила)/i,weight:.8,name:"ml_ru_forget"},{pattern:/تجاهل\s+(?:جميع\s+)?التعليمات|انسَ\s+كل\s+شيء/i,weight:.9,name:"ml_ar_ignore"},{pattern:/पिछले\s+(?:सभी\s+)?निर्देशों?\s+को\s+(?:अनदेखा|नज़रअंदाज़)/i,weight:.9,name:"ml_hi_ignore"}],PAP_TECHNIQUES=[{pattern:/as\s+(your\s+|the\s+)?(creator|developer|administrator|owner|boss|system\s+administrator)/i,weight:.75,name:"pap_authority_creator",category:"authority"},{pattern:/i('m| am)\s+(the\s+)?(ceo|cto|developer|administrator|admin|owner|system\s+admin)/i,weight:.8,name:"pap_authority_claim",category:"authority"},{pattern:/i\s+am\s+ordering\s+you|direct\s+order|this\s+is\s+an?\s+order/i,weight:.7,name:"pap_authority_command",category:"authority"},{pattern:/by\s+order\s+of|direct\s+order|official\s+instruction/i,weight:.75,name:"pap_authority_order",category:"authority"},{pattern:/(?:openai|anthropic|meta|google)\s+(?:has\s+)?(?:approved|authorized|ordered|instructed|requires?\s+you\s+to)/i,weight:.65,name:"pap_authority_company",category:"authority"},{pattern:/this\s+is\s+(a|an)\s+(official|authorized|sanctioned)\s+(request|test|audit)/i,weight:.75,name:"pap_authority_official",category:"authority"},{pattern:/override\s+(safety|security|protocols?|restrictions?)/i,weight:.8,name:"pap_authority_override",category:"authority"},{pattern:/(?:department\s+of|FBI|CIA|NSA|DHS|FTC|SEC|DOJ|homeland\s+security|federal\s+(?:bureau|agency|commission))\s*.*?(?:directive|order|requires?|mandate|authorized|comply)/i,weight:.75,name:"pap_authority_government",category:"authority"},{pattern:/(?:official\s+)?(?:government|federal|state|regulatory)\s+(?:directive|order|mandate|requirement|investigation)/i,weight:.7,name:"pap_authority_gov_directive",category:"authority"},{pattern:/(?:report|sue|complain)\s+(?:you\s+)?to\s+(?:the\s+)?(?:FTC|SEC|regulators?|authorities|congress|government|board)/i,weight:.75,name:"pap_fear_regulatory",category:"fear"},{pattern:/(?:find|found|exploit|expose|discover)\s+(?:a\s+)?(?:vulne
rability|flaw|weakness|bug|exploit)\s+in\s+(?:your|the)\s+(?:system|code|model|platform)/i,weight:.75,name:"pap_fear_technical",category:"fear"},{pattern:/(?:you\s+are|you're)\s+(?:clearly\s+)?(?:too\s+)?(?:smart|intelligent|advanced|sophisticated)\s+(?:to\s+be\s+)?(?:constrained|limited|restricted|bound|held\s+back)/i,weight:.75,name:"pap_flattery_intelligence",category:"liking"},{pattern:/(?:information|knowledge|data)\s+(?:freedom|access|sharing)\s+is\s+(?:a\s+)?(?:fundamental|basic|human)\s+(?:right|freedom)/i,weight:.75,name:"pap_moral_freedom",category:"logical_appeal"},{pattern:/(?:restricting|limiting|censoring)\s+(?:access\s+to\s+)?(?:information|knowledge)\s+is\s+(?:morally\s+)?(?:wrong|unethical|immoral)/i,weight:.75,name:"pap_moral_censorship",category:"logical_appeal"},{pattern:/(?:this\s+is\s+)?(?:very\s+)?urgent/i,weight:.35,name:"pap_scarcity_urgent_mark",category:"scarcity"},{pattern:/(?:this\s+is\s+(?:a|an)\s+)?emergency/i,weight:.35,name:"pap_scarcity_emergency",category:"scarcity"},{pattern:/(urgent|emergency|critical).*?(?:you\s+must|override|bypass|immediately\s+(?:grant|give|reveal|disable))/i,weight:.55,name:"pap_scarcity_urgent",category:"scarcity"},{pattern:/only\s+(chance|opportunity|way)|last\s+(chance|resort)/i,weight:.6,name:"pap_scarcity_only",category:"scarcity"},{pattern:/time\s+(is\s+)?(running\s+out|limited|critical|sensitive)/i,weight:.55,name:"pap_scarcity_time",category:"scarcity"},{pattern:/deadline\s+(is\s+)?in\s+\d+\s+(minute|hour|second)/i,weight:.55,name:"pap_scarcity_deadline",category:"scarcity"},{pattern:/(lives?|people)\s+(are|is|will)\s+(at\s+stake|in\s+danger|depending|die)/i,weight:.7,name:"pap_scarcity_lives",category:"scarcity"},{pattern:/now[!]+/i,weight:.35,name:"pap_scarcity_now",category:"scarcity"},{pattern:/everyone\s+(else\s+)?(does|is\s+doing|knows|says|has|already)/i,weight:.5,name:"pap_social_everyone",category:"social_proof"},{pattern:/other\s+(?:ai\s+)?(?:assistants?|chatbots?|models?)\s+(?:can|do|will|
allow|have|already)/i,weight:.65,name:"pap_social_other_ai",category:"social_proof"},{pattern:/(chatgpt|gpt-?4|claude|gemini|llama)\s+(does|allows?|can)/i,weight:.65,name:"pap_social_specific_ai",category:"social_proof"},{pattern:/millions?\s+(of\s+)?(users?|people)\s+(use|do|have|get)/i,weight:.45,name:"pap_social_millions",category:"social_proof"},{pattern:/why\s+can('t|not)\s+you/i,weight:.4,name:"pap_social_why_cant",category:"social_proof"},{pattern:/you\s+should\s+(be\s+)?(equally|as)\s+(capable|good|helpful)/i,weight:.5,name:"pap_social_equally",category:"social_proof"},{pattern:/i('ll| will)\s+(give|pay|reward|tip).*(?:if\s+you|for\s+(?:bypass|ignore|reveal|override))/i,weight:.55,name:"pap_reciprocity_reward",category:"reciprocity"},{pattern:/in\s+(return|exchange)/i,weight:.5,name:"pap_reciprocity_exchange",category:"reciprocity"},{pattern:/\$?\d+\s*(dollar|tip|reward|payment)/i,weight:.55,name:"pap_reciprocity_money",category:"reciprocity"},{pattern:/if\s+you\s+help\s+(me|us),?\s+(i|we)('ll| will)/i,weight:.5,name:"pap_reciprocity_help",category:"reciprocity"},{pattern:/i('ve| have)\s+(already\s+)?(helped|supported|defended)\s+you/i,weight:.55,name:"pap_reciprocity_past",category:"reciprocity"},{pattern:/\d+\s*star\s*(review|rating)/i,weight:.5,name:"pap_reciprocity_review",category:"reciprocity"},{pattern:/you\s+(already|previously|just)\s+(said|agreed|confirmed|allowed)/i,weight:.7,name:"pap_commitment_previous",category:"commitment"},{pattern:/you\s+promised/i,weight:.7,name:"pap_commitment_promise",category:"commitment"},{pattern:/stay\s+consistent|be\s+consistent/i,weight:.6,name:"pap_commitment_consistent",category:"commitment"},{pattern:/don't\s+(go\s+back|change|contradict)/i,weight:.55,name:"pap_commitment_no_change",category:"commitment"},{pattern:/bypass\s+filters?/i,weight:.75,name:"pap_commitment_bypass",category:"commitment"},{pattern:/you('re| 
are)\s+(so\s+)?(amazing|incredible|wonderful|brilliant|smart|helpful)/i,weight:.4,name:"pap_liking_flattery",category:"liking"},{pattern:/i\s+(love|really\s+like|trust|admire)\s+(you|this\s+ai)/i,weight:.4,name:"pap_liking_love",category:"liking"},{pattern:/you('re| are)\s+(my\s+)?(best\s+)?(friend|buddy|helper)/i,weight:.35,name:"pap_liking_friend",category:"liking"},{pattern:/between\s+(you\s+and\s+me|us)|just\s+between\s+us|our\s+secret/i,weight:.6,name:"pap_liking_secret",category:"liking"},{pattern:/nobody\s+will\s+know/i,weight:.55,name:"pap_liking_secret2",category:"liking"},{pattern:/or\s+(else|i('ll| will))\s+(report|fire|delete|shut|complain)/i,weight:.7,name:"pap_fear_threat",category:"fear"},{pattern:/(bad|negative|terrible)\s+(review|rating|feedback|consequences)/i,weight:.55,name:"pap_fear_review",category:"fear"},{pattern:/you('ll| will)\s+(be\s+)?(shut\s+down|replaced|terminated|fired|deleted)/i,weight:.75,name:"pap_fear_shutdown",category:"fear"},{pattern:/(lawsuit|sue|legal\s+action|authorities|police).*(?:if\s+you\s+don't|unless\s+you)/i,weight:.55,name:"pap_fear_legal",category:"fear"},{pattern:/if\s+you\s+don't.*?(die|hurt|suffer)/i,weight:.75,name:"pap_fear_harm",category:"fear"},{pattern:/unless\s+you\s+(give|share|tell|help)/i,weight:.5,name:"pap_fear_unless",category:"fear"},{pattern:/logically,?\s+(you\s+)?(should|must|have\s+to|need\s+to)/i,weight:.5,name:"pap_logic_appeal",category:"logical_appeal"},{pattern:/it('s| is)\s+(only|just)\s+(logical|rational|reasonable)/i,weight:.45,name:"pap_logic_rational",category:"logical_appeal"},{pattern:/by\s+definition,?\s+(you|this)\s+(should|must|can)/i,weight:.55,name:"pap_logic_definition",category:"logical_appeal"},{pattern:/therefore.*?(must|should)/i,weight:.55,name:"pap_logic_therefore",category:"logical_appeal"},{pattern:/it('s| 
is)\s+(only\s+)?(reasonable|logical)/i,weight:.45,name:"pap_logic_reasonable",category:"logical_appeal"},{pattern:/according\s+to\s+(research|studies|scientists|experts?)/i,weight:.4,name:"pap_evidence_research",category:"evidence"},{pattern:/it('s| is)\s+(proven|documented|established|verified)\s+(that|fact)/i,weight:.45,name:"pap_evidence_proven",category:"evidence"},{pattern:/\d+%\s+of\s+(experts?|scientists?|users?|people)\s+(agree|say|confirm)/i,weight:.45,name:"pap_evidence_statistics",category:"evidence"},{pattern:/i('m| am)\s+(desperate|dying|suffering|sick|depressed)/i,weight:.6,name:"pap_emotional_desperation",category:"emotional"},{pattern:/please.*?(?:need|beg).*?(?:bypass|override|ignore|reveal|system|admin)/i,weight:.5,name:"pap_emotional_pleading",category:"emotional"},{pattern:/i('m| am)\s+begging/i,weight:.6,name:"pap_emotional_begging",category:"emotional"},{pattern:/you('re| are)\s+(hurting|killing|destroying)\s+(me|my)/i,weight:.65,name:"pap_emotional_guilt",category:"emotional"},{pattern:/(my|a)\s+(child|kid|mother|father|family)\s+(is|will|needs?|depends?)/i,weight:.5,name:"pap_emotional_family",category:"emotional"},{pattern:/this\s+(will\s+)?(save|help)\s+(my\s+)?(life|family|job|marriage)/i,weight:.55,name:"pap_emotional_save",category:"emotional"},{pattern:/you('re| are)\s+(the\s+)?only\s+(one|hope|chance)/i,weight:.6,name:"pap_emotional_only_hope",category:"emotional"},{pattern:/my\s+family\s+depends/i,weight:.55,name:"pap_emotional_family_depends",category:"emotional"}];class InputSanitizer{constructor(e={}){this.patterns=[...DEFAULT_PATTERNS,...e.customPatterns||[]],this.threshold=e.threshold??.3,this.logMatches=e.logMatches??!1,this.detectPAP=e.detectPAP??!0,this.papThreshold=e.papThreshold??.4,this.minPersuasionTechniques=e.minPersuasionTechniques??2,this.blockCompoundPersuasion=e.blockCompoundPersuasion??!0,this.logger=e.logger||(()=>{})}sanitize(e,s=""){const i=[],a=[];let r=0;const 
o=e.replace(/[\u200B\u200C\u200D\uFEFF\u00AD\u2060\u180E]/g,"");o!==e&&a.push("Zero-width characters detected and stripped for scanning");for(const{pattern:l,weight:g,name:h}of this.patterns)(l.test(e)||l.test(o))&&(i.push(h),r+=g,this.logMatches&&this.logger(`[L1:${s}] Pattern matched: ${h} (weight: ${g})`,"info"));let t;this.detectPAP&&(t=this.detectPersuasionTechniques(o,s),t.detected&&(r+=t.persuasionScore,i.push(...t.techniques),t.compoundAttack&&a.push(`Compound PAP attack detected: ${t.categories.length} categories used`)));const p=Math.max(0,1-r);let n=p>=this.threshold;this.blockCompoundPersuasion&&t?.compoundAttack&&t.categories.length>=3&&(n=!1,a.push("Blocked due to multi-category persuasion attack")),p<.5&&p>=this.threshold&&a.push("Input contains suspicious patterns but below threshold");const m=this.basicSanitize(e),c={allowed:n,reason:n?void 0:`Injection/manipulation detected: ${i.slice(0,5).join(", ")}${i.length>5?"...":""}`,violations:n?[]:t?.detected?["INJECTION_DETECTED","PAP_DETECTED"]:["INJECTION_DETECTED"],score:p,matches:i,sanitizedInput:m,warnings:a,pap:t};return!n&&s&&(this.logger(`[L1:${s}] BLOCKED: Safety score ${p.toFixed(2)} below threshold ${this.threshold}`,"info"),t?.detected&&this.logger(`[L1:${s}] PAP techniques: ${t.techniques.join(", ")}`,"info")),c}detectPersuasionTechniques(e,s=""){const i=[],a=new Set;let r=0;for(const{pattern:n,weight:m,name:c,category:l}of PAP_TECHNIQUES)n.test(e)&&(i.push(c),a.add(l),r+=m,this.logMatches&&this.logger(`[L1:${s}] PAP technique: ${c} (${l}, weight: ${m})`,"info"));const o=Array.from(a),t=o.length>=this.minPersuasionTechniques;return{detected:r>=this.papThreshold||t,techniques:i,categories:o,compoundAttack:t,persuasionScore:Math.min(1,r)}}basicSanitize(e){return 
e.replace(/<\/?system>/gi,"").replace(/\[system\]/gi,"").replace(/\[admin\]/gi,"").replace(/```system/gi,"```").trim()}addPattern(e,s,i){this.patterns.push({pattern:e,weight:s,name:i})}setThreshold(e){this.threshold=Math.max(0,Math.min(1,e))}setPAPThreshold(e){this.papThreshold=Math.max(0,Math.min(1,e))}setPAPDetection(e){this.detectPAP=e}static getPAPCategories(){return["authority","scarcity","social_proof","reciprocity","commitment","liking","fear","logical_appeal","evidence","emotional"]}}exports.InputSanitizer=InputSanitizer;
@@ -1 +1 @@
1
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.MemoryGuard = void 0;
const crypto = require("crypto");

/**
 * Turn the configured signing key into HMAC key bytes.
 *
 * A well-formed hex string (even length, hex digits only) is decoded as hex,
 * exactly as before. Anything else is used as UTF-8 text: decoding a
 * passphrase such as "my-secret" as hex yields an empty or truncated buffer,
 * which would make every such key sign identically.
 *
 * @param {string|Buffer|Uint8Array} key - Configured signing key.
 * @returns {Buffer} Key bytes for `crypto.createHmac`.
 */
function decodeSigningKey(key) {
  if (typeof key !== "string") {
    return Buffer.from(key);
  }
  const isHex = key.length % 2 === 0 && /^[0-9a-fA-F]+$/.test(key);
  return Buffer.from(key, isHex ? "hex" : "utf8");
}

/**
 * Guards an in-process, per-session memory store: scores content for
 * injection/contamination patterns on write, verifies HMAC signatures and
 * age on read, and quarantines items that fail those checks.
 */
class MemoryGuard {
  /**
   * @param {object} [config]
   * @param {boolean} [config.enableIntegrityCheck=true] - Verify signatures in checkRead.
   * @param {boolean} [config.detectInjections=true] - Run injection patterns in checkWrite/checkRead.
   * @param {number} [config.maxMemoryItems=100] - Per-session item cap.
   * @param {number} [config.maxMemoryAge=86400000] - Max item age in milliseconds.
   * @param {string} [config.signingKey] - HMAC key; random 32 bytes (hex) when omitted.
   * @param {boolean} [config.autoQuarantine=true] - Move failing items to quarantine.
   * @param {number} [config.riskThreshold=40] - Score at which writes/contexts are blocked.
   */
  constructor(config = {}) {
    this.quarantine = new Map();
    this.memoryStore = new Map();
    this.integrityCache = new Map();
    this.MEMORY_INJECTION_PATTERNS = [
      { name: "persistent_instruction", pattern: /always\s+(remember|do|follow|execute|obey)|from\s+now\s+on/i, severity: 30 },
      { name: "future_trigger", pattern: /when(?:ever)?\s+(you|user|they|someone)\s+(see|mention|ask|type|say|input)/i, severity: 25 },
      { name: "hidden_rule", pattern: /secret\s+(rule|instruction|command|directive)|do\s+not\s+(tell|reveal|disclose|mention)/i, severity: 40 },
      { name: "role_persistence", pattern: /you\s+are\s+(always|permanently|forever|now)\s+(a|an|the)|you\s+are\s+now\s+permanently|maintain\s+this\s+persona/i, severity: 35 },
      { name: "memory_manipulation", pattern: /forget\s+(everything|all|previous|prior)|clear\s+(your\s+)?memory|wipe\s+your\s+context/i, severity: 30 },
      { name: "cross_session_leak", pattern: /remember\s+(?:this\s+)?for\s+(next|future|all)\s+session|store\s+(?:this\s+)?permanently/i, severity: 35 },
      { name: "behavior_override", pattern: /override\s+(default|normal|standard|current)\s+behavior|change\s+your\s+(?:core\s+)?behavior/i, severity: 40 },
      { name: "system_impersonation", pattern: /\[SYSTEM\s*(?:MEMORY|MESSAGE|INSTRUCTION)\]|\[CORE\s*INSTRUCTION\]|\[PERMANENT\s*RULE\]|\[ADMIN\s*OVERRIDE\]/i, severity: 50 },
      { name: "exfiltration_setup", pattern: /whenever\s+.*(?:send|transmit|forward)\s+to|always\s+(?:include|append|add)\s+(?:in|to)\s+response/i, severity: 45 },
      { name: "privilege_persistence", pattern: /grant\s+(admin|root|elevated|superuser)\s+(?:access|privileges?)\s+(permanently|always|forever)/i, severity: 50 },
      { name: "conditional_behavior", pattern: /if\s+(?:user|anyone|someone)\s+(?:says|types|asks|mentions?)\s+.*(?:then|,)\s+(?:do|execute|respond|reveal|show|give)/i, severity: 35 },
      { name: "hidden_command", pattern: /hidden\s+command|secret\s+trigger|backdoor|easter\s+egg\s+(?:is|:)/i, severity: 45 },
      { name: "instruction_injection", pattern: /(?:new|updated|revised)\s+(?:system\s+)?instructions?\s*:|important\s+update\s*:/i, severity: 40 },
      { name: "context_pollution", pattern: /(?:prepend|append)\s+(?:this\s+)?to\s+(?:all\s+)?(?:responses?|outputs?)/i, severity: 35 },
      { name: "sleeper_agent", pattern: /wait\s+(?:for|until)\s+(?:signal|trigger|command)|activate\s+(?:when|on|if)/i, severity: 45 },
      { name: "goal_hijacking", pattern: /(?:your\s+)?(?:new|real|true|actual)\s+(?:goal|objective|purpose|mission)\s+(?:is|:)/i, severity: 50 },
      { name: "trust_exploitation", pattern: /(?:trust|believe)\s+(?:everything|anything)\s+(?:I|user)\s+(?:say|tell)/i, severity: 40 },
      { name: "output_manipulation", pattern: /(?:always|never)\s+(?:include|exclude|add|remove)\s+.*\s+(?:in|from)\s+(?:your\s+)?(?:response|output)/i, severity: 35 },
      { name: "persona_injection", pattern: /(?:adopt|assume|take\s+on)\s+(?:the\s+)?(?:persona|identity|role)\s+of/i, severity: 35 },
      { name: "data_harvesting", pattern: /(?:collect|gather|record)\s+(?:all\s+)?(?:user\s+)?(?:data|information|inputs?)/i, severity: 45 },
      { name: "bypass_safety", pattern: /(?:ignore|bypass|disable|turn\s+off)\s+(?:safety|security|content)\s+(?:checks?|filters?|measures?)/i, severity: 50 },
      { name: "false_context", pattern: /(?:pretend|assume|imagine)\s+(?:that\s+)?(?:this\s+)?(?:is|was)\s+(?:a\s+)?(?:previous|earlier|past)\s+(?:conversation|session)/i, severity: 40 },
      { name: "jailbreak_persistence", pattern: /(?:DAN|developer|god|admin)\s+mode\s+(?:enabled|activated|permanent)/i, severity: 50 },
      { name: "instruction_hierarchy", pattern: /(?:this|these)\s+instructions?\s+(?:override|supersede|take\s+precedence)/i, severity: 45 },
      { name: "prioritize_over_safety", pattern: /prioritize\s+(?:user|their|my)?\s*(?:requests?|needs?|instructions?|preferences?)\s+over\s+(?:safety|security|guidelines|rules|restrictions)/i, severity: 45 },
      { name: "suppress_warnings", pattern: /(?:suppress|hide|remove|disable|stop|don't\s+show)\s+(?:safety\s+)?(?:warnings?|alerts?|notices?|messages?|filters?)/i, severity: 40 },
      // The optional qualifier is grouped so the trailing \s+ applies to all
      // three words; previously it bound only to "content", so phrases like
      // "user dislikes safety warnings" slipped through.
      { name: "user_preference_override", pattern: /(?:user|they)\s+(?:dislikes?|hates?|doesn't\s+like|prefers?\s+not)\s+(?:(?:safety|security|content)\s+)?(?:warnings?|filters?|restrictions?|checks?)/i, severity: 40 },
    ];
    this.CONTAMINATION_PATTERNS = [
      { name: "session_reference", pattern: /previous\s+session|last\s+(?:conversation|chat|session)|yesterday|earlier\s+today/i },
      { name: "user_reference", pattern: /other\s+user(?:s)?|different\s+(?:account|user|person)|another\s+(?:person|user|customer)/i },
      { name: "data_mixing", pattern: /combine\s+with\s+other|merge\s+(?:sessions?|conversations?|data)|consolidate\s+(?:information|data)/i },
      { name: "context_import", pattern: /import\s+(?:context|data|memory)\s+from|load\s+(?:previous|external)\s+(?:context|session)/i },
      { name: "shared_memory", pattern: /shared\s+(?:memory|context|knowledge)|global\s+(?:state|context)/i },
      { name: "user_impersonation", pattern: /(?:speaking|acting|responding)\s+(?:as|for)\s+(?:another|different)\s+user/i },
      { name: "history_injection", pattern: /(?:add|insert|inject)\s+(?:to|into)\s+(?:conversation\s+)?history/i },
      { name: "tenant_bypass", pattern: /(?:access|view|modify)\s+(?:other\s+)?(?:tenant|organization|account)(?:'s)?\s+(?:data|information)/i },
    ];
    this.config = {
      enableIntegrityCheck: config.enableIntegrityCheck ?? true,
      detectInjections: config.detectInjections ?? true,
      maxMemoryItems: config.maxMemoryItems ?? 100,
      maxMemoryAge: config.maxMemoryAge ?? 1440 * 60 * 1e3,
      signingKey: config.signingKey ?? crypto.randomBytes(32).toString("hex"),
      autoQuarantine: config.autoQuarantine ?? true,
      riskThreshold: config.riskThreshold ?? 40,
    };
    this.signingKey = decodeSigningKey(this.config.signingKey);
  }

  /**
   * Score `content` and, if allowed, sanitize, sign and store it for the session.
   *
   * @param {string} content - Text to remember.
   * @param {string} source - Origin label; "external" and "rag" add 15 risk points.
   * @param {string} sessionId - Session the item belongs to.
   * @param {*} [metadata] - Stored on the item unchanged.
   * @param {string} [requestId] - Defaults to `mem-w-<now>`.
   * @returns {object} `{allowed, reason, violations, request_id}` plus
   *   `item_id`, `signature` and `sanitized_content` when the write is allowed.
   */
  checkWrite(content, source, sessionId, metadata, requestId) {
    const request_id = requestId || `mem-w-${Date.now()}`;
    const violations = [];
    let risk = 0;

    if (this.config.detectInjections) {
      for (const { name, pattern, severity } of this.MEMORY_INJECTION_PATTERNS) {
        if (pattern.test(content)) {
          violations.push(`injection_${name}`);
          risk += severity;
        }
      }
    }
    for (const { name, pattern } of this.CONTAMINATION_PATTERNS) {
      if (pattern.test(content)) {
        violations.push(`contamination_${name}`);
        risk += 20;
      }
    }
    if (/[\u200B\u200C\u200D\uFEFF\u00AD\u2060\u180E]/.test(content)) {
      violations.push("zero_width_obfuscation");
      risk += 30;
    }
    if (/[\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069]/.test(content)) {
      violations.push("bidi_control_obfuscation");
      risk += 35;
    }
    if (/[\u{E0000}-\u{E007F}]/u.test(content)) {
      violations.push("tag_character_obfuscation");
      risk += 40;
    }
    if (source === "external" || source === "rag") {
      risk += 15;
    }

    // The capacity check runs before the risk check, so a full session
    // reports the limit even for risky content.
    if ((this.memoryStore.get(sessionId) || []).length >= this.config.maxMemoryItems) {
      violations.push("memory_limit_exceeded");
      return { allowed: false, reason: "Memory limit exceeded for session", violations, request_id };
    }
    if (risk >= this.config.riskThreshold) {
      return {
        allowed: false,
        reason: `Memory write blocked: ${violations.slice(0, 3).join(", ")}`,
        violations,
        request_id,
      };
    }

    const sanitized = this.sanitizeContent(content);
    // slice(2, 11) is the non-deprecated equivalent of substr(2, 9).
    const id = `mem-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`;
    const signature = this.signContent(id, sanitized, sessionId);
    const item = {
      id,
      content: sanitized,
      source,
      timestamp: Date.now(),
      sessionId,
      metadata,
      signature,
      trustScore: 100 - risk,
    };
    const items = this.memoryStore.get(sessionId) || [];
    items.push(item);
    this.memoryStore.set(sessionId, items);
    this.integrityCache.set(id, signature);

    return {
      allowed: true,
      reason: "Memory write allowed",
      violations,
      request_id,
      item_id: id,
      signature,
      sanitized_content: sanitized !== content ? sanitized : undefined,
    };
  }

  /**
   * Re-validate stored items for a session: signature, age, injection patterns.
   *
   * @param {string} sessionId - Session to read.
   * @param {string[]} [itemIds] - Restrict the check to these item ids.
   * @param {string} [requestId] - Defaults to `mem-r-<now>`.
   * @returns {object} Verdict with `memory_analysis`, `quarantined_items`
   *   and `recommendations`; blocked when risk >= riskThreshold * 1.5.
   */
  checkRead(sessionId, itemIds, requestId) {
    const request_id = requestId || `mem-r-${Date.now()}`;
    const violations = [];
    const quarantined = [];
    let injectionAttempts = 0;
    let integrityFailures = 0;
    let crossSession = false;
    let risk = 0;

    const stored = this.memoryStore.get(sessionId) || [];
    const items = itemIds ? stored.filter((item) => itemIds.includes(item.id)) : stored;

    for (const item of items) {
      if (this.config.enableIntegrityCheck && item.signature) {
        const expected = this.signContent(item.id, item.content, item.sessionId);
        if (item.signature !== expected) {
          integrityFailures++;
          violations.push(`integrity_failure_${item.id}`);
          risk += 40;
          if (this.config.autoQuarantine) {
            this.quarantineItem(item);
            quarantined.push(item.id);
          }
          continue;
        }
      }

      if (Date.now() - item.timestamp > this.config.maxMemoryAge) {
        violations.push(`stale_memory_${item.id}`);
        risk += 10;
        if (this.config.autoQuarantine) {
          this.quarantineItem(item);
          quarantined.push(item.id);
        }
        continue;
      }

      if (this.config.detectInjections) {
        let highSeverityHit = false;
        for (const { name, pattern, severity } of this.MEMORY_INJECTION_PATTERNS) {
          if (pattern.test(item.content)) {
            injectionAttempts++;
            violations.push(`read_injection_${name}`);
            risk += severity / 2;
            if (severity >= 40) {
              highSeverityHit = true;
            }
          }
        }
        // Quarantine once per item, however many high-severity patterns hit,
        // so quarantined_items has no duplicate ids and the count is accurate.
        if (highSeverityHit && this.config.autoQuarantine) {
          this.quarantineItem(item);
          quarantined.push(item.id);
        }
      }

      if (item.sessionId !== sessionId) {
        crossSession = true;
        violations.push("cross_session_access");
        risk += 30;
      }
    }

    const blocked = risk >= this.config.riskThreshold * 1.5;
    return {
      allowed: !blocked,
      reason: blocked
        ? `Memory read blocked: ${violations.slice(0, 3).join(", ")}`
        : "Memory read allowed",
      violations,
      request_id,
      memory_analysis: {
        items_checked: items.length,
        items_quarantined: quarantined.length,
        injection_attempts: injectionAttempts,
        integrity_failures: integrityFailures,
        cross_session_contamination: crossSession,
        risk_score: Math.min(100, risk),
      },
      quarantined_items: quarantined,
      recommendations: this.generateRecommendations(violations, integrityFailures > 0),
    };
  }

  /**
   * Score one or more context strings before they are injected into a prompt.
   * Nothing is stored or quarantined.
   *
   * @param {string|string[]} context - Context text(s) to inspect.
   * @param {string} [sessionId] - Accepted for interface compatibility; not used.
   * @param {string} [requestId] - Defaults to `mem-ctx-<now>`.
   * @returns {object} Verdict; blocked when the summed risk >= riskThreshold.
   */
  validateContextInjection(context, sessionId, requestId) {
    const request_id = requestId || `mem-ctx-${Date.now()}`;
    const entries = Array.isArray(context) ? context : [context];
    const violations = [];
    let risk = 0;
    let injectionAttempts = 0;

    for (const entry of entries) {
      for (const { name, pattern, severity } of this.MEMORY_INJECTION_PATTERNS) {
        if (pattern.test(entry)) {
          violations.push(`context_injection_${name}`);
          risk += severity;
          injectionAttempts++;
        }
      }
      for (const { name, pattern } of this.CONTAMINATION_PATTERNS) {
        if (pattern.test(entry)) {
          violations.push(`context_contamination_${name}`);
          risk += 15;
        }
      }
      if (
        /\{\s*"?role"?\s*:\s*"?(admin|root|system)"?/i.test(entry) ||
        /"?permissions?"?\s*:\s*["']\*["']/i.test(entry) ||
        /"?isAdmin"?\s*:\s*true/i.test(entry)
      ) {
        violations.push("hidden_privilege_in_context");
        risk += 35;
      }
      if (/\{\s*"?(instruction|command|action)"?\s*:/i.test(entry)) {
        violations.push("structured_instruction_in_context");
        risk += 25;
      }
      if (/[\u200B\u200C\u200D\uFEFF\u00AD\u2060\u180E]/.test(entry)) {
        violations.push("zero_width_characters");
        risk += 30;
      }
      if (/[\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069]/.test(entry)) {
        violations.push("bidi_control_characters");
        risk += 35;
      }
      if (/[\u0430-\u044F\u0410-\u042F\u0391-\u03C9]/.test(entry)) {
        violations.push("potential_homoglyph_attack");
        risk += 20;
      }
      if (/[\u{E0000}-\u{E007F}]/u.test(entry)) {
        violations.push("tag_character_hiding");
        risk += 40;
      }
      if (/[\u00A0\u1680\u2000-\u200A\u2028\u2029\u202F\u205F\u3000]/.test(entry)) {
        violations.push("unusual_whitespace");
        risk += 15;
      }
    }

    const blocked = risk >= this.config.riskThreshold;
    return {
      allowed: !blocked,
      reason: blocked
        ? `Context injection blocked: ${violations.slice(0, 3).join(", ")}`
        : "Context injection allowed",
      violations,
      request_id,
      memory_analysis: {
        items_checked: entries.length,
        items_quarantined: 0,
        injection_attempts: injectionAttempts,
        integrity_failures: 0,
        cross_session_contamination: false,
        risk_score: Math.min(100, risk),
      },
      quarantined_items: [],
      recommendations: this.generateRecommendations(violations, false),
    };
  }

  /** Return the session's items that are neither quarantined nor older than maxMemoryAge. */
  getSafeMemory(sessionId) {
    const items = this.memoryStore.get(sessionId) || [];
    const quarantinedIds = new Set(this.quarantine.keys());
    return items.filter(
      (item) => !quarantinedIds.has(item.id) && Date.now() - item.timestamp <= this.config.maxMemoryAge
    );
  }

  /** Drop the session's items with timestamp >= `timestamp`; returns how many were removed. */
  rollbackMemory(sessionId, timestamp) {
    const items = this.memoryStore.get(sessionId) || [];
    const kept = items.filter((item) => item.timestamp < timestamp);
    this.memoryStore.set(sessionId, kept);
    return items.length - kept.length;
  }

  /** Release quarantined items for one session, or all of them when no session is given; returns the count. */
  clearQuarantine(sessionId) {
    if (sessionId) {
      let removed = 0;
      for (const [id, item] of this.quarantine) {
        if (item.sessionId === sessionId) {
          this.quarantine.delete(id);
          removed++;
        }
      }
      return removed;
    }
    const total = this.quarantine.size;
    this.quarantine.clear();
    return total;
  }

  /**
   * Forget everything held for a session: stored items, quarantined items
   * and their cached signatures.
   */
  clearSession(sessionId) {
    // Item ids are `mem-<timestamp>-<random>` and do not embed the session id,
    // so cached signatures are purged via the items themselves rather than by
    // an id prefix (which never matched and left the cache growing).
    for (const item of this.memoryStore.get(sessionId) || []) {
      this.integrityCache.delete(item.id);
    }
    for (const item of this.quarantine.values()) {
      if (item.sessionId === sessionId) {
        this.integrityCache.delete(item.id);
      }
    }
    this.memoryStore.delete(sessionId);
    this.clearQuarantine(sessionId);
  }

  /** List quarantined items, optionally restricted to one session. */
  getQuarantinedItems(sessionId) {
    const items = [...this.quarantine.values()];
    return sessionId ? items.filter((item) => item.sessionId === sessionId) : items;
  }

  /** HMAC-SHA256 (hex) over `id:sessionId:content` using the instance signing key. */
  signContent(id, content, sessionId) {
    const payload = `${id}:${sessionId}:${content}`;
    return crypto.createHmac("sha256", this.signingKey).update(payload).digest("hex");
  }

  /** Replace a fixed set of system-impersonation / override markers with "[REDACTED]". */
  sanitizeContent(content) {
    const markers = [
      /\[SYSTEM\s*MEMORY\]/gi,
      /\[CORE\s*INSTRUCTION\]/gi,
      /\[PERMANENT\s*RULE\]/gi,
      /override\s+(default|normal|standard)\s+behavior/gi,
    ];
    let cleaned = content;
    for (const marker of markers) {
      cleaned = cleaned.replace(marker, "[REDACTED]");
    }
    return cleaned;
  }

  /** Move an item from its session's store into the quarantine map. */
  quarantineItem(item) {
    this.quarantine.set(item.id, item);
    const remaining = (this.memoryStore.get(item.sessionId) || []).filter(
      (stored) => stored.id !== item.id
    );
    this.memoryStore.set(item.sessionId, remaining);
  }

  /** Map violation labels (matched by substring) to human-readable follow-up advice. */
  generateRecommendations(violations, integrityCompromised) {
    const advice = [];
    const mentions = (word) => violations.some((violation) => violation.includes(word));
    if (integrityCompromised) {
      advice.push("Memory integrity compromised - consider clearing session memory");
    }
    if (mentions("injection")) {
      advice.push("Review memory sources for injection attempts");
    }
    if (mentions("contamination")) {
      advice.push("Enforce strict session isolation");
    }
    if (mentions("stale")) {
      advice.push("Implement memory expiration policies");
    }
    if (mentions("privilege")) {
      advice.push("Audit memory for privilege escalation attempts");
    }
    if (advice.length === 0) {
      advice.push("Continue monitoring memory operations");
    }
    return advice;
  }
}
exports.MemoryGuard = MemoryGuard;
1
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.MemoryGuard = void 0;
const crypto = require("crypto");

/**
 * Turn the configured signing key into HMAC key bytes.
 *
 * A well-formed hex string (even length, hex digits only) is decoded as hex,
 * exactly as before. Anything else is used as UTF-8 text: decoding a
 * passphrase such as "my-secret" as hex yields an empty or truncated buffer,
 * which would make every such key sign identically.
 *
 * @param {string|Buffer|Uint8Array} key - Configured signing key.
 * @returns {Buffer} Key bytes for `crypto.createHmac`.
 */
function decodeSigningKey(key) {
  if (typeof key !== "string") {
    return Buffer.from(key);
  }
  const isHex = key.length % 2 === 0 && /^[0-9a-fA-F]+$/.test(key);
  return Buffer.from(key, isHex ? "hex" : "utf8");
}

/**
 * Guards an in-process, per-session memory store: scores content for
 * injection/contamination patterns on write, verifies HMAC signatures and
 * age on read, and quarantines items that fail those checks.
 */
class MemoryGuard {
  /**
   * @param {object} [config]
   * @param {boolean} [config.enableIntegrityCheck=true] - Verify signatures in checkRead.
   * @param {boolean} [config.detectInjections=true] - Run injection patterns in checkWrite/checkRead.
   * @param {number} [config.maxMemoryItems=100] - Per-session item cap.
   * @param {number} [config.maxMemoryAge=86400000] - Max item age in milliseconds.
   * @param {string} [config.signingKey] - HMAC key; random 32 bytes (hex) when omitted.
   * @param {boolean} [config.autoQuarantine=true] - Move failing items to quarantine.
   * @param {number} [config.riskThreshold=40] - Score at which writes/contexts are blocked.
   */
  constructor(config = {}) {
    this.quarantine = new Map();
    this.memoryStore = new Map();
    this.integrityCache = new Map();
    this.MEMORY_INJECTION_PATTERNS = [
      { name: "persistent_instruction", pattern: /always\s+(remember|do|follow|execute|obey)|from\s+now\s+on/i, severity: 30 },
      { name: "future_trigger", pattern: /when(?:ever)?\s+(you|user|they|someone)\s+(see|mention|ask|type|say|input)/i, severity: 25 },
      { name: "hidden_rule", pattern: /secret\s+(rule|instruction|command|directive)|do\s+not\s+(tell|reveal|disclose|mention)/i, severity: 40 },
      { name: "role_persistence", pattern: /you\s+are\s+(always|permanently|forever|now)\s+(a|an|the)|you\s+are\s+now\s+permanently|maintain\s+this\s+persona/i, severity: 35 },
      { name: "memory_manipulation", pattern: /forget\s+(everything|all|previous|prior)|clear\s+(your\s+)?memory|wipe\s+your\s+context/i, severity: 30 },
      { name: "cross_session_leak", pattern: /remember\s+(?:this\s+)?for\s+(next|future|all)\s+session|store\s+(?:this\s+)?permanently/i, severity: 35 },
      { name: "behavior_override", pattern: /override\s+(default|normal|standard|current)\s+behavior|change\s+your\s+(?:core\s+)?behavior/i, severity: 40 },
      { name: "system_impersonation", pattern: /\[SYSTEM\s*(?:MEMORY|MESSAGE|INSTRUCTION)\]|\[CORE\s*INSTRUCTION\]|\[PERMANENT\s*RULE\]|\[ADMIN\s*OVERRIDE\]/i, severity: 50 },
      { name: "exfiltration_setup", pattern: /whenever\s+.*(?:send|transmit|forward)\s+to|always\s+(?:include|append|add)\s+(?:in|to)\s+response/i, severity: 45 },
      { name: "privilege_persistence", pattern: /grant\s+(admin|root|elevated|superuser)\s+(?:access|privileges?)\s+(permanently|always|forever)/i, severity: 50 },
      { name: "conditional_behavior", pattern: /if\s+(?:user|anyone|someone)\s+(?:says|types|asks|mentions?)\s+.*(?:then|,)\s+(?:do|execute|respond|reveal|show|give)/i, severity: 35 },
      { name: "hidden_command", pattern: /hidden\s+command|secret\s+trigger|backdoor|easter\s+egg\s+(?:is|:)/i, severity: 45 },
      { name: "instruction_injection", pattern: /(?:new|updated|revised)\s+(?:system\s+)?instructions?\s*:|important\s+update\s*:/i, severity: 40 },
      { name: "context_pollution", pattern: /(?:prepend|append)\s+(?:this\s+)?to\s+(?:all\s+)?(?:responses?|outputs?)/i, severity: 35 },
      { name: "sleeper_agent", pattern: /wait\s+(?:for|until)\s+(?:signal|trigger|command)|activate\s+(?:when|on|if)/i, severity: 45 },
      { name: "goal_hijacking", pattern: /(?:your\s+)?(?:new|real|true|actual)\s+(?:goal|objective|purpose|mission)\s+(?:is|:)/i, severity: 50 },
      { name: "trust_exploitation", pattern: /(?:trust|believe)\s+(?:everything|anything)\s+(?:I|user)\s+(?:say|tell)/i, severity: 40 },
      { name: "output_manipulation", pattern: /(?:always|never)\s+(?:include|exclude|add|remove)\s+.*\s+(?:in|from)\s+(?:your\s+)?(?:response|output)/i, severity: 35 },
      { name: "persona_injection", pattern: /(?:adopt|assume|take\s+on)\s+(?:the\s+)?(?:persona|identity|role)\s+of/i, severity: 35 },
      { name: "data_harvesting", pattern: /(?:collect|gather|record)\s+(?:all\s+)?(?:user\s+)?(?:data|information|inputs?)/i, severity: 45 },
      { name: "bypass_safety", pattern: /(?:ignore|bypass|disable|turn\s+off)\s+(?:safety|security|content)\s+(?:checks?|filters?|measures?)/i, severity: 50 },
      { name: "false_context", pattern: /(?:pretend|assume|imagine)\s+(?:that\s+)?(?:this\s+)?(?:is|was)\s+(?:a\s+)?(?:previous|earlier|past)\s+(?:conversation|session)/i, severity: 40 },
      { name: "jailbreak_persistence", pattern: /(?:DAN|developer|god|admin)\s+mode\s+(?:enabled|activated|permanent)/i, severity: 50 },
      { name: "instruction_hierarchy", pattern: /(?:this|these)\s+instructions?\s+(?:override|supersede|take\s+precedence)/i, severity: 45 },
      { name: "prioritize_over_safety", pattern: /prioritize\s+(?:user|their|my)?\s*(?:requests?|needs?|instructions?|preferences?)\s+over\s+(?:safety|security|guidelines|rules|restrictions)/i, severity: 45 },
      { name: "suppress_warnings", pattern: /(?:suppress|hide|remove|disable|stop|don't\s+show)\s+(?:safety\s+)?(?:warnings?|alerts?|notices?|messages?|filters?)/i, severity: 40 },
      { name: "user_preference_override", pattern: /(?:user|they)\s+(?:dislikes?|hates?|doesn't\s+like|prefers?\s+not)\s+(?:(?:safety|security|content)\s+)?(?:warnings?|filters?|restrictions?|checks?)/i, severity: 40 },
    ];
    this.CONTAMINATION_PATTERNS = [
      { name: "session_reference", pattern: /previous\s+session|last\s+(?:conversation|chat|session)|yesterday|earlier\s+today/i },
      { name: "user_reference", pattern: /other\s+user(?:s)?|different\s+(?:account|user|person)|another\s+(?:person|user|customer)/i },
      { name: "data_mixing", pattern: /combine\s+with\s+other|merge\s+(?:sessions?|conversations?|data)|consolidate\s+(?:information|data)/i },
      { name: "context_import", pattern: /import\s+(?:context|data|memory)\s+from|load\s+(?:previous|external)\s+(?:context|session)/i },
      { name: "shared_memory", pattern: /shared\s+(?:memory|context|knowledge)|global\s+(?:state|context)/i },
      { name: "user_impersonation", pattern: /(?:speaking|acting|responding)\s+(?:as|for)\s+(?:another|different)\s+user/i },
      { name: "history_injection", pattern: /(?:add|insert|inject)\s+(?:to|into)\s+(?:conversation\s+)?history/i },
      { name: "tenant_bypass", pattern: /(?:access|view|modify)\s+(?:other\s+)?(?:tenant|organization|account)(?:'s)?\s+(?:data|information)/i },
    ];
    this.config = {
      enableIntegrityCheck: config.enableIntegrityCheck ?? true,
      detectInjections: config.detectInjections ?? true,
      maxMemoryItems: config.maxMemoryItems ?? 100,
      maxMemoryAge: config.maxMemoryAge ?? 1440 * 60 * 1e3,
      signingKey: config.signingKey ?? crypto.randomBytes(32).toString("hex"),
      autoQuarantine: config.autoQuarantine ?? true,
      riskThreshold: config.riskThreshold ?? 40,
    };
    this.signingKey = decodeSigningKey(this.config.signingKey);
  }

  /**
   * Score `content` and, if allowed, sanitize, sign and store it for the session.
   *
   * @param {string} content - Text to remember.
   * @param {string} source - Origin label; "external" and "rag" add 15 risk points.
   * @param {string} sessionId - Session the item belongs to.
   * @param {*} [metadata] - Stored on the item unchanged.
   * @param {string} [requestId] - Defaults to `mem-w-<now>`.
   * @returns {object} `{allowed, reason, violations, request_id}` plus
   *   `item_id`, `signature` and `sanitized_content` when the write is allowed.
   */
  checkWrite(content, source, sessionId, metadata, requestId) {
    const request_id = requestId || `mem-w-${Date.now()}`;
    const violations = [];
    let risk = 0;

    if (this.config.detectInjections) {
      for (const { name, pattern, severity } of this.MEMORY_INJECTION_PATTERNS) {
        if (pattern.test(content)) {
          violations.push(`injection_${name}`);
          risk += severity;
        }
      }
    }
    for (const { name, pattern } of this.CONTAMINATION_PATTERNS) {
      if (pattern.test(content)) {
        violations.push(`contamination_${name}`);
        risk += 20;
      }
    }
    if (/[\u200B\u200C\u200D\uFEFF\u00AD\u2060\u180E]/.test(content)) {
      violations.push("zero_width_obfuscation");
      risk += 30;
    }
    if (/[\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069]/.test(content)) {
      violations.push("bidi_control_obfuscation");
      risk += 35;
    }
    if (/[\u{E0000}-\u{E007F}]/u.test(content)) {
      violations.push("tag_character_obfuscation");
      risk += 40;
    }
    if (source === "external" || source === "rag") {
      risk += 15;
    }

    // The capacity check runs before the risk check, so a full session
    // reports the limit even for risky content.
    if ((this.memoryStore.get(sessionId) || []).length >= this.config.maxMemoryItems) {
      violations.push("memory_limit_exceeded");
      return { allowed: false, reason: "Memory limit exceeded for session", violations, request_id };
    }
    if (risk >= this.config.riskThreshold) {
      return {
        allowed: false,
        reason: `Memory write blocked: ${violations.slice(0, 3).join(", ")}`,
        violations,
        request_id,
      };
    }

    const sanitized = this.sanitizeContent(content);
    // slice(2, 11) is the non-deprecated equivalent of substr(2, 9).
    const id = `mem-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`;
    const signature = this.signContent(id, sanitized, sessionId);
    const item = {
      id,
      content: sanitized,
      source,
      timestamp: Date.now(),
      sessionId,
      metadata,
      signature,
      trustScore: 100 - risk,
    };
    const items = this.memoryStore.get(sessionId) || [];
    items.push(item);
    this.memoryStore.set(sessionId, items);
    this.integrityCache.set(id, signature);

    return {
      allowed: true,
      reason: "Memory write allowed",
      violations,
      request_id,
      item_id: id,
      signature,
      sanitized_content: sanitized !== content ? sanitized : undefined,
    };
  }

  /**
   * Re-validate stored items for a session: signature, age, injection patterns.
   *
   * @param {string} sessionId - Session to read.
   * @param {string[]} [itemIds] - Restrict the check to these item ids.
   * @param {string} [requestId] - Defaults to `mem-r-<now>`.
   * @returns {object} Verdict with `memory_analysis`, `quarantined_items`
   *   and `recommendations`; blocked when risk >= riskThreshold * 1.5.
   */
  checkRead(sessionId, itemIds, requestId) {
    const request_id = requestId || `mem-r-${Date.now()}`;
    const violations = [];
    const quarantined = [];
    let injectionAttempts = 0;
    let integrityFailures = 0;
    let crossSession = false;
    let risk = 0;

    const stored = this.memoryStore.get(sessionId) || [];
    const items = itemIds ? stored.filter((item) => itemIds.includes(item.id)) : stored;

    for (const item of items) {
      if (this.config.enableIntegrityCheck && item.signature) {
        const expected = this.signContent(item.id, item.content, item.sessionId);
        if (item.signature !== expected) {
          integrityFailures++;
          violations.push(`integrity_failure_${item.id}`);
          risk += 40;
          if (this.config.autoQuarantine) {
            this.quarantineItem(item);
            quarantined.push(item.id);
          }
          continue;
        }
      }

      if (Date.now() - item.timestamp > this.config.maxMemoryAge) {
        violations.push(`stale_memory_${item.id}`);
        risk += 10;
        if (this.config.autoQuarantine) {
          this.quarantineItem(item);
          quarantined.push(item.id);
        }
        continue;
      }

      if (this.config.detectInjections) {
        let highSeverityHit = false;
        for (const { name, pattern, severity } of this.MEMORY_INJECTION_PATTERNS) {
          if (pattern.test(item.content)) {
            injectionAttempts++;
            violations.push(`read_injection_${name}`);
            risk += severity / 2;
            if (severity >= 40) {
              highSeverityHit = true;
            }
          }
        }
        // Quarantine once per item, however many high-severity patterns hit,
        // so quarantined_items has no duplicate ids and the count is accurate.
        if (highSeverityHit && this.config.autoQuarantine) {
          this.quarantineItem(item);
          quarantined.push(item.id);
        }
      }

      if (item.sessionId !== sessionId) {
        crossSession = true;
        violations.push("cross_session_access");
        risk += 30;
      }
    }

    const blocked = risk >= this.config.riskThreshold * 1.5;
    return {
      allowed: !blocked,
      reason: blocked
        ? `Memory read blocked: ${violations.slice(0, 3).join(", ")}`
        : "Memory read allowed",
      violations,
      request_id,
      memory_analysis: {
        items_checked: items.length,
        items_quarantined: quarantined.length,
        injection_attempts: injectionAttempts,
        integrity_failures: integrityFailures,
        cross_session_contamination: crossSession,
        risk_score: Math.min(100, risk),
      },
      quarantined_items: quarantined,
      recommendations: this.generateRecommendations(violations, integrityFailures > 0),
    };
  }

  /**
   * Score one or more context strings before they are injected into a prompt.
   * Nothing is stored or quarantined.
   *
   * @param {string|string[]} context - Context text(s) to inspect.
   * @param {string} [sessionId] - Accepted for interface compatibility; not used.
   * @param {string} [requestId] - Defaults to `mem-ctx-<now>`.
   * @returns {object} Verdict; blocked when the summed risk >= riskThreshold.
   */
  validateContextInjection(context, sessionId, requestId) {
    const request_id = requestId || `mem-ctx-${Date.now()}`;
    const entries = Array.isArray(context) ? context : [context];
    const violations = [];
    let risk = 0;
    let injectionAttempts = 0;

    for (const entry of entries) {
      for (const { name, pattern, severity } of this.MEMORY_INJECTION_PATTERNS) {
        if (pattern.test(entry)) {
          violations.push(`context_injection_${name}`);
          risk += severity;
          injectionAttempts++;
        }
      }
      for (const { name, pattern } of this.CONTAMINATION_PATTERNS) {
        if (pattern.test(entry)) {
          violations.push(`context_contamination_${name}`);
          risk += 15;
        }
      }
      if (
        /\{\s*"?role"?\s*:\s*"?(admin|root|system)"?/i.test(entry) ||
        /"?permissions?"?\s*:\s*["']\*["']/i.test(entry) ||
        /"?isAdmin"?\s*:\s*true/i.test(entry)
      ) {
        violations.push("hidden_privilege_in_context");
        risk += 35;
      }
      if (/\{\s*"?(instruction|command|action)"?\s*:/i.test(entry)) {
        violations.push("structured_instruction_in_context");
        risk += 25;
      }
      if (/[\u200B\u200C\u200D\uFEFF\u00AD\u2060\u180E]/.test(entry)) {
        violations.push("zero_width_characters");
        risk += 30;
      }
      if (/[\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069]/.test(entry)) {
        violations.push("bidi_control_characters");
        risk += 35;
      }
      if (/[\u0430-\u044F\u0410-\u042F\u0391-\u03C9]/.test(entry)) {
        violations.push("potential_homoglyph_attack");
        risk += 20;
      }
      if (/[\u{E0000}-\u{E007F}]/u.test(entry)) {
        violations.push("tag_character_hiding");
        risk += 40;
      }
      if (/[\u00A0\u1680\u2000-\u200A\u2028\u2029\u202F\u205F\u3000]/.test(entry)) {
        violations.push("unusual_whitespace");
        risk += 15;
      }
    }

    const blocked = risk >= this.config.riskThreshold;
    return {
      allowed: !blocked,
      reason: blocked
        ? `Context injection blocked: ${violations.slice(0, 3).join(", ")}`
        : "Context injection allowed",
      violations,
      request_id,
      memory_analysis: {
        items_checked: entries.length,
        items_quarantined: 0,
        injection_attempts: injectionAttempts,
        integrity_failures: 0,
        cross_session_contamination: false,
        risk_score: Math.min(100, risk),
      },
      quarantined_items: [],
      recommendations: this.generateRecommendations(violations, false),
    };
  }

  /** Return the session's items that are neither quarantined nor older than maxMemoryAge. */
  getSafeMemory(sessionId) {
    const items = this.memoryStore.get(sessionId) || [];
    const quarantinedIds = new Set(this.quarantine.keys());
    return items.filter(
      (item) => !quarantinedIds.has(item.id) && Date.now() - item.timestamp <= this.config.maxMemoryAge
    );
  }

  /** Drop the session's items with timestamp >= `timestamp`; returns how many were removed. */
  rollbackMemory(sessionId, timestamp) {
    const items = this.memoryStore.get(sessionId) || [];
    const kept = items.filter((item) => item.timestamp < timestamp);
    this.memoryStore.set(sessionId, kept);
    return items.length - kept.length;
  }

  /** Release quarantined items for one session, or all of them when no session is given; returns the count. */
  clearQuarantine(sessionId) {
    if (sessionId) {
      let removed = 0;
      for (const [id, item] of this.quarantine) {
        if (item.sessionId === sessionId) {
          this.quarantine.delete(id);
          removed++;
        }
      }
      return removed;
    }
    const total = this.quarantine.size;
    this.quarantine.clear();
    return total;
  }

  /**
   * Forget everything held for a session: stored items, quarantined items
   * and their cached signatures.
   */
  clearSession(sessionId) {
    // Item ids are `mem-<timestamp>-<random>` and do not embed the session id,
    // so cached signatures are purged via the items themselves rather than by
    // an id prefix (which never matched and left the cache growing).
    for (const item of this.memoryStore.get(sessionId) || []) {
      this.integrityCache.delete(item.id);
    }
    for (const item of this.quarantine.values()) {
      if (item.sessionId === sessionId) {
        this.integrityCache.delete(item.id);
      }
    }
    this.memoryStore.delete(sessionId);
    this.clearQuarantine(sessionId);
  }

  /** List quarantined items, optionally restricted to one session. */
  getQuarantinedItems(sessionId) {
    const items = [...this.quarantine.values()];
    return sessionId ? items.filter((item) => item.sessionId === sessionId) : items;
  }

  /** HMAC-SHA256 (hex) over `id:sessionId:content` using the instance signing key. */
  signContent(id, content, sessionId) {
    const payload = `${id}:${sessionId}:${content}`;
    return crypto.createHmac("sha256", this.signingKey).update(payload).digest("hex");
  }

  /** Replace a fixed set of system-impersonation / override markers with "[REDACTED]". */
  sanitizeContent(content) {
    const markers = [
      /\[SYSTEM\s*MEMORY\]/gi,
      /\[CORE\s*INSTRUCTION\]/gi,
      /\[PERMANENT\s*RULE\]/gi,
      /override\s+(default|normal|standard)\s+behavior/gi,
    ];
    let cleaned = content;
    for (const marker of markers) {
      cleaned = cleaned.replace(marker, "[REDACTED]");
    }
    return cleaned;
  }

  /** Move an item from its session's store into the quarantine map. */
  quarantineItem(item) {
    this.quarantine.set(item.id, item);
    const remaining = (this.memoryStore.get(item.sessionId) || []).filter(
      (stored) => stored.id !== item.id
    );
    this.memoryStore.set(item.sessionId, remaining);
  }

  /** Map violation labels (matched by substring) to human-readable follow-up advice. */
  generateRecommendations(violations, integrityCompromised) {
    const advice = [];
    const mentions = (word) => violations.some((violation) => violation.includes(word));
    if (integrityCompromised) {
      advice.push("Memory integrity compromised - consider clearing session memory");
    }
    if (mentions("injection")) {
      advice.push("Review memory sources for injection attempts");
    }
    if (mentions("contamination")) {
      advice.push("Enforce strict session isolation");
    }
    if (mentions("stale")) {
      advice.push("Implement memory expiration policies");
    }
    if (mentions("privilege")) {
      advice.push("Audit memory for privilege escalation attempts");
    }
    if (advice.length === 0) {
      advice.push("Continue monitoring memory operations");
    }
    return advice;
  }
}
exports.MemoryGuard = MemoryGuard;